Add lambda state to dhdl.xvg with AWH fep
[alexxy/gromacs.git] / src / gromacs / mdrun / minimize.cpp
index c46bbec827592184c63ae2676dc71fe70ad8d464..eec144faf42604fa6ae6a41a0607abc4bf580253 100644 (file)
@@ -52,6 +52,7 @@
 #include <ctime>
 
 #include <algorithm>
+#include <limits>
 #include <vector>
 
 #include "gromacs/commandline/filenm.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/mdtypes/mdatom.h"
 #include "gromacs/mdtypes/mdrunoptions.h"
+#include "gromacs/mdtypes/observablesreducer.h"
 #include "gromacs/mdtypes/state.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/timing/wallcycle.h"
@@ -140,18 +142,18 @@ typedef struct em_state
 static void print_em_start(FILE*                     fplog,
                            const t_commrec*          cr,
                            gmx_walltime_accounting_t walltime_accounting,
-                           gmx_wallcycle_t           wcycle,
+                           gmx_wallcycle           wcycle,
                            const char*               name)
 {
     walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
     print_start(fplog, cr, walltime_accounting, name);
 }
 
 //! Stop counting time for EM
-static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle_t wcycle)
+static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle* wcycle)
 {
-    wallcycle_stop(wcycle, ewcRUN);
+    wallcycle_stop(wcycle, WallCycleCounter::Run);
 
     walltime_accounting_end_time(walltime_accounting);
 }
@@ -313,7 +315,7 @@ static void get_f_norm_max(const t_commrec*               cr,
         }
     }
 
-    if (la_max >= 0 && DOMAINDECOMP(cr))
+    if (la_max >= 0 && haveDDAtomOrdering(*cr))
     {
         a_max = cr->dd->globalAtomIndices[la_max];
     }
@@ -370,7 +372,7 @@ static void init_em(FILE*                fplog,
                     gmx::ImdSession*     imdSession,
                     pull_t*              pull_work,
                     t_state*             state_global,
-                    const gmx_mtop_t*    top_global,
+                    const gmx_mtop_t&    top_global,
                     em_state_t*          ems,
                     gmx_localtop_t*      top,
                     t_nrnb*              nrnb,
@@ -394,7 +396,15 @@ static void init_em(FILE*                fplog,
     }
     int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
     gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
-    initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
+    initialize_lambdas(fplog,
+                       ir->efep,
+                       ir->bSimTemp,
+                       *ir->fepvals,
+                       ir->simtempvals->temperatures,
+                       gmx::arrayRefFromArray(ir->opts.ref_t, ir->opts.ngtc),
+                       MASTER(cr),
+                       fep_state,
+                       lambda);
 
     if (ir->eI == IntegrationAlgorithm::NM)
     {
@@ -404,7 +414,7 @@ static void init_em(FILE*                fplog,
                                       top_global,
                                       constr ? constr->numFlexibleConstraints() : 0,
                                       ir->nstcalcenergy,
-                                      DOMAINDECOMP(cr),
+                                      haveDDAtomOrdering(*cr),
                                       thisRankHasDuty(cr, DUTY_PME));
     }
     else
@@ -422,8 +432,9 @@ static void init_em(FILE*                fplog,
         }
     }
 
-    if (DOMAINDECOMP(cr))
+    if (haveDDAtomOrdering(*cr))
     {
+        // Local state only becomes valid now.
         dd_init_local_state(*cr->dd, state_global, &ems->s);
 
         /* Distribute the charge groups over the nodes from the master node */
@@ -434,7 +445,7 @@ static void init_em(FILE*                fplog,
                             TRUE,
                             1,
                             state_global,
-                            *top_global,
+                            top_global,
                             *ir,
                             imdSession,
                             pull_work,
@@ -458,7 +469,7 @@ static void init_em(FILE*                fplog,
         state_change_natoms(&ems->s, ems->s.natoms);
 
         mdAlgorithmsSetupAtomData(
-                cr, *ir, *top_global, top, fr, &ems->f, mdAtoms, constr, vsite, shellfc ? *shellfc : nullptr);
+                cr, *ir, top_global, top, fr, &ems->f, mdAtoms, constr, vsite, shellfc ? *shellfc : nullptr);
     }
 
     update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[FreeEnergyPerturbationCouplingType::Mass]);
@@ -515,7 +526,7 @@ static void init_em(FILE*                fplog,
 static void finish_em(const t_commrec*          cr,
                       gmx_mdoutf_t              outf,
                       gmx_walltime_accounting_t walltime_accounting,
-                      gmx_wallcycle_t           wcycle)
+                      gmx_wallcycle           wcycle)
 {
     if (!thisRankHasDuty(cr, DUTY_PME))
     {
@@ -545,7 +556,7 @@ static void write_em_traj(FILE*               fplog,
                           gmx_bool            bX,
                           gmx_bool            bF,
                           const char*         confout,
-                          const gmx_mtop_t*   top_global,
+                          const gmx_mtop_t&   top_global,
                           const t_inputrec*   ir,
                           int64_t             step,
                           em_state_t*         state,
@@ -574,7 +585,7 @@ static void write_em_traj(FILE*               fplog,
                                      cr,
                                      outf,
                                      mdof_flags,
-                                     top_global->natoms,
+                                     top_global.natoms,
                                      step,
                                      static_cast<double>(step),
                                      &state->s,
@@ -585,7 +596,7 @@ static void write_em_traj(FILE*               fplog,
 
     if (confout != nullptr)
     {
-        if (DOMAINDECOMP(cr))
+        if (haveDDAtomOrdering(*cr))
         {
             /* If bX=true, x was collected to state_global in the call above */
             if (!bX)
@@ -603,14 +614,14 @@ static void write_em_traj(FILE*               fplog,
 
         if (MASTER(cr))
         {
-            if (ir->pbcType != PbcType::No && !ir->bPeriodicMols && DOMAINDECOMP(cr))
+            if (ir->pbcType != PbcType::No && !ir->bPeriodicMols && haveDDAtomOrdering(*cr))
             {
                 /* Make molecules whole only for confout writing */
-                do_pbc_mtop(ir->pbcType, state->s.box, top_global, state_global->x.rvec_array());
+                do_pbc_mtop(ir->pbcType, state->s.box, &top_global, state_global->x.rvec_array());
             }
 
             write_sto_conf_mtop(confout,
-                                *top_global->name,
+                                *top_global.name,
                                 top_global,
                                 state_global->x.rvec_array(),
                                 nullptr,
@@ -634,9 +645,9 @@ static bool do_em_step(const t_commrec*                          cr,
                        int64_t                                   count)
 
 {
-    t_state *s1, *s2;
-    int      start, end;
-    real     dvdl_constr;
+    t_state *    s1, *s2;
+    int          start, end;
+    real         dvdl_constr;
     int nthreads gmx_unused;
 
     bool validStep = true;
@@ -644,7 +655,7 @@ static bool do_em_step(const t_commrec*                          cr,
     s1 = &ems1->s;
     s2 = &ems2->s;
 
-    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
+    if (haveDDAtomOrdering(*cr) && s1->ddp_count != cr->dd->ddp_count)
     {
         gmx_incons("state mismatch in do_em_step");
     }
@@ -656,7 +667,7 @@ static bool do_em_step(const t_commrec*                          cr,
         state_change_natoms(s2, s1->natoms);
         ems2->f.resize(s2->natoms);
     }
-    if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size())
+    if (haveDDAtomOrdering(*cr) && s2->cg_gl.size() != s1->cg_gl.size())
     {
         s2->cg_gl.resize(s1->cg_gl.size());
     }
@@ -669,7 +680,7 @@ static bool do_em_step(const t_commrec*                          cr,
     start = 0;
     end   = md->homenr;
 
-    nthreads = gmx_omp_nthreads_get(emntUpdate);
+    nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
 #pragma omp parallel num_threads(nthreads)
     {
         const rvec* x1 = s1->x.rvec_array();
@@ -714,7 +725,7 @@ static bool do_em_step(const t_commrec*                          cr,
             }
         }
 
-        if (DOMAINDECOMP(cr))
+        if (haveDDAtomOrdering(*cr))
         {
             /* OpenMP does not supported unsigned loop variables */
 #pragma omp for schedule(static) nowait
@@ -725,7 +736,7 @@ static bool do_em_step(const t_commrec*                          cr,
         }
     }
 
-    if (DOMAINDECOMP(cr))
+    if (haveDDAtomOrdering(*cr))
     {
         s2->ddp_count       = s1->ddp_count;
         s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
@@ -781,7 +792,7 @@ static void em_dd_partition_system(FILE*                fplog,
                                    const gmx::MDLogger& mdlog,
                                    int                  step,
                                    const t_commrec*     cr,
-                                   const gmx_mtop_t*    top_global,
+                                   const gmx_mtop_t&    top_global,
                                    const t_inputrec*    ir,
                                    gmx::ImdSession*     imdSession,
                                    pull_t*              pull_work,
@@ -792,7 +803,7 @@ static void em_dd_partition_system(FILE*                fplog,
                                    VirtualSitesHandler* vsite,
                                    gmx::Constraints*    constr,
                                    t_nrnb*              nrnb,
-                                   gmx_wallcycle_t      wcycle)
+                                   gmx_wallcycle      wcycle)
 {
     /* Repartition the domain decomposition */
     dd_partition_system(fplog,
@@ -802,7 +813,7 @@ static void em_dd_partition_system(FILE*                fplog,
                         FALSE,
                         1,
                         nullptr,
-                        *top_global,
+                        top_global,
                         *ir,
                         imdSession,
                         pull_work,
@@ -822,6 +833,53 @@ static void em_dd_partition_system(FILE*                fplog,
 namespace
 {
 
+//! Copy coordinates, OpenMP parallelized, from \p refCoords to coords
+void setCoordinates(std::vector<RVec>* coords, ArrayRef<const RVec> refCoords)
+{
+    coords->resize(refCoords.size());
+
+    const int gmx_unused nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
+#pragma omp parallel for num_threads(nthreads) schedule(static)
+    for (int i = 0; i < ssize(refCoords); i++)
+    {
+        (*coords)[i] = refCoords[i];
+    }
+}
+
+//! Returns the maximum difference an atom moved between two coordinate sets, over all ranks
+real maxCoordinateDifference(ArrayRef<const RVec> coords1, ArrayRef<const RVec> coords2, MPI_Comm mpiCommMyGroup)
+{
+    GMX_RELEASE_ASSERT(coords1.size() == coords2.size(), "Coordinate counts should match");
+
+    real maxDiffSquared = 0;
+
+    const int gmx_unused nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
+#pragma omp parallel for reduction(max : maxDiffSquared) num_threads(nthreads) schedule(static)
+    for (int i = 0; i < ssize(coords1); i++)
+    {
+        maxDiffSquared = std::max(maxDiffSquared, gmx::norm2(coords1[i] - coords2[i]));
+    }
+
+#if GMX_MPI
+    int numRanks = 1;
+    if (mpiCommMyGroup != MPI_COMM_NULL)
+    {
+        MPI_Comm_size(mpiCommMyGroup, &numRanks);
+    }
+    if (numRanks > 1)
+    {
+        real maxDiffSquaredReduced;
+        MPI_Allreduce(
+                &maxDiffSquared, &maxDiffSquaredReduced, 1, GMX_DOUBLE ? MPI_DOUBLE : MPI_FLOAT, MPI_MAX, mpiCommMyGroup);
+        maxDiffSquared = maxDiffSquaredReduced;
+    }
+#else
+    GMX_UNUSED_VALUE(mpiCommMyGroup);
+#endif
+
+    return std::sqrt(maxDiffSquared);
+}
+
 /*! \brief Class to handle the work of setting and doing an energy evaluation.
  *
  * This class is a mere aggregate of parameters to pass to evaluate an
@@ -845,7 +903,7 @@ public:
      * unsuited for aggregate initialization. When the types
      * improve, the call signature of this method can be reduced.
      */
-    void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst);
+    void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst, int64_t step);
     //! Handles logging (deprecated).
     FILE* fplog;
     //! Handles logging.
@@ -855,7 +913,7 @@ public:
     //! Coordinates multi-simulations.
     const gmx_multisim_t* ms;
     //! Holds the simulation topology.
-    const gmx_mtop_t* top_global;
+    const gmx_mtop_t& top_global;
     //! Holds the domain topology.
     gmx_localtop_t* top;
     //! User input options.
@@ -867,9 +925,11 @@ public:
     //! Manages flop accounting.
     t_nrnb* nrnb;
     //! Manages wall cycle accounting.
-    gmx_wallcycle_t wcycle;
-    //! Coordinates global reduction.
+    gmx_wallcycle* wcycle;
+    //! Legacy coordinator of global reduction.
     gmx_global_stat_t gstat;
+    //! Coordinates reduction for observables
+    gmx::ObservablesReducer* observablesReducer;
     //! Handles virtual sites.
     VirtualSitesHandler* vsite;
     //! Handles constraints.
@@ -882,9 +942,13 @@ public:
     MdrunScheduleWorkload* runScheduleWork;
     //! Stores the computed energies.
     gmx_enerdata_t* enerd;
+    //! The DD partitioning count at which the pair list was generated
+    int ddpCountPairSearch;
+    //! The local coordinates that were used for pair searching, stored for computing displacements
+    std::vector<RVec> pairSearchCoordinates;
 };
 
-void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst)
+void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst, int64_t step)
 {
     real     t;
     gmx_bool bNS;
@@ -895,32 +959,46 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
     /* Set the time to the initial time, the time does not change during EM */
     t = inputrec->init_t;
 
-    if (bFirst || (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
+    if (vsite)
+    {
+        vsite->construct(ems->s.x, {}, ems->s.box, gmx::VSiteOperation::Positions);
+    }
+
+    // Compute the buffer size of the pair list
+    const real bufferSize = inputrec->rlist - std::max(inputrec->rcoulomb, inputrec->rvdw);
+
+    if (bFirst || bufferSize <= 0 || (haveDDAtomOrdering(*cr) && ems->s.ddp_count != ddpCountPairSearch))
     {
         /* This is the first state or an old state used before the last ns */
         bNS = TRUE;
     }
     else
     {
-        bNS = FALSE;
-        if (inputrec->nstlist > 0)
-        {
-            bNS = TRUE;
-        }
+        // We need to generate a new pairlist when one atom moved more than half the buffer size
+        ArrayRef<const RVec> localCoordinates =
+                ArrayRef<const RVec>(ems->s.x).subArray(0, mdAtoms->mdatoms()->homenr);
+        bNS = 2 * maxCoordinateDifference(pairSearchCoordinates, localCoordinates, cr->mpi_comm_mygroup)
+              > bufferSize;
     }
 
-    if (vsite)
-    {
-        vsite->construct(ems->s.x, {}, ems->s.box, gmx::VSiteOperation::Positions);
-    }
-
-    if (DOMAINDECOMP(cr) && bNS)
+    if (haveDDAtomOrdering(*cr) && bNS)
     {
         /* Repartition the domain decomposition */
         em_dd_partition_system(
                 fplog, mdlog, count, cr, top_global, inputrec, imdSession, pull_work, ems, top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
+        ddpCountPairSearch = cr->dd->ddp_count;
+    }
+
+    /* Store the local coordinates that will be used in the pair search, after we re-partitioned */
+    if (bufferSize > 0 && bNS)
+    {
+        ArrayRef<const RVec> localCoordinates =
+                constArrayRefFromArray(ems->s.x.data(), mdAtoms->mdatoms()->homenr);
+        setCoordinates(&pairSearchCoordinates, localCoordinates);
     }
 
+    fr->longRangeNonbondeds->updateAfterPartition(*mdAtoms->mdatoms());
+
     /* Calc force & energy on new trial position  */
     /* do_force always puts the charge groups in the box and shifts again
      * We do not unshift, so molecules are always whole in congrad.c
@@ -951,6 +1029,7 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
              mu_tot,
              t,
              nullptr,
+             fr->longRangeNonbondeds.get(),
              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY
                      | (bNS ? GMX_FORCE_NS : 0),
              DDBalanceRegionHandler(cr));
@@ -962,24 +1041,23 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
     /* Communicate stuff when parallel */
     if (PAR(cr) && inputrec->eI != IntegrationAlgorithm::NM)
     {
-        wallcycle_start(wcycle, ewcMoveE);
+        wallcycle_start(wcycle, WallCycleCounter::MoveE);
 
-        global_stat(gstat,
+        global_stat(*gstat,
                     cr,
                     enerd,
                     force_vir,
                     shake_vir,
-                    inputrec,
+                    *inputrec,
                     nullptr,
-                    gmx::ArrayRef<real>{},
-                    nullptr,
-                    1,
-                    &terminate,
                     nullptr,
+                    std::vector<real>(1, terminate),
                     FALSE,
-                    CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT);
+                    CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT,
+                    step,
+                    observablesReducer);
 
-        wallcycle_stop(wcycle, ewcMoveE);
+        wallcycle_stop(wcycle, WallCycleCounter::MoveE);
     }
 
     if (fr->dispersionCorrection)
@@ -1050,7 +1128,7 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
 //! Parallel utility summing energies and forces
 static double reorder_partsum(const t_commrec*  cr,
                               const t_grpopts*  opts,
-                              const gmx_mtop_t* top_global,
+                              const gmx_mtop_t& top_global,
                               const em_state_t* s_min,
                               const em_state_t* s_b)
 {
@@ -1066,7 +1144,7 @@ static double reorder_partsum(const t_commrec*  cr,
      * This conflicts with the spirit of domain decomposition,
      * but to fully optimize this a much more complicated algorithm is required.
      */
-    const int natoms = top_global->natoms;
+    const int natoms = top_global.natoms;
     rvec*     fmg;
     snew(fmg, natoms);
 
@@ -1077,7 +1155,7 @@ static double reorder_partsum(const t_commrec*  cr,
         copy_rvec(fm[i], fmg[a]);
         i++;
     }
-    gmx_sum(top_global->natoms * 3, fmg[0], cr);
+    gmx_sum(top_global.natoms * 3, fmg[0], cr);
 
     /* Now we will determine the part of the sum for the cgs in state s_b */
     gmx::ArrayRef<const int> indicesB = s_b->s.cg_gl;
@@ -1086,7 +1164,7 @@ static double reorder_partsum(const t_commrec*  cr,
     i                                     = 0;
     int                                gf = 0;
     gmx::ArrayRef<const unsigned char> grpnrFREEZE =
-            top_global->groups.groupNumbers[SimulationAtomGroupType::Freeze];
+            top_global.groups.groupNumbers[SimulationAtomGroupType::Freeze];
     for (int a : indicesB)
     {
         if (!grpnrFREEZE.empty())
@@ -1112,7 +1190,7 @@ static double reorder_partsum(const t_commrec*  cr,
 static real pr_beta(const t_commrec*  cr,
                     const t_grpopts*  opts,
                     t_mdatoms*        mdatoms,
-                    const gmx_mtop_t* top_global,
+                    const gmx_mtop_t& top_global,
                     const em_state_t* s_min,
                     const em_state_t* s_b)
 {
@@ -1123,7 +1201,7 @@ static real pr_beta(const t_commrec*  cr,
      * and might have to sum it in parallel runs.
      */
 
-    if (!DOMAINDECOMP(cr)
+    if (!haveDDAtomOrdering(*cr)
         || (s_min->s.ddp_count == cr->dd->ddp_count && s_b->s.ddp_count == cr->dd->ddp_count))
     {
         auto fm = s_min->f.view().force();
@@ -1168,7 +1246,6 @@ void LegacySimulator::do_cg()
 {
     const char* CG = "Polak-Ribiere Conjugate Gradients";
 
-    gmx_localtop_t    top(top_global->ffparams);
     gmx_global_stat_t gstat;
     double            tmp, minstep;
     real              stepsize;
@@ -1181,7 +1258,7 @@ void LegacySimulator::do_cg()
     tensor            vir, pres;
     int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
     int               m, step, nminstep;
-    auto              mdatoms = mdAtoms->mdatoms();
+    auto*             mdatoms = mdAtoms->mdatoms();
 
     GMX_LOG(mdlog.info)
             .asParagraph()
@@ -1215,6 +1292,8 @@ void LegacySimulator::do_cg()
     em_state_t* s_b   = &s2;
     em_state_t* s_c   = &s3;
 
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
+
     /* Init em and store the local state in s_min */
     init_em(fplog,
             mdlog,
@@ -1226,7 +1305,7 @@ void LegacySimulator::do_cg()
             state_global,
             top_global,
             s_min,
-            &top,
+            top,
             nrnb,
             fr,
             mdAtoms,
@@ -1241,7 +1320,7 @@ void LegacySimulator::do_cg()
                                    mdrunOptions,
                                    cr,
                                    outputProvider,
-                                   mdModulesNotifier,
+                                   mdModulesNotifiers,
                                    inputrec,
                                    top_global,
                                    nullptr,
@@ -1251,13 +1330,13 @@ void LegacySimulator::do_cg()
                                    ms);
     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
                                    top_global,
-                                   inputrec,
+                                   *inputrec,
                                    pull_work,
                                    nullptr,
                                    false,
                                    StartingBehavior::NewSimulation,
                                    simulationsShareState,
-                                   mdModulesNotifier);
+                                   mdModulesNotifiers);
 
     /* Print to log file */
     print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
@@ -1274,14 +1353,33 @@ void LegacySimulator::do_cg()
         sp_header(fplog, CG, inputrec->em_tol, number_steps);
     }
 
-    EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
-                                     inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
-                                     vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd,
+                                     -1,
+                                     {} };
     /* Call the force routine and some auxiliary (neighboursearching etc.) */
     /* do_force always puts the charge groups in the box and shifts again
      * We do not unshift, so molecules are always whole in congrad.c
      */
-    energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE);
+    energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE, step);
+    observablesReducer.markAsReadyToReduce();
 
     if (MASTER(cr))
     {
@@ -1293,12 +1391,9 @@ void LegacySimulator::do_cg()
                                          mdatoms->tmass,
                                          enerd,
                                          nullptr,
-                                         nullptr,
                                          nullBox,
                                          PTCouplingArrays(),
                                          0,
-                                         nullptr,
-                                         nullptr,
                                          vir,
                                          pres,
                                          nullptr,
@@ -1415,7 +1510,7 @@ void LegacySimulator::do_cg()
             gmx_sumd(1, &minstep, cr);
         }
 
-        minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global->natoms));
+        minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global.natoms));
 
         if (stepsize < minstep)
         {
@@ -1451,7 +1546,7 @@ void LegacySimulator::do_cg()
         a         = 0.0;
         c         = a + stepsize; /* reference position along line is zero */
 
-        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
+        if (haveDDAtomOrdering(*cr) && s_min->s.ddp_count < cr->dd->ddp_count)
         {
             em_dd_partition_system(fplog,
                                    mdlog,
@@ -1462,7 +1557,7 @@ void LegacySimulator::do_cg()
                                    imdSession,
                                    pull_work,
                                    s_min,
-                                   &top,
+                                   top,
                                    mdAtoms,
                                    fr,
                                    vsite,
@@ -1476,7 +1571,8 @@ void LegacySimulator::do_cg()
 
         neval++;
         /* Calculate energy for the trial step */
-        energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE);
+        energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE, step);
+        observablesReducer.markAsReadyToReduce();
 
         /* Calc derivative along line */
         const rvec*                    pc  = s_c->s.cg_p.rvec_array();
@@ -1565,7 +1661,7 @@ void LegacySimulator::do_cg()
                     b = 0.5 * (a + c);
                 }
 
-                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
+                if (haveDDAtomOrdering(*cr) && s_min->s.ddp_count != cr->dd->ddp_count)
                 {
                     /* Reload the old state */
                     em_dd_partition_system(fplog,
@@ -1577,7 +1673,7 @@ void LegacySimulator::do_cg()
                                            imdSession,
                                            pull_work,
                                            s_min,
-                                           &top,
+                                           top,
                                            mdAtoms,
                                            fr,
                                            vsite,
@@ -1591,7 +1687,8 @@ void LegacySimulator::do_cg()
 
                 neval++;
                 /* Calculate energy for the trial step */
-                energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE);
+                energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE, step);
+                observablesReducer.markAsReadyToReduce();
 
                 /* p does not change within a step, but since the domain decomposition
                  * might change, we have to use cg_p of s_b here.
@@ -1744,12 +1841,9 @@ void LegacySimulator::do_cg()
                                              mdatoms->tmass,
                                              enerd,
                                              nullptr,
-                                             nullptr,
                                              nullBox,
                                              PTCouplingArrays(),
                                              0,
-                                             nullptr,
-                                             nullptr,
                                              vir,
                                              pres,
                                              nullptr,
@@ -1777,7 +1871,7 @@ void LegacySimulator::do_cg()
         }
 
         /* Send energies and positions to the IMD client if bIMD is TRUE. */
-        if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0))
+        if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x, 0))
         {
             imdSession->sendPositionsAndEnergies();
         }
@@ -1786,7 +1880,7 @@ void LegacySimulator::do_cg()
          * If we have reached machine precision, converged is already set to true.
          */
         converged = converged || (s_min->fmax < inputrec->em_tol);
-
+        observablesReducer.markAsReadyToReduce();
     } /* End of the loop */
 
     if (converged)
@@ -1870,22 +1964,8 @@ void LegacySimulator::do_lbfgs()
 {
     static const char* LBFGS = "Low-Memory BFGS Minimizer";
     em_state_t         ems;
-    gmx_localtop_t     top(top_global->ffparams);
     gmx_global_stat_t  gstat;
-    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
-    double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
-    real *             rho, *alpha, *p, *s, **dx, **dg;
-    real               a, b, c, maxdelta, delta;
-    real               diag, Epot0;
-    real               dgdx, dgdg, sq, yr, beta;
-    gmx_bool           converged;
-    rvec               mu_tot = { 0 };
-    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
-    tensor             vir, pres;
-    int                start, end, number_steps;
-    int                i, k, m, n, gf, step;
-    int                mdof_flags;
-    auto               mdatoms = mdAtoms->mdatoms();
+    auto*              mdatoms = mdAtoms->mdatoms();
 
     GMX_LOG(mdlog.info)
             .asParagraph()
@@ -1895,6 +1975,10 @@ void LegacySimulator::do_lbfgs()
                     "be available in a different form in a future version of GROMACS, "
                     "e.g. gmx minimize and an .mdp option.");
 
+    if (haveDDAtomOrdering(*cr))
+    {
+        gmx_fatal(FARGS, "L_BFGS is currently not supported");
+    }
     if (PAR(cr))
     {
         gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank");
@@ -1908,29 +1992,29 @@ void LegacySimulator::do_lbfgs()
                 "do not use constraints, or use another minimizer (e.g. steepest descent).");
     }
 
-    n        = 3 * state_global->natoms;
-    nmaxcorr = inputrec->nbfgscorr;
+    const int n        = 3 * state_global->natoms;
+    const int nmaxcorr = inputrec->nbfgscorr;
 
-    snew(frozen, n);
+    std::vector<real> p(n);
+    std::vector<real> rho(nmaxcorr);
+    std::vector<real> alpha(nmaxcorr);
 
-    snew(p, n);
-    snew(rho, nmaxcorr);
-    snew(alpha, nmaxcorr);
-
-    snew(dx, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
+    std::vector<std::vector<real>> dx(nmaxcorr);
+    for (auto& dxCorr : dx)
     {
-        snew(dx[i], n);
+        dxCorr.resize(n);
     }
 
-    snew(dg, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
+    std::vector<std::vector<real>> dg(nmaxcorr);
+    for (auto& dgCorr : dg)
     {
-        snew(dg[i], n);
+        dgCorr.resize(n);
     }
 
-    step  = 0;
-    neval = 0;
+    int step  = 0;
+    int neval = 0;
+
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
 
     /* Init em */
     init_em(fplog,
@@ -1943,7 +2027,7 @@ void LegacySimulator::do_lbfgs()
             state_global,
             top_global,
             &ems,
-            &top,
+            top,
             nrnb,
             fr,
             mdAtoms,
@@ -1958,7 +2042,7 @@ void LegacySimulator::do_lbfgs()
                                    mdrunOptions,
                                    cr,
                                    outputProvider,
-                                   mdModulesNotifier,
+                                   mdModulesNotifiers,
                                    inputrec,
                                    top_global,
                                    nullptr,
@@ -1968,16 +2052,16 @@ void LegacySimulator::do_lbfgs()
                                    ms);
     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
                                    top_global,
-                                   inputrec,
+                                   *inputrec,
                                    pull_work,
                                    nullptr,
                                    false,
                                    StartingBehavior::NewSimulation,
                                    simulationsShareState,
-                                   mdModulesNotifier);
+                                   mdModulesNotifiers);
 
-    start = 0;
-    end   = mdatoms->homenr;
+    const int start = 0;
+    const int end   = mdatoms->homenr;
 
     /* We need 4 working states */
     em_state_t  s0{}, s1{}, s2{}, s3{};
@@ -1993,20 +2077,19 @@ void LegacySimulator::do_lbfgs()
     /* Print to log file */
     print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
 
-    do_log = do_ene = do_f = TRUE;
-
     /* Max number of steps */
-    number_steps = inputrec->nsteps;
+    const int number_steps = inputrec->nsteps;
 
     /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
-    gf = 0;
-    for (i = start; i < end; i++)
+    std::vector<bool> frozen(n);
+    int               gf = 0;
+    for (int i = start; i < end; i++)
     {
         if (mdatoms->cFREEZE)
         {
             gf = mdatoms->cFREEZE[i];
         }
-        for (m = 0; m < DIM; m++)
+        for (int m = 0; m < DIM; m++)
         {
             frozen[3 * i + m] = (inputrec->opts.nFreeze[gf][m] != 0);
         }
@@ -2030,10 +2113,29 @@ void LegacySimulator::do_lbfgs()
      * We do not unshift, so molecules are always whole
      */
     neval++;
-    EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
-                                     inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
-                                     vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
-    energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE);
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd };
+    rvec            mu_tot;
+    tensor          vir;
+    tensor          pres;
+    energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE, step);
 
     if (MASTER(cr))
     {
@@ -2045,12 +2147,9 @@ void LegacySimulator::do_lbfgs()
                                          mdatoms->tmass,
                                          enerd,
                                          nullptr,
-                                         nullptr,
                                          nullBox,
                                          PTCouplingArrays(),
                                          0,
-                                         nullptr,
-                                         nullptr,
                                          vir,
                                          pres,
                                          nullptr,
@@ -2083,11 +2182,11 @@ void LegacySimulator::do_lbfgs()
     }
 
     // Point is an index to the memory of search directions, where 0 is the first one.
-    point = 0;
+    int point = 0;
 
     // Set initial search direction to the force (-gradient), or 0 for frozen particles.
     real* fInit = static_cast<real*>(ems.f.view().force().data()[0]);
-    for (i = 0; i < n; i++)
+    for (int i = 0; i < n; i++)
     {
         if (!frozen[i])
         {
@@ -2103,25 +2202,28 @@ void LegacySimulator::do_lbfgs()
     // (the main efficiency in the algorithm comes from changing directions), but
     // we still need an initial value, so estimate it as the inverse of the norm
     // so we take small steps where the potential fluctuates a lot.
-    stepsize = 1.0 / ems.fnorm;
+    double stepsize = 1.0 / ems.fnorm;
 
     /* Start the loop over BFGS steps.
      * Each successful step is counted, and we continue until
      * we either converge or reach the max number of steps.
      */
 
-    ncorr = 0;
+    bool do_log = true;
+    bool do_ene = true;
+
+    int ncorr = 0;
 
     /* Set the gradient from the force */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
+    bool converged = false;
+    for (int step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
     {
 
         /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
+        const bool do_x = do_per_step(step, inputrec->nstxout);
+        const bool do_f = do_per_step(step, inputrec->nstfout);
 
-        mdof_flags = 0;
+        int mdof_flags = 0;
         if (do_x)
         {
             mdof_flags |= MDOF_X;
@@ -2142,7 +2244,7 @@ void LegacySimulator::do_lbfgs()
                                          cr,
                                          outf,
                                          mdof_flags,
-                                         top_global->natoms,
+                                         top_global.natoms,
                                          step,
                                          static_cast<real>(step),
                                          &ems.s,
@@ -2154,13 +2256,14 @@ void LegacySimulator::do_lbfgs()
         /* Do the linesearching in the direction dx[point][0..(n-1)] */
 
         /* make s a pointer to current search direction - point=0 first time we get here */
-        s = dx[point];
+        gmx::ArrayRef<const real> s = dx[point];
 
-        real* xx = static_cast<real*>(ems.s.x.rvec_array()[0]);
-        real* ff = static_cast<real*>(ems.f.view().force().data()[0]);
+        const real* xx = static_cast<real*>(ems.s.x.rvec_array()[0]);
+        const real* ff = static_cast<real*>(ems.f.view().force().data()[0]);
 
         // calculate line gradient in position A
-        for (gpa = 0, i = 0; i < n; i++)
+        double gpa = 0;
+        for (int i = 0; i < n; i++)
         {
             gpa -= s[i] * ff[i];
         }
@@ -2168,9 +2271,10 @@ void LegacySimulator::do_lbfgs()
         /* Calculate minimum allowed stepsize along the line, before the average (norm)
          * relative change in coordinate is smaller than precision
          */
-        for (minstep = 0, i = 0; i < n; i++)
+        double minstep = 0;
+        for (int i = 0; i < n; i++)
         {
-            tmp = fabs(xx[i]);
+            double tmp = fabs(xx[i]);
             if (tmp < 1.0)
             {
                 tmp = 1.0;
@@ -2182,15 +2286,15 @@ void LegacySimulator::do_lbfgs()
 
         if (stepsize < minstep)
         {
-            converged = TRUE;
+            converged = true;
             break;
         }
 
         // Before taking any steps along the line, store the old position
-        *last       = ems;
-        real* lastx = static_cast<real*>(last->s.x.data()[0]);
-        real* lastf = static_cast<real*>(last->f.view().force().data()[0]);
-        Epot0       = ems.epot;
+        *last            = ems;
+        real*      lastx = static_cast<real*>(last->s.x.data()[0]);
+        real*      lastf = static_cast<real*>(last->f.view().force().data()[0]);
+        const real Epot0 = ems.epot;
 
         *sa = ems;
 
@@ -2221,11 +2325,13 @@ void LegacySimulator::do_lbfgs()
 
         // State "A" is the first position along the line.
         // reference position along line is initially zero
-        a = 0.0;
+        real a = 0;
 
         // Check stepsize first. We do not allow displacements
         // larger than emstep.
         //
+        real c;
+        real maxdelta;
         do
         {
             // Pick a new position C by adding stepsize to A.
@@ -2234,9 +2340,9 @@ void LegacySimulator::do_lbfgs()
             // Calculate what the largest change in any individual coordinate
             // would be (translation along line * gradient along line)
             maxdelta = 0;
-            for (i = 0; i < n; i++)
+            for (int i = 0; i < n; i++)
             {
-                delta = c * s[i];
+                real delta = c * s[i];
                 if (delta > maxdelta)
                 {
                     maxdelta = delta;
@@ -2251,18 +2357,19 @@ void LegacySimulator::do_lbfgs()
 
         // Take a trial step and move the coordinate array xc[] to position C
         real* xc = static_cast<real*>(sc->s.x.rvec_array()[0]);
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
         {
             xc[i] = lastx[i] + c * s[i];
         }
 
         neval++;
         // Calculate energy for the trial step in position C
-        energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE);
+        energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE, step);
 
         // Calc line gradient in position C
-        real* fc = static_cast<real*>(sc->f.view().force()[0]);
-        for (gpc = 0, i = 0; i < n; i++)
+        real*  fc  = static_cast<real*>(sc->f.view().force()[0]);
+        double gpc = 0;
+        for (int i = 0; i < n; i++)
         {
             gpc -= s[i] * fc[i]; /* f is negative gradient, thus the sign */
         }
@@ -2275,11 +2382,11 @@ void LegacySimulator::do_lbfgs()
         // This is the max amount of increase in energy we tolerate.
         // By allowing VERY small changes (close to numerical precision) we
         // frequently find even better (lower) final energies.
-        tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot);
+        double tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot);
 
         // Accept the step if the energy is lower in the new position C (compared to A),
         // or if it is not significantly higher and the line derivative is still negative.
-        foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
+        bool foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
         // If true, great, we found a better energy. We no longer try to alter the
         // stepsize, but simply accept this new better position. The we select a new
         // search direction instead, which will be much more efficient than continuing
@@ -2295,6 +2402,7 @@ void LegacySimulator::do_lbfgs()
         // than with the stepsize, so no need to modify it. For the next search direction
         // it will be reset to 1/fnorm anyway.
 
+        double step_taken;
         if (!foundlower)
         {
             // OK, if we didn't find a lower value we will have to locate one now - there must
@@ -2305,14 +2413,15 @@ void LegacySimulator::do_lbfgs()
             // I also have a safeguard for potentially really pathological functions so we never
             // take more than 20 steps before we give up.
             // If we already found a lower value we just skip this step and continue to the update.
-            real fnorm = 0;
-            nminstep   = 0;
+            real fnorm    = 0;
+            int  nminstep = 0;
             do
             {
                 // Select a new trial point B in the interval [A,C].
                 // If the derivatives at points a & c have different sign we interpolate to zero,
                 // otherwise just do a bisection since there might be multiple minima/maxima
                 // inside the interval.
+                real b;
                 if (gpa < 0 && gpc > 0)
                 {
                     b = a + gpa * (a - c) / (gpc - gpa);
@@ -2332,19 +2441,20 @@ void LegacySimulator::do_lbfgs()
 
                 // Take a trial step to point B
                 real* xb = static_cast<real*>(sb->s.x.rvec_array()[0]);
-                for (i = 0; i < n; i++)
+                for (int i = 0; i < n; i++)
                 {
                     xb[i] = lastx[i] + b * s[i];
                 }
 
                 neval++;
                 // Calculate energy for the trial step in point B
-                energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE);
+                energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE, step);
                 fnorm = sb->fnorm;
 
                 // Calculate gradient in point B
-                real* fb = static_cast<real*>(sb->f.view().force()[0]);
-                for (gpb = 0, i = 0; i < n; i++)
+                real*  fb  = static_cast<real*>(sb->f.view().force()[0]);
+                double gpb = 0;
+                for (int i = 0; i < n; i++)
                 {
                     gpb -= s[i] * fb[i]; /* f is negative gradient, thus the sign */
                 }
@@ -2388,7 +2498,7 @@ void LegacySimulator::do_lbfgs()
                 if (ncorr == 0)
                 {
                     /* Converged */
-                    converged = TRUE;
+                    converged = true;
                     break;
                 }
                 else
@@ -2396,7 +2506,7 @@ void LegacySimulator::do_lbfgs()
                     /* Reset memory */
                     ncorr = 0;
                     /* Search in gradient direction */
-                    for (i = 0; i < n; i++)
+                    for (int i = 0; i < n; i++)
                     {
                         dx[point][i] = ff[i];
                     }
@@ -2439,21 +2549,21 @@ void LegacySimulator::do_lbfgs()
             ncorr++;
         }
 
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
         {
             dg[point][i] = lastf[i] - ff[i];
             dx[point][i] *= step_taken;
         }
 
-        dgdg = 0;
-        dgdx = 0;
-        for (i = 0; i < n; i++)
+        real dgdg = 0;
+        real dgdx = 0;
+        for (int i = 0; i < n; i++)
         {
             dgdg += dg[point][i] * dg[point][i];
             dgdx += dg[point][i] * dx[point][i];
         }
 
-        diag = dgdx / dgdg;
+        const real diag = dgdx / dgdg;
 
         rho[point] = 1.0 / dgdx;
         point++;
@@ -2464,15 +2574,15 @@ void LegacySimulator::do_lbfgs()
         }
 
         /* Update */
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
         {
             p[i] = ff[i];
         }
 
-        cp = point;
+        int cp = point;
 
         /* Recursive update. First go back over the memory points */
-        for (k = 0; k < ncorr; k++)
+        for (int k = 0; k < ncorr; k++)
         {
             cp--;
             if (cp < 0)
@@ -2480,38 +2590,38 @@ void LegacySimulator::do_lbfgs()
                 cp = ncorr - 1;
             }
 
-            sq = 0;
-            for (i = 0; i < n; i++)
+            real sq = 0;
+            for (int i = 0; i < n; i++)
             {
                 sq += dx[cp][i] * p[i];
             }
 
             alpha[cp] = rho[cp] * sq;
 
-            for (i = 0; i < n; i++)
+            for (int i = 0; i < n; i++)
             {
                 p[i] -= alpha[cp] * dg[cp][i];
             }
         }
 
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
         {
             p[i] *= diag;
         }
 
         /* And then go forward again */
-        for (k = 0; k < ncorr; k++)
+        for (int k = 0; k < ncorr; k++)
         {
-            yr = 0;
-            for (i = 0; i < n; i++)
+            real yr = 0;
+            for (int i = 0; i < n; i++)
             {
                 yr += p[i] * dg[cp][i];
             }
 
-            beta = rho[cp] * yr;
-            beta = alpha[cp] - beta;
+            real beta = rho[cp] * yr;
+            beta      = alpha[cp] - beta;
 
-            for (i = 0; i < n; i++)
+            for (int i = 0; i < n; i++)
             {
                 p[i] += beta * dx[cp][i];
             }
@@ -2523,7 +2633,7 @@ void LegacySimulator::do_lbfgs()
             }
         }
 
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
         {
             if (!frozen[i])
             {
@@ -2558,12 +2668,9 @@ void LegacySimulator::do_lbfgs()
                                              mdatoms->tmass,
                                              enerd,
                                              nullptr,
-                                             nullptr,
                                              nullBox,
                                              PTCouplingArrays(),
                                              0,
-                                             nullptr,
-                                             nullptr,
                                              vir,
                                              pres,
                                              nullptr,
@@ -2591,7 +2698,7 @@ void LegacySimulator::do_lbfgs()
         }
 
         /* Send x and E to IMD client, if bIMD is TRUE. */
-        if (imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0) && MASTER(cr))
+        if (imdSession->run(step, TRUE, state_global->box, state_global->x, 0) && MASTER(cr))
         {
             imdSession->sendPositionsAndEnergies();
         }
@@ -2603,7 +2710,7 @@ void LegacySimulator::do_lbfgs()
          * If we have reached machine precision, converged is already set to true.
          */
         converged = converged || (ems.fmax < inputrec->em_tol);
-
+        observablesReducer.markAsReadyToReduce();
     } /* End of the loop */
 
     if (converged)
@@ -2652,8 +2759,8 @@ void LegacySimulator::do_lbfgs()
      * However, we should only do it if we did NOT already write this step
      * above (which we did if do_x or do_f was true).
      */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = !do_per_step(step, inputrec->nstfout);
+    const bool do_x = !do_per_step(step, inputrec->nstxout);
+    const bool do_f = !do_per_step(step, inputrec->nstfout);
     write_em_traj(
             fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, step, &ems, state_global, observablesHistory);
 
@@ -2675,7 +2782,6 @@ void LegacySimulator::do_lbfgs()
 void LegacySimulator::do_steep()
 {
     const char*       SD = "Steepest Descents";
-    gmx_localtop_t    top(top_global->ffparams);
     gmx_global_stat_t gstat;
     real              stepsize;
     real              ustep;
@@ -2685,7 +2791,7 @@ void LegacySimulator::do_steep()
     int               nsteps;
     int               count          = 0;
     int               steps_accepted = 0;
-    auto              mdatoms        = mdAtoms->mdatoms();
+    auto*             mdatoms        = mdAtoms->mdatoms();
 
     GMX_LOG(mdlog.info)
             .asParagraph()
@@ -2700,6 +2806,8 @@ void LegacySimulator::do_steep()
     em_state_t* s_min = &s0;
     em_state_t* s_try = &s1;
 
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
+
     /* Init em and store the local state in s_try */
     init_em(fplog,
             mdlog,
@@ -2711,7 +2819,7 @@ void LegacySimulator::do_steep()
             state_global,
             top_global,
             s_try,
-            &top,
+            top,
             nrnb,
             fr,
             mdAtoms,
@@ -2726,7 +2834,7 @@ void LegacySimulator::do_steep()
                                    mdrunOptions,
                                    cr,
                                    outputProvider,
-                                   mdModulesNotifier,
+                                   mdModulesNotifiers,
                                    inputrec,
                                    top_global,
                                    nullptr,
@@ -2736,13 +2844,13 @@ void LegacySimulator::do_steep()
                                    ms);
     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
                                    top_global,
-                                   inputrec,
+                                   *inputrec,
                                    pull_work,
                                    nullptr,
                                    false,
                                    StartingBehavior::NewSimulation,
                                    simulationsShareState,
-                                   mdModulesNotifier);
+                                   mdModulesNotifiers);
 
     /* Print to log file  */
     print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
@@ -2765,9 +2873,25 @@ void LegacySimulator::do_steep()
     {
         sp_header(fplog, SD, inputrec->em_tol, nsteps);
     }
-    EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
-                                     inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
-                                     vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd };
 
     /**** HERE STARTS THE LOOP ****
      * count is the counter for the number of steps
@@ -2792,7 +2916,7 @@ void LegacySimulator::do_steep()
 
         if (validStep)
         {
-            energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);
+            energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0, count);
         }
         else
         {
@@ -2836,12 +2960,9 @@ void LegacySimulator::do_steep()
                                                  mdatoms->tmass,
                                                  enerd,
                                                  nullptr,
-                                                 nullptr,
                                                  nullBox,
                                                  PTCouplingArrays(),
                                                  0,
-                                                 nullptr,
-                                                 nullptr,
                                                  vir,
                                                  pres,
                                                  nullptr,
@@ -2890,7 +3011,7 @@ void LegacySimulator::do_steep()
             /* If energy is not smaller make the step smaller...  */
             ustep *= 0.5;
 
-            if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
+            if (haveDDAtomOrdering(*cr) && s_min->s.ddp_count != cr->dd->ddp_count)
             {
                 /* Reload the old state */
                 em_dd_partition_system(fplog,
@@ -2902,7 +3023,7 @@ void LegacySimulator::do_steep()
                                        imdSession,
                                        pull_work,
                                        s_min,
-                                       &top,
+                                       top,
                                        mdAtoms,
                                        fr,
                                        vsite,
@@ -2940,7 +3061,7 @@ void LegacySimulator::do_steep()
         if (imdSession->run(count,
                             TRUE,
                             MASTER(cr) ? state_global->box : nullptr,
-                            MASTER(cr) ? state_global->x.rvec_array() : nullptr,
+                            MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>(),
                             0)
             && MASTER(cr))
         {
@@ -2948,6 +3069,7 @@ void LegacySimulator::do_steep()
         }
 
         count++;
+        observablesReducer.markAsReadyToReduce();
     } /* End of the loop  */
 
     /* Print some data...  */
@@ -2992,7 +3114,6 @@ void LegacySimulator::do_nm()
 {
     const char*         NM = "Normal Mode Analysis";
     int                 nnodes;
-    gmx_localtop_t      top(top_global->ffparams);
     gmx_global_stat_t   gstat;
     tensor              vir, pres;
     rvec                mu_tot = { 0 };
@@ -3003,11 +3124,11 @@ void LegacySimulator::do_nm()
     real*               full_matrix   = nullptr;
 
     /* added with respect to mdrun */
-    int  row, col;
-    real der_range = 10.0 * std::sqrt(GMX_REAL_EPS);
-    real x_min;
-    bool bIsMaster = MASTER(cr);
-    auto mdatoms   = mdAtoms->mdatoms();
+    int   row, col;
+    real  der_range = 10.0 * std::sqrt(GMX_REAL_EPS);
+    real  x_min;
+    bool  bIsMaster = MASTER(cr);
+    auto* mdatoms   = mdAtoms->mdatoms();
 
     GMX_LOG(mdlog.info)
             .asParagraph()
@@ -3028,6 +3149,9 @@ void LegacySimulator::do_nm()
 
     em_state_t state_work{};
 
+    fr->longRangeNonbondeds->updateAfterPartition(*mdAtoms->mdatoms());
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
+
     /* Init em and store the local state in state_minimum */
     init_em(fplog,
             mdlog,
@@ -3039,7 +3163,7 @@ void LegacySimulator::do_nm()
             state_global,
             top_global,
             &state_work,
-            &top,
+            top,
             nrnb,
             fr,
             mdAtoms,
@@ -3054,7 +3178,7 @@ void LegacySimulator::do_nm()
                                    mdrunOptions,
                                    cr,
                                    outputProvider,
-                                   mdModulesNotifier,
+                                   mdModulesNotifiers,
                                    inputrec,
                                    top_global,
                                    nullptr,
@@ -3132,7 +3256,7 @@ void LegacySimulator::do_nm()
     {
         fprintf(stderr,
                 "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n",
-                *(top_global->name),
+                *(top_global.name),
                 inputrec->nsteps);
     }
 
@@ -3140,10 +3264,26 @@ void LegacySimulator::do_nm()
 
     /* Make evaluate_energy do a single node force calculation */
     cr->nnodes = 1;
-    EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
-                                     inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
-                                     vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
-    energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE);
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd };
+    energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE, 0);
     cr->nnodes = nnodes;
 
     /* if forces are not small, warn user */
@@ -3211,7 +3351,7 @@ void LegacySimulator::do_nm()
                                         pull_work,
                                         bNS,
                                         force_flags,
-                                        &top,
+                                        top,
                                         constr,
                                         enerd,
                                         state_work.s.natoms,
@@ -3222,7 +3362,8 @@ void LegacySimulator::do_nm()
                                         &state_work.s.hist,
                                         &state_work.f.view(),
                                         vir,
-                                        mdatoms,
+                                        *mdatoms,
+                                        fr->longRangeNonbondeds.get(),
                                         nrnb,
                                         wcycle,
                                         shellfc,
@@ -3237,7 +3378,7 @@ void LegacySimulator::do_nm()
                 }
                 else
                 {
-                    energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE);
+                    energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE, step);
                 }
 
                 cr->nnodes = nnodes;