Add lambda state to dhdl.xvg with AWH fep

[alexxy/gromacs.git] / src / gromacs / mdrun / minimize.cpp
diff --git a/src/gromacs/mdrun/minimize.cpp b/src/gromacs/mdrun/minimize.cpp

index ab29f1e3155af3afff16e6f3f440d9dfac824b41..eec144faf42604fa6ae6a41a0607abc4bf580253 100644 (file)
--- a/src/gromacs/mdrun/minimize.cpp
+++ b/src/gromacs/mdrun/minimize.cpp
@@ -3,7 +3,8 @@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 The GROMACS development team.
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +52,7 @@
  #include <ctime>
  
  #include <algorithm>
+#include <limits>
  #include <vector>
  
  #include "gromacs/commandline/filenm.h"
@@ -60,22 +62,24 @@
  #include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/domdec/mdsetup.h"
  #include "gromacs/domdec/partition.h"
-#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/fileio/confio.h"
  #include "gromacs/fileio/mtxio.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
  #include "gromacs/imd/imd.h"
  #include "gromacs/linearalgebra/sparsematrix.h"
-#include "gromacs/listed_forces/manage_threading.h"
+#include "gromacs/listed_forces/listed_forces.h"
  #include "gromacs/math/functions.h"
  #include "gromacs/math/vec.h"
  #include "gromacs/mdlib/constr.h"
+#include "gromacs/mdlib/coupling.h"
  #include "gromacs/mdlib/dispersioncorrection.h"
  #include "gromacs/mdlib/ebin.h"
  #include "gromacs/mdlib/enerdata_utils.h"
  #include "gromacs/mdlib/energyoutput.h"
  #include "gromacs/mdlib/force.h"
+#include "gromacs/mdlib/force_flags.h"
  #include "gromacs/mdlib/forcerec.h"
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
  #include "gromacs/mdlib/md_support.h"
@@ -87,12 +91,17 @@
  #include "gromacs/mdlib/vsite.h"
  #include "gromacs/mdrunutility/handlerestart.h"
  #include "gromacs/mdrunutility/printtime.h"
+#include "gromacs/mdtypes/checkpointdata.h"
  #include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/forcebuffers.h"
+#include "gromacs/mdtypes/forcerec.h"
  #include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/interaction_const.h"
  #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/mdatom.h"
  #include "gromacs/mdtypes/mdrunoptions.h"
+#include "gromacs/mdtypes/observablesreducer.h"
  #include "gromacs/mdtypes/state.h"
-#include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/timing/walltime_accounting.h"
@@ -107,47 +116,50 @@
  #include "legacysimulator.h"
  #include "shellfc.h"
  
+using gmx::ArrayRef;
  using gmx::MdrunScheduleWorkload;
+using gmx::RVec;
+using gmx::VirtualSitesHandler;
  
  //! Utility structure for manipulating states during EM
-typedef struct {
+typedef struct em_state
+{
      //! Copy of the global state
-    t_state                     s;
+    t_state s;
      //! Force array
-    PaddedHostVector<gmx::RVec> f;
+    gmx::ForceBuffers f;
      //! Potential energy
-    real                        epot;
+    real epot;
      //! Norm of the force
-    real                        fnorm;
+    real fnorm;
      //! Maximum force
-    real                        fmax;
+    real fmax;
      //! Direction
-    int                         a_fmax;
+    int a_fmax;
  } em_state_t;
  
  //! Print the EM starting conditions
-static void print_em_start(FILE                     *fplog,
-                           const t_commrec          *cr,
+static void print_em_start(FILE*                     fplog,
+                           const t_commrec*          cr,
                             gmx_walltime_accounting_t walltime_accounting,
-                           gmx_wallcycle_t           wcycle,
-                           const char               *name)
+                           gmx_wallcycle*            wcycle,
+                           const char*               name)
  {
      walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
      print_start(fplog, cr, walltime_accounting, name);
  }
  
  //! Stop counting time for EM
-static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
-                        gmx_wallcycle_t           wcycle)
+static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle* wcycle)
  {
-    wallcycle_stop(wcycle, ewcRUN);
+    wallcycle_stop(wcycle, WallCycleCounter::Run);
  
      walltime_accounting_end_time(walltime_accounting);
  }
  
  //! Printing a log file and console header
-static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
+static void sp_header(FILE* out, const char* minimizer, real ftol, int nsteps)
  {
      fprintf(out, "\n");
      fprintf(out, "%s:\n", minimizer);
@@ -156,11 +168,7 @@ static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
  }
  
  //! Print warning message
-static void warn_step(FILE     *fp,
-                      real      ftol,
-                      real      fmax,
-                      gmx_bool  bLastStep,
-                      gmx_bool  bConstrain)
+static void warn_step(FILE* fp, real ftol, real fmax, gmx_bool bLastStep, gmx_bool bConstrain)
  {
      constexpr bool realIsDouble = GMX_DOUBLE;
      char           buffer[2048];
@@ -173,17 +181,17 @@ static void warn_step(FILE     *fp,
                  "atoms are overlapping. Modify the input coordinates to "
                  "remove atom overlap or use soft-core potentials with "
                  "the free energy code to avoid infinite forces.\n%s",
-                !realIsDouble ?
-                "You could also be lucky that switching to double precision "
-                "is sufficient to obtain finite forces.\n" :
-                "");
+                !realIsDouble ? "You could also be lucky that switching to double precision "
+                                "is sufficient to obtain finite forces.\n"
+                              : "");
      }
      else if (bLastStep)
      {
          sprintf(buffer,
                  "\nEnergy minimization reached the maximum number "
                  "of steps before the forces reached the requested "
-                "precision Fmax < %g.\n", ftol);
+                "precision Fmax < %g.\n",
+                ftol);
      }
      else
      {
@@ -197,15 +205,13 @@ static void warn_step(FILE     *fp,
                  "converged to within the available machine precision, "
                  "given your starting configuration and EM parameters.\n%s%s",
                  ftol,
-                !realIsDouble ?
-                "\nDouble precision normally gives you higher accuracy, but "
-                "this is often not needed for preparing to run molecular "
-                "dynamics.\n" :
-                "",
-                bConstrain ?
-                "You might need to increase your constraint accuracy, or turn\n"
-                "off constraints altogether (set constraints = none in mdp file)\n" :
-                "");
+                !realIsDouble ? "\nDouble precision normally gives you higher accuracy, but "
+                                "this is often not needed for preparing to run molecular "
+                                "dynamics.\n"
+                              : "",
+                bConstrain ? "You might need to increase your constraint accuracy, or turn\n"
+                             "off constraints altogether (set constraints = none in mdp file)\n"
+                           : "");
      }
  
      fputs(wrap_lines(buffer, 78, 0, FALSE), stderr);
@@ -213,44 +219,54 @@ static void warn_step(FILE     *fp,
  }
  
  //! Print message about convergence of the EM
-static void print_converged(FILE *fp, const char *alg, real ftol,
-                            int64_t count, gmx_bool bDone, int64_t nsteps,
-                            const em_state_t *ems, double sqrtNumAtoms)
+static void print_converged(FILE*             fp,
+                            const char*       alg,
+                            real              ftol,
+                            int64_t           count,
+                            gmx_bool          bDone,
+                            int64_t           nsteps,
+                            const em_state_t* ems,
+                            double            sqrtNumAtoms)
  {
      char buf[STEPSTRSIZE];
  
      if (bDone)
      {
-        fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
-                alg, ftol, gmx_step_str(count, buf));
+        fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", alg, ftol, gmx_step_str(count, buf));
      }
      else if (count < nsteps)
      {
-        fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
+        fprintf(fp,
+                "\n%s converged to machine precision in %s steps,\n"
                  "but did not reach the requested Fmax < %g.\n",
-                alg, gmx_step_str(count, buf), ftol);
+                alg,
+                gmx_step_str(count, buf),
+                ftol);
      }
      else
      {
-        fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
-                alg, ftol, gmx_step_str(count, buf));
+        fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", alg, ftol, gmx_step_str(count, buf));
      }
  
  #if GMX_DOUBLE
      fprintf(fp, "Potential Energy  = %21.14e\n", ems->epot);
      fprintf(fp, "Maximum force     = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1);
-    fprintf(fp, "Norm of force     = %21.14e\n", ems->fnorm/sqrtNumAtoms);
+    fprintf(fp, "Norm of force     = %21.14e\n", ems->fnorm / sqrtNumAtoms);
  #else
      fprintf(fp, "Potential Energy  = %14.7e\n", ems->epot);
      fprintf(fp, "Maximum force     = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1);
-    fprintf(fp, "Norm of force     = %14.7e\n", ems->fnorm/sqrtNumAtoms);
+    fprintf(fp, "Norm of force     = %14.7e\n", ems->fnorm / sqrtNumAtoms);
  #endif
  }
  
  //! Compute the norm and max of the force array in parallel
-static void get_f_norm_max(const t_commrec *cr,
-                           t_grpopts *opts, t_mdatoms *mdatoms, const rvec *f,
-                           real *fnorm, real *fmax, int *a_fmax)
+static void get_f_norm_max(const t_commrec*               cr,
+                           const t_grpopts*               opts,
+                           t_mdatoms*                     mdatoms,
+                           gmx::ArrayRef<const gmx::RVec> f,
+                           real*                          fnorm,
+                           real*                          fmax,
+                           int*                           a_fmax)
  {
      double fnorm2, *sum;
      real   fmax2, fam;
@@ -289,7 +305,7 @@ static void get_f_norm_max(const t_commrec *cr,
      {
          for (i = start; i < end; i++)
          {
-            fam     = norm2(f[i]);
+            fam = norm2(f[i]);
              fnorm2 += fam;
              if (fam > fmax2)
              {
@@ -299,7 +315,7 @@ static void get_f_norm_max(const t_commrec *cr,
          }
      }
  
-    if (la_max >= 0 && DOMAINDECOMP(cr))
+    if (la_max >= 0 && haveDDAtomOrdering(*cr))
      {
          a_max = cr->dd->globalAtomIndices[la_max];
      }
@@ -309,19 +325,19 @@ static void get_f_norm_max(const t_commrec *cr,
      }
      if (PAR(cr))
      {
-        snew(sum, 2*cr->nnodes+1);
-        sum[2*cr->nodeid]   = fmax2;
-        sum[2*cr->nodeid+1] = a_max;
-        sum[2*cr->nnodes]   = fnorm2;
-        gmx_sumd(2*cr->nnodes+1, sum, cr);
-        fnorm2 = sum[2*cr->nnodes];
+        snew(sum, 2 * cr->nnodes + 1);
+        sum[2 * cr->nodeid]     = fmax2;
+        sum[2 * cr->nodeid + 1] = a_max;
+        sum[2 * cr->nnodes]     = fnorm2;
+        gmx_sumd(2 * cr->nnodes + 1, sum, cr);
+        fnorm2 = sum[2 * cr->nnodes];
          /* Determine the global maximum */
          for (i = 0; i < cr->nnodes; i++)
          {
-            if (sum[2*i] > fmax2)
+            if (sum[2 * i] > fmax2)
              {
-                fmax2 = sum[2*i];
-                a_max = gmx::roundToInt(sum[2*i+1]);
+                fmax2 = sum[2 * i];
+                a_max = gmx::roundToInt(sum[2 * i + 1]);
              }
          }
          sfree(sum);
@@ -333,7 +349,7 @@ static void get_f_norm_max(const t_commrec *cr,
      }
      if (fmax)
      {
-        *fmax  = sqrt(fmax2);
+        *fmax = sqrt(fmax2);
      }
      if (a_fmax)
      {
@@ -342,28 +358,30 @@ static void get_f_norm_max(const t_commrec *cr,
  }
  
  //! Compute the norm of the force
-static void get_state_f_norm_max(const t_commrec *cr,
-                                 t_grpopts *opts, t_mdatoms *mdatoms,
-                                 em_state_t *ems)
+static void get_state_f_norm_max(const t_commrec* cr, const t_grpopts* opts, t_mdatoms* mdatoms, em_state_t* ems)
  {
-    get_f_norm_max(cr, opts, mdatoms, ems->f.rvec_array(),
-                   &ems->fnorm, &ems->fmax, &ems->a_fmax);
+    get_f_norm_max(cr, opts, mdatoms, ems->f.view().force(), &ems->fnorm, &ems->fmax, &ems->a_fmax);
  }
  
  //! Initialize the energy minimization
-static void init_em(FILE *fplog,
-                    const gmx::MDLogger &mdlog,
-                    const char *title,
-                    const t_commrec *cr,
-                    t_inputrec *ir,
-                    gmx::ImdSession *imdSession,
-                    pull_t *pull_work,
-                    t_state *state_global, gmx_mtop_t *top_global,
-                    em_state_t *ems, gmx_localtop_t *top,
-                    t_nrnb *nrnb,
-                    t_forcerec *fr,
-                    t_graph **graph, gmx::MDAtoms *mdAtoms, gmx_global_stat_t *gstat,
-                    gmx_vsite_t *vsite, gmx::Constraints *constr, gmx_shellfc_t **shellfc)
+static void init_em(FILE*                fplog,
+                    const gmx::MDLogger& mdlog,
+                    const char*          title,
+                    const t_commrec*     cr,
+                    const t_inputrec*    ir,
+                    gmx::ImdSession*     imdSession,
+                    pull_t*              pull_work,
+                    t_state*             state_global,
+                    const gmx_mtop_t&    top_global,
+                    em_state_t*          ems,
+                    gmx_localtop_t*      top,
+                    t_nrnb*              nrnb,
+                    t_forcerec*          fr,
+                    gmx::MDAtoms*        mdAtoms,
+                    gmx_global_stat_t*   gstat,
+                    VirtualSitesHandler* vsite,
+                    gmx::Constraints*    constr,
+                    gmx_shellfc_t**      shellfc)
  {
      real dvdl_constr;
  
@@ -376,9 +394,19 @@ static void init_em(FILE *fplog,
      {
          state_global->ngtc = 0;
      }
-    initialize_lambdas(fplog, *ir, MASTER(cr), &(state_global->fep_state), state_global->lambda, nullptr);
+    int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
+    gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
+    initialize_lambdas(fplog,
+                       ir->efep,
+                       ir->bSimTemp,
+                       *ir->fepvals,
+                       ir->simtempvals->temperatures,
+                       gmx::arrayRefFromArray(ir->opts.ref_t, ir->opts.ngtc),
+                       MASTER(cr),
+                       fep_state,
+                       lambda);
  
-    if (ir->eI == eiNM)
+    if (ir->eI == IntegrationAlgorithm::NM)
      {
          GMX_ASSERT(shellfc != nullptr, "With NM we always support shells");
  
@@ -386,11 +414,14 @@ static void init_em(FILE *fplog,
                                        top_global,
                                        constr ? constr->numFlexibleConstraints() : 0,
                                        ir->nstcalcenergy,
-                                      DOMAINDECOMP(cr));
+                                      haveDDAtomOrdering(*cr),
+                                      thisRankHasDuty(cr, DUTY_PME));
      }
      else
      {
-        GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI), "This else currently only handles energy minimizers, consider if your algorithm needs shell/flexible-constraint support");
+        GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI),
+                   "This else currently only handles energy minimizers, consider if your algorithm "
+                   "needs shell/flexible-constraint support");
  
          /* With energy minimization, shells and flexible constraints are
           * automatically minimized when treated like normal DOFS.
@@ -401,23 +432,34 @@ static void init_em(FILE *fplog,
          }
      }
  
-    auto mdatoms = mdAtoms->mdatoms();
-    if (DOMAINDECOMP(cr))
+    if (haveDDAtomOrdering(*cr))
      {
-        top->useInDomainDecomp_ = true;
-        dd_init_local_top(*top_global, top);
-
-        dd_init_local_state(cr->dd, state_global, &ems->s);
+        // Local state only becomes valid now.
+        dd_init_local_state(*cr->dd, state_global, &ems->s);
  
          /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
-                            state_global, *top_global, ir, imdSession, pull_work,
-                            &ems->s, &ems->f, mdAtoms, top,
-                            fr, vsite, constr,
-                            nrnb, nullptr, FALSE);
-        dd_store_state(cr->dd, &ems->s);
-
-        *graph = nullptr;
+        dd_partition_system(fplog,
+                            mdlog,
+                            ir->init_step,
+                            cr,
+                            TRUE,
+                            1,
+                            state_global,
+                            top_global,
+                            *ir,
+                            imdSession,
+                            pull_work,
+                            &ems->s,
+                            &ems->f,
+                            mdAtoms,
+                            top,
+                            fr,
+                            vsite,
+                            constr,
+                            nrnb,
+                            nullptr,
+                            FALSE);
+        dd_store_state(*cr->dd, &ems->s);
      }
      else
      {
@@ -425,42 +467,46 @@ static void init_em(FILE *fplog,
          /* Just copy the state */
          ems->s = *state_global;
          state_change_natoms(&ems->s, ems->s.natoms);
-        ems->f.resizeWithPadding(ems->s.natoms);
-
-        mdAlgorithmsSetupAtomData(cr, ir, *top_global, top, fr,
-                                  graph, mdAtoms,
-                                  constr, vsite, shellfc ? *shellfc : nullptr);
  
-        if (vsite)
-        {
-            set_vsite_top(vsite, top, mdatoms);
-        }
+        mdAlgorithmsSetupAtomData(
+                cr, *ir, top_global, top, fr, &ems->f, mdAtoms, constr, vsite, shellfc ? *shellfc : nullptr);
      }
  
-    update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]);
+    update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[FreeEnergyPerturbationCouplingType::Mass]);
  
      if (constr)
      {
          // TODO how should this cross-module support dependency be managed?
-        if (ir->eConstrAlg == econtSHAKE &&
-            gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
+        if (ir->eConstrAlg == ConstraintAlgorithm::Shake && gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
          {
-            gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
-                      econstr_names[econtSHAKE], econstr_names[econtLINCS]);
+            gmx_fatal(FARGS,
+                      "Can not do energy minimization with %s, use %s\n",
+                      enumValueToString(ConstraintAlgorithm::Shake),
+                      enumValueToString(ConstraintAlgorithm::Lincs));
          }
  
          if (!ir->bContinuation)
          {
              /* Constrain the starting coordinates */
-            dvdl_constr = 0;
-            constr->apply(TRUE, TRUE,
-                          -1, 0, 1.0,
-                          ems->s.x.rvec_array(),
-                          ems->s.x.rvec_array(),
-                          nullptr,
+            bool needsLogging  = true;
+            bool computeEnergy = true;
+            bool computeVirial = false;
+            dvdl_constr        = 0;
+            constr->apply(needsLogging,
+                          computeEnergy,
+                          -1,
+                          0,
+                          1.0,
+                          ems->s.x.arrayRefWithPadding(),
+                          ems->s.x.arrayRefWithPadding(),
+                          ArrayRef<RVec>(),
                            ems->s.box,
-                          ems->s.lambda[efptFEP], &dvdl_constr,
-                          nullptr, nullptr, gmx::ConstraintVariable::Positions);
+                          ems->s.lambda[FreeEnergyPerturbationCouplingType::Fep],
+                          &dvdl_constr,
+                          gmx::ArrayRefWithPadding<RVec>(),
+                          computeVirial,
+                          nullptr,
+                          gmx::ConstraintVariable::Positions);
          }
      }
  
@@ -477,9 +523,10 @@ static void init_em(FILE *fplog,
  }
  
  //! Finalize the minimization
-static void finish_em(const t_commrec *cr, gmx_mdoutf_t outf,
+static void finish_em(const t_commrec*          cr,
+                      gmx_mdoutf_t              outf,
                        gmx_walltime_accounting_t walltime_accounting,
-                      gmx_wallcycle_t wcycle)
+                      gmx_wallcycle*            wcycle)
  {
      if (!thisRankHasDuty(cr, DUTY_PME))
      {
@@ -493,9 +540,9 @@ static void finish_em(const t_commrec *cr, gmx_mdoutf_t outf,
  }
  
  //! Swap two different EM states during minimization
-static void swap_em_state(em_state_t **ems1, em_state_t **ems2)
+static void swap_em_state(em_state_t** ems1, em_state_t** ems2)
  {
-    em_state_t *tmp;
+    em_state_t* tmp;
  
      tmp   = *ems1;
      *ems1 = *ems2;
@@ -503,14 +550,18 @@ static void swap_em_state(em_state_t **ems1, em_state_t **ems2)
  }
  
  //! Save the EM trajectory
-static void write_em_traj(FILE *fplog, const t_commrec *cr,
-                          gmx_mdoutf_t outf,
-                          gmx_bool bX, gmx_bool bF, const char *confout,
-                          gmx_mtop_t *top_global,
-                          t_inputrec *ir, int64_t step,
-                          em_state_t *state,
-                          t_state *state_global,
-                          ObservablesHistory *observablesHistory)
+static void write_em_traj(FILE*               fplog,
+                          const t_commrec*    cr,
+                          gmx_mdoutf_t        outf,
+                          gmx_bool            bX,
+                          gmx_bool            bF,
+                          const char*         confout,
+                          const gmx_mtop_t&   top_global,
+                          const t_inputrec*   ir,
+                          int64_t             step,
+                          em_state_t*         state,
+                          t_state*            state_global,
+                          ObservablesHistory* observablesHistory)
  {
      int mdof_flags = 0;
  
@@ -529,20 +580,30 @@ static void write_em_traj(FILE *fplog, const t_commrec *cr,
          mdof_flags |= MDOF_IMD;
      }
  
-    mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
-                                     top_global->natoms, step, static_cast<double>(step),
-                                     &state->s, state_global, observablesHistory,
-                                     state->f);
+    gmx::WriteCheckpointDataHolder checkpointDataHolder;
+    mdoutf_write_to_trajectory_files(fplog,
+                                     cr,
+                                     outf,
+                                     mdof_flags,
+                                     top_global.natoms,
+                                     step,
+                                     static_cast<double>(step),
+                                     &state->s,
+                                     state_global,
+                                     observablesHistory,
+                                     state->f.view().force(),
+                                     &checkpointDataHolder);
  
      if (confout != nullptr)
      {
-        if (DOMAINDECOMP(cr))
+        if (haveDDAtomOrdering(*cr))
          {
              /* If bX=true, x was collected to state_global in the call above */
              if (!bX)
              {
                  auto globalXRef = MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>();
-                dd_collect_vec(cr->dd, &state->s, state->s.x, globalXRef);
+                dd_collect_vec(
+                        cr->dd, state->s.ddp_count, state->s.ddp_count_cg_gl, state->s.cg_gl, state->s.x, globalXRef);
              }
          }
          else
@@ -553,16 +614,19 @@ static void write_em_traj(FILE *fplog, const t_commrec *cr,
  
          if (MASTER(cr))
          {
-            if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
+            if (ir->pbcType != PbcType::No && !ir->bPeriodicMols && haveDDAtomOrdering(*cr))
              {
                  /* Make molecules whole only for confout writing */
-                do_pbc_mtop(ir->ePBC, state->s.box, top_global,
-                            state_global->x.rvec_array());
+                do_pbc_mtop(ir->pbcType, state->s.box, &top_global, state_global->x.rvec_array());
              }
  
              write_sto_conf_mtop(confout,
-                                *top_global->name, top_global,
-                                state_global->x.rvec_array(), nullptr, ir->ePBC, state->s.box);
+                                *top_global.name,
+                                top_global,
+                                state_global->x.rvec_array(),
+                                nullptr,
+                                ir->pbcType,
+                                state->s.box);
          }
      }
  }
@@ -570,25 +634,28 @@ static void write_em_traj(FILE *fplog, const t_commrec *cr,
  //! \brief Do one minimization step
  //
  // \returns true when the step succeeded, false when a constraint error occurred
-static bool do_em_step(const t_commrec *cr,
-                       t_inputrec *ir, t_mdatoms *md,
-                       em_state_t *ems1, real a, const PaddedHostVector<gmx::RVec> *force,
-                       em_state_t *ems2,
-                       gmx::Constraints *constr,
-                       int64_t count)
+static bool do_em_step(const t_commrec*                          cr,
+                       const t_inputrec*                         ir,
+                       t_mdatoms*                                md,
+                       em_state_t*                               ems1,
+                       real                                      a,
+                       gmx::ArrayRefWithPadding<const gmx::RVec> force,
+                       em_state_t*                               ems2,
+                       gmx::Constraints*                         constr,
+                       int64_t                                   count)
  
  {
-    t_state *s1, *s2;
-    int      start, end;
-    real     dvdl_constr;
-    int      nthreads gmx_unused;
+    t_state *    s1, *s2;
+    int          start, end;
+    real         dvdl_constr;
+    int nthreads gmx_unused;
  
-    bool     validStep = true;
+    bool validStep = true;
  
      s1 = &ems1->s;
      s2 = &ems2->s;
  
-    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
+    if (haveDDAtomOrdering(*cr) && s1->ddp_count != cr->dd->ddp_count)
      {
          gmx_incons("state mismatch in do_em_step");
      }
@@ -598,9 +665,9 @@ static bool do_em_step(const t_commrec *cr,
      if (s2->natoms != s1->natoms)
      {
          state_change_natoms(s2, s1->natoms);
-        ems2->f.resizeWithPadding(s2->natoms);
+        ems2->f.resize(s2->natoms);
      }
-    if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size())
+    if (haveDDAtomOrdering(*cr) && s2->cg_gl.size() != s1->cg_gl.size())
      {
          s2->cg_gl.resize(s1->cg_gl.size());
      }
@@ -613,14 +680,14 @@ static bool do_em_step(const t_commrec *cr,
      start = 0;
      end   = md->homenr;
  
-    nthreads = gmx_omp_nthreads_get(emntUpdate);
+    nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
  #pragma omp parallel num_threads(nthreads)
      {
-        const rvec *x1 = s1->x.rvec_array();
-        rvec       *x2 = s2->x.rvec_array();
-        const rvec *f  = force->rvec_array();
+        const rvec* x1 = s1->x.rvec_array();
+        rvec*       x2 = s2->x.rvec_array();
+        const rvec* f  = as_rvec_array(force.unpaddedArrayRef().data());
  
-        int         gf = 0;
+        int gf = 0;
  #pragma omp for schedule(static) nowait
          for (int i = start; i < end; i++)
          {
@@ -638,18 +705,18 @@ static bool do_em_step(const t_commrec *cr,
                      }
                      else
                      {
-                        x2[i][m] = x1[i][m] + a*f[i][m];
+                        x2[i][m] = x1[i][m] + a * f[i][m];
                      }
                  }
              }
-            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
          }
  
-        if (s2->flags & (1<<estCGP))
+        if (s2->flags & enumValueToBitMask(StateEntry::Cgp))
          {
              /* Copy the CG p vector */
-            const rvec *p1 = s1->cg_p.rvec_array();
-            rvec       *p2 = s2->cg_p.rvec_array();
+            const rvec* p1 = s1->cg_p.rvec_array();
+            rvec*       p2 = s2->cg_p.rvec_array();
  #pragma omp for schedule(static) nowait
              for (int i = start; i < end; i++)
              {
@@ -658,30 +725,41 @@ static bool do_em_step(const t_commrec *cr,
              }
          }
  
-        if (DOMAINDECOMP(cr))
+        if (haveDDAtomOrdering(*cr))
          {
-            s2->ddp_count = s1->ddp_count;
-
              /* OpenMP does not supported unsigned loop variables */
  #pragma omp for schedule(static) nowait
              for (gmx::index i = 0; i < gmx::ssize(s2->cg_gl); i++)
              {
                  s2->cg_gl[i] = s1->cg_gl[i];
              }
-            s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
          }
      }
  
+    if (haveDDAtomOrdering(*cr))
+    {
+        s2->ddp_count       = s1->ddp_count;
+        s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
+    }
+
      if (constr)
      {
          dvdl_constr = 0;
-        validStep   =
-            constr->apply(TRUE, TRUE,
-                          count, 0, 1.0,
-                          s1->x.rvec_array(), s2->x.rvec_array(),
-                          nullptr, s2->box,
-                          s2->lambda[efptBONDED], &dvdl_constr,
-                          nullptr, nullptr, gmx::ConstraintVariable::Positions);
+        validStep   = constr->apply(TRUE,
+                                  TRUE,
+                                  count,
+                                  0,
+                                  1.0,
+                                  s1->x.arrayRefWithPadding(),
+                                  s2->x.arrayRefWithPadding(),
+                                  ArrayRef<RVec>(),
+                                  s2->box,
+                                  s2->lambda[FreeEnergyPerturbationCouplingType::Bonded],
+                                  &dvdl_constr,
+                                  gmx::ArrayRefWithPadding<RVec>(),
+                                  false,
+                                  nullptr,
+                                  gmx::ConstraintVariable::Positions);
  
          if (cr->nnodes > 1)
          {
@@ -691,14 +769,18 @@ static bool do_em_step(const t_commrec *cr,
               */
              int reductionBuffer = static_cast<int>(!validStep);
              gmx_sumi(1, &reductionBuffer, cr);
-            validStep           = (reductionBuffer == 0);
+            validStep = (reductionBuffer == 0);
          }
  
          // We should move this check to the different minimizers
-        if (!validStep && ir->eI != eiSteep)
+        if (!validStep && ir->eI != IntegrationAlgorithm::Steep)
          {
-            gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",
-                      EI(ir->eI), EI(eiSteep), EI(ir->eI));
+            gmx_fatal(FARGS,
+                      "The coordinates could not be constrained. Minimizer '%s' can not handle "
+                      "constraint failures, use minimizer '%s' before using '%s'.",
+                      enumValueToString(ir->eI),
+                      enumValueToString(IntegrationAlgorithm::Steep),
+                      enumValueToString(ir->eI));
          }
      }
  
@@ -706,29 +788,98 @@ static bool do_em_step(const t_commrec *cr,
  }
  
  //! Prepare EM for using domain decomposition parallellization
-static void em_dd_partition_system(FILE *fplog,
-                                   const gmx::MDLogger &mdlog,
-                                   int step, const t_commrec *cr,
-                                   gmx_mtop_t *top_global, t_inputrec *ir,
-                                   gmx::ImdSession *imdSession,
-                                   pull_t *pull_work,
-                                   em_state_t *ems, gmx_localtop_t *top,
-                                   gmx::MDAtoms *mdAtoms, t_forcerec *fr,
-                                   gmx_vsite_t *vsite, gmx::Constraints *constr,
-                                   t_nrnb *nrnb, gmx_wallcycle_t wcycle)
+static void em_dd_partition_system(FILE*                fplog,
+                                   const gmx::MDLogger& mdlog,
+                                   int                  step,
+                                   const t_commrec*     cr,
+                                   const gmx_mtop_t&    top_global,
+                                   const t_inputrec*    ir,
+                                   gmx::ImdSession*     imdSession,
+                                   pull_t*              pull_work,
+                                   em_state_t*          ems,
+                                   gmx_localtop_t*      top,
+                                   gmx::MDAtoms*        mdAtoms,
+                                   t_forcerec*          fr,
+                                   VirtualSitesHandler* vsite,
+                                   gmx::Constraints*    constr,
+                                   t_nrnb*              nrnb,
+                                   gmx_wallcycle*       wcycle)
  {
      /* Repartition the domain decomposition */
-    dd_partition_system(fplog, mdlog, step, cr, FALSE, 1,
-                        nullptr, *top_global, ir, imdSession, pull_work,
-                        &ems->s, &ems->f,
-                        mdAtoms, top, fr, vsite, constr,
-                        nrnb, wcycle, FALSE);
-    dd_store_state(cr->dd, &ems->s);
+    dd_partition_system(fplog,
+                        mdlog,
+                        step,
+                        cr,
+                        FALSE,
+                        1,
+                        nullptr,
+                        top_global,
+                        *ir,
+                        imdSession,
+                        pull_work,
+                        &ems->s,
+                        &ems->f,
+                        mdAtoms,
+                        top,
+                        fr,
+                        vsite,
+                        constr,
+                        nrnb,
+                        wcycle,
+                        FALSE);
+    dd_store_state(*cr->dd, &ems->s);
  }
  
  namespace
  {
  
+//! Copy coordinates, OpenMP parallelized, from \p refCoords to coords
+void setCoordinates(std::vector<RVec>* coords, ArrayRef<const RVec> refCoords)
+{
+    coords->resize(refCoords.size());
+
+    const int gmx_unused nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
+#pragma omp parallel for num_threads(nthreads) schedule(static)
+    for (int i = 0; i < ssize(refCoords); i++)
+    {
+        (*coords)[i] = refCoords[i];
+    }
+}
+
+//! Returns the maximum difference an atom moved between two coordinate sets, over all ranks
+real maxCoordinateDifference(ArrayRef<const RVec> coords1, ArrayRef<const RVec> coords2, MPI_Comm mpiCommMyGroup)
+{
+    GMX_RELEASE_ASSERT(coords1.size() == coords2.size(), "Coordinate counts should match");
+
+    real maxDiffSquared = 0;
+
+    const int gmx_unused nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
+#pragma omp parallel for reduction(max : maxDiffSquared) num_threads(nthreads) schedule(static)
+    for (int i = 0; i < ssize(coords1); i++)
+    {
+        maxDiffSquared = std::max(maxDiffSquared, gmx::norm2(coords1[i] - coords2[i]));
+    }
+
+#if GMX_MPI
+    int numRanks = 1;
+    if (mpiCommMyGroup != MPI_COMM_NULL)
+    {
+        MPI_Comm_size(mpiCommMyGroup, &numRanks);
+    }
+    if (numRanks > 1)
+    {
+        real maxDiffSquaredReduced;
+        MPI_Allreduce(
+                &maxDiffSquared, &maxDiffSquaredReduced, 1, GMX_DOUBLE ? MPI_DOUBLE : MPI_FLOAT, MPI_MAX, mpiCommMyGroup);
+        maxDiffSquared = maxDiffSquaredReduced;
+    }
+#else
+    GMX_UNUSED_VALUE(mpiCommMyGroup);
+#endif
+
+    return std::sqrt(maxDiffSquared);
+}
+
  /*! \brief Class to handle the work of setting and doing an energy evaluation.
   *
   * This class is a mere aggregate of parameters to pass to evaluate an
@@ -743,64 +894,61 @@ namespace
   * Use a braced initializer list to construct one of these. */
  class EnergyEvaluator
  {
-    public:
-        /*! \brief Evaluates an energy on the state in \c ems.
-         *
-         * \todo In practice, the same objects mu_tot, vir, and pres
-         * are always passed to this function, so we would rather have
-         * them as data members. However, their C-array types are
-         * unsuited for aggregate initialization. When the types
-         * improve, the call signature of this method can be reduced.
-         */
-        void run(em_state_t *ems, rvec mu_tot,
-                 tensor vir, tensor pres,
-                 int64_t count, gmx_bool bFirst);
-        //! Handles logging (deprecated).
-        FILE                    *fplog;
-        //! Handles logging.
-        const gmx::MDLogger     &mdlog;
-        //! Handles communication.
-        const t_commrec         *cr;
-        //! Coordinates multi-simulations.
-        const gmx_multisim_t    *ms;
-        //! Holds the simulation topology.
-        gmx_mtop_t              *top_global;
-        //! Holds the domain topology.
-        gmx_localtop_t          *top;
-        //! User input options.
-        t_inputrec              *inputrec;
-        //! The Interactive Molecular Dynamics session.
-        gmx::ImdSession         *imdSession;
-        //! The pull work object.
-        pull_t                  *pull_work;
-        //! Manages flop accounting.
-        t_nrnb                  *nrnb;
-        //! Manages wall cycle accounting.
-        gmx_wallcycle_t          wcycle;
-        //! Coordinates global reduction.
-        gmx_global_stat_t        gstat;
-        //! Handles virtual sites.
-        gmx_vsite_t             *vsite;
-        //! Handles constraints.
-        gmx::Constraints        *constr;
-        //! Handles strange things.
-        t_fcdata                *fcd;
-        //! Molecular graph for SHAKE.
-        t_graph                 *graph;
-        //! Per-atom data for this domain.
-        gmx::MDAtoms            *mdAtoms;
-        //! Handles how to calculate the forces.
-        t_forcerec              *fr;
-        //! Schedule of force-calculation work each step for this task.
-        MdrunScheduleWorkload   *runScheduleWork;
-        //! Stores the computed energies.
-        gmx_enerdata_t          *enerd;
+public:
+    /*! \brief Evaluates an energy on the state in \c ems.
+     *
+     * \todo In practice, the same objects mu_tot, vir, and pres
+     * are always passed to this function, so we would rather have
+     * them as data members. However, their C-array types are
+     * unsuited for aggregate initialization. When the types
+     * improve, the call signature of this method can be reduced.
+     */
+    void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst, int64_t step);
+    //! Handles logging (deprecated).
+    FILE* fplog;
+    //! Handles logging.
+    const gmx::MDLogger& mdlog;
+    //! Handles communication.
+    const t_commrec* cr;
+    //! Coordinates multi-simulations.
+    const gmx_multisim_t* ms;
+    //! Holds the simulation topology.
+    const gmx_mtop_t& top_global;
+    //! Holds the domain topology.
+    gmx_localtop_t* top;
+    //! User input options.
+    const t_inputrec* inputrec;
+    //! The Interactive Molecular Dynamics session.
+    gmx::ImdSession* imdSession;
+    //! The pull work object.
+    pull_t* pull_work;
+    //! Manages flop accounting.
+    t_nrnb* nrnb;
+    //! Manages wall cycle accounting.
+    gmx_wallcycle* wcycle;
+    //! Legacy coordinator of global reduction.
+    gmx_global_stat_t gstat;
+    //! Coordinates reduction for observables
+    gmx::ObservablesReducer* observablesReducer;
+    //! Handles virtual sites.
+    VirtualSitesHandler* vsite;
+    //! Handles constraints.
+    gmx::Constraints* constr;
+    //! Per-atom data for this domain.
+    gmx::MDAtoms* mdAtoms;
+    //! Handles how to calculate the forces.
+    t_forcerec* fr;
+    //! Schedule of force-calculation work each step for this task.
+    MdrunScheduleWorkload* runScheduleWork;
+    //! Stores the computed energies.
+    gmx_enerdata_t* enerd;
+    //! The DD partitioning count at which the pair list was generated
+    int ddpCountPairSearch;
+    //! The local coordinates that were used for pair searching, stored for computing displacements
+    std::vector<RVec> pairSearchCoordinates;
  };
  
-void
-EnergyEvaluator::run(em_state_t *ems, rvec mu_tot,
-                     tensor vir, tensor pres,
-                     int64_t count, gmx_bool bFirst)
+void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst, int64_t step)
  {
      real     t;
      gmx_bool bNS;
@@ -811,50 +959,79 @@ EnergyEvaluator::run(em_state_t *ems, rvec mu_tot,
      /* Set the time to the initial time, the time does not change during EM */
      t = inputrec->init_t;
  
-    if (bFirst ||
-        (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
+    if (vsite)
+    {
+        vsite->construct(ems->s.x, {}, ems->s.box, gmx::VSiteOperation::Positions);
+    }
+
+    // Compute the buffer size of the pair list
+    const real bufferSize = inputrec->rlist - std::max(inputrec->rcoulomb, inputrec->rvdw);
+
+    if (bFirst || bufferSize <= 0 || (haveDDAtomOrdering(*cr) && ems->s.ddp_count != ddpCountPairSearch))
      {
          /* This is the first state or an old state used before the last ns */
          bNS = TRUE;
      }
      else
      {
-        bNS = FALSE;
-        if (inputrec->nstlist > 0)
-        {
-            bNS = TRUE;
-        }
+        // We need to generate a new pairlist when one atom moved more than half the buffer size
+        ArrayRef<const RVec> localCoordinates =
+                ArrayRef<const RVec>(ems->s.x).subArray(0, mdAtoms->mdatoms()->homenr);
+        bNS = 2 * maxCoordinateDifference(pairSearchCoordinates, localCoordinates, cr->mpi_comm_mygroup)
+              > bufferSize;
      }
  
-    if (vsite)
+    if (haveDDAtomOrdering(*cr) && bNS)
      {
-        construct_vsites(vsite, ems->s.x.rvec_array(), 1, nullptr,
-                         top->idef.iparams, top->idef.il,
-                         fr->ePBC, fr->bMolPBC, cr, ems->s.box);
+        /* Repartition the domain decomposition */
+        em_dd_partition_system(
+                fplog, mdlog, count, cr, top_global, inputrec, imdSession, pull_work, ems, top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
+        ddpCountPairSearch = cr->dd->ddp_count;
      }
  
-    if (DOMAINDECOMP(cr) && bNS)
+    /* Store the local coordinates that will be used in the pair search, after we re-partitioned */
+    if (bufferSize > 0 && bNS)
      {
-        /* Repartition the domain decomposition */
-        em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession,
-                               pull_work,
-                               ems, top, mdAtoms, fr, vsite, constr,
-                               nrnb, wcycle);
+        ArrayRef<const RVec> localCoordinates =
+                constArrayRefFromArray(ems->s.x.data(), mdAtoms->mdatoms()->homenr);
+        setCoordinates(&pairSearchCoordinates, localCoordinates);
      }
  
+    fr->longRangeNonbondeds->updateAfterPartition(*mdAtoms->mdatoms());
+
      /* Calc force & energy on new trial position  */
      /* do_force always puts the charge groups in the box and shifts again
       * We do not unshift, so molecules are always whole in congrad.c
       */
-    do_force(fplog, cr, ms, inputrec, nullptr, nullptr, imdSession,
+    do_force(fplog,
+             cr,
+             ms,
+             *inputrec,
+             nullptr,
+             nullptr,
+             imdSession,
               pull_work,
-             count, nrnb, wcycle, top,
-             ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist,
-             ems->f.arrayRefWithPadding(), force_vir, mdAtoms->mdatoms(), enerd, fcd,
-             ems->s.lambda, graph, fr, runScheduleWork, vsite, mu_tot, t, nullptr,
-             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
-             GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
-             (bNS ? GMX_FORCE_NS : 0),
+             count,
+             nrnb,
+             wcycle,
+             top,
+             ems->s.box,
+             ems->s.x.arrayRefWithPadding(),
+             &ems->s.hist,
+             &ems->f.view(),
+             force_vir,
+             mdAtoms->mdatoms(),
+             enerd,
+             ems->s.lambda,
+             fr,
+             runScheduleWork,
+             vsite,
+             mu_tot,
+             t,
+             nullptr,
+             fr->longRangeNonbondeds.get(),
+             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY
+                     | (bNS ? GMX_FORCE_NS : 0),
               DDBalanceRegionHandler(cr));
  
      /* Clear the unused shake virial and pressure */
@@ -862,30 +1039,37 @@ EnergyEvaluator::run(em_state_t *ems, rvec mu_tot,
      clear_mat(pres);
  
      /* Communicate stuff when parallel */
-    if (PAR(cr) && inputrec->eI != eiNM)
+    if (PAR(cr) && inputrec->eI != IntegrationAlgorithm::NM)
      {
-        wallcycle_start(wcycle, ewcMoveE);
+        wallcycle_start(wcycle, WallCycleCounter::MoveE);
  
-        global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
-                    inputrec, nullptr, nullptr, nullptr, 1, &terminate,
-                    nullptr, FALSE,
-                    CGLO_ENERGY |
-                    CGLO_PRESSURE |
-                    CGLO_CONSTRAINT);
+        global_stat(*gstat,
+                    cr,
+                    enerd,
+                    force_vir,
+                    shake_vir,
+                    *inputrec,
+                    nullptr,
+                    nullptr,
+                    std::vector<real>(1, terminate),
+                    FALSE,
+                    CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT,
+                    step,
+                    observablesReducer);
  
-        wallcycle_stop(wcycle, ewcMoveE);
+        wallcycle_stop(wcycle, WallCycleCounter::MoveE);
      }
  
      if (fr->dispersionCorrection)
      {
          /* Calculate long range corrections to pressure and energy */
-        const DispersionCorrection::Correction correction =
-            fr->dispersionCorrection->calculate(ems->s.box, ems->s.lambda[efptVDW]);
+        const DispersionCorrection::Correction correction = fr->dispersionCorrection->calculate(
+                ems->s.box, ems->s.lambda[FreeEnergyPerturbationCouplingType::Vdw]);
  
          enerd->term[F_DISPCORR] = correction.energy;
-        enerd->term[F_EPOT]    += correction.energy;
-        enerd->term[F_PRES]    += correction.pressure;
-        enerd->term[F_DVDL]    += correction.dvdl;
+        enerd->term[F_EPOT] += correction.energy;
+        enerd->term[F_PRES] += correction.pressure;
+        enerd->term[F_DVDL] += correction.dvdl;
      }
      else
      {
@@ -897,14 +1081,26 @@ EnergyEvaluator::run(em_state_t *ems, rvec mu_tot,
      if (constr)
      {
          /* Project out the constraint components of the force */
-        dvdl_constr = 0;
-        rvec *f_rvec = ems->f.rvec_array();
-        constr->apply(FALSE, FALSE,
-                      count, 0, 1.0,
-                      ems->s.x.rvec_array(), f_rvec, f_rvec,
+        bool needsLogging  = false;
+        bool computeEnergy = false;
+        bool computeVirial = true;
+        dvdl_constr        = 0;
+        auto f             = ems->f.view().forceWithPadding();
+        constr->apply(needsLogging,
+                      computeEnergy,
+                      count,
+                      0,
+                      1.0,
+                      ems->s.x.arrayRefWithPadding(),
+                      f,
+                      f.unpaddedArrayRef(),
                        ems->s.box,
-                      ems->s.lambda[efptBONDED], &dvdl_constr,
-                      nullptr, &shake_vir, gmx::ConstraintVariable::ForceDispl);
+                      ems->s.lambda[FreeEnergyPerturbationCouplingType::Bonded],
+                      &dvdl_constr,
+                      gmx::ArrayRefWithPadding<RVec>(),
+                      computeVirial,
+                      shake_vir,
+                      gmx::ConstraintVariable::ForceDispl);
          enerd->term[F_DVDL_CONSTR] += dvdl_constr;
          m_add(force_vir, shake_vir, vir);
      }
@@ -914,10 +1110,12 @@ EnergyEvaluator::run(em_state_t *ems, rvec mu_tot,
      }
  
      clear_mat(ekin);
-    enerd->term[F_PRES] =
-        calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
+    enerd->term[F_PRES] = calc_pres(fr->pbcType, inputrec->nwall, ems->s.box, ekin, vir, pres);
  
-    sum_dhdl(enerd, ems->s.lambda, *inputrec->fepvals);
+    if (inputrec->efep != FreeEnergyPerturbationType::No)
+    {
+        accumulateKineticLambdaComponents(enerd, ems->s.lambda, *inputrec->fepvals);
+    }
  
      if (EI_ENERGY_MINIMIZATION(inputrec->eI))
      {
@@ -928,41 +1126,45 @@ EnergyEvaluator::run(em_state_t *ems, rvec mu_tot,
  } // namespace
  
  //! Parallel utility summing energies and forces
-static double reorder_partsum(const t_commrec *cr, t_grpopts *opts,
-                              gmx_mtop_t *top_global,
-                              em_state_t *s_min, em_state_t *s_b)
+static double reorder_partsum(const t_commrec*  cr,
+                              const t_grpopts*  opts,
+                              const gmx_mtop_t& top_global,
+                              const em_state_t* s_min,
+                              const em_state_t* s_b)
  {
      if (debug)
      {
          fprintf(debug, "Doing reorder_partsum\n");
      }
  
-    const rvec *fm = s_min->f.rvec_array();
-    const rvec *fb = s_b->f.rvec_array();
+    auto fm = s_min->f.view().force();
+    auto fb = s_b->f.view().force();
  
      /* Collect fm in a global vector fmg.
       * This conflicts with the spirit of domain decomposition,
       * but to fully optimize this a much more complicated algorithm is required.
       */
-    const int  natoms = top_global->natoms;
-    rvec      *fmg;
+    const int natoms = top_global.natoms;
+    rvec*     fmg;
      snew(fmg, natoms);
  
-    gmx::ArrayRef<const int> indicesMin = s_b->s.cg_gl;
-    int i = 0;
+    gmx::ArrayRef<const int> indicesMin = s_min->s.cg_gl;
+    int                      i          = 0;
      for (int a : indicesMin)
      {
          copy_rvec(fm[i], fmg[a]);
+        i++;
      }
-    gmx_sum(top_global->natoms*3, fmg[0], cr);
+    gmx_sum(top_global.natoms * 3, fmg[0], cr);
  
      /* Now we will determine the part of the sum for the cgs in state s_b */
      gmx::ArrayRef<const int> indicesB = s_b->s.cg_gl;
  
-    double                   partsum = 0;
-    i              = 0;
-    int gf         = 0;
-    gmx::ArrayRef<unsigned char> grpnrFREEZE = top_global->groups.groupNumbers[SimulationAtomGroupType::Freeze];
+    double partsum                        = 0;
+    i                                     = 0;
+    int                                gf = 0;
+    gmx::ArrayRef<const unsigned char> grpnrFREEZE =
+            top_global.groups.groupNumbers[SimulationAtomGroupType::Freeze];
      for (int a : indicesB)
      {
          if (!grpnrFREEZE.empty())
@@ -973,7 +1175,7 @@ static double reorder_partsum(const t_commrec *cr, t_grpopts *opts,
          {
              if (!opts->nFreeze[gf][m])
              {
-                partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
+                partsum += (fb[i][m] - fmg[a][m]) * fb[i][m];
              }
          }
          i++;
@@ -985,9 +1187,12 @@ static double reorder_partsum(const t_commrec *cr, t_grpopts *opts,
  }
  
  //! Print some stuff, like beta, whatever that means.
-static real pr_beta(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
-                    gmx_mtop_t *top_global,
-                    em_state_t *s_min, em_state_t *s_b)
+static real pr_beta(const t_commrec*  cr,
+                    const t_grpopts*  opts,
+                    t_mdatoms*        mdatoms,
+                    const gmx_mtop_t& top_global,
+                    const em_state_t* s_min,
+                    const em_state_t* s_b)
  {
      double sum;
  
@@ -996,14 +1201,13 @@ static real pr_beta(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
       * and might have to sum it in parallel runs.
       */
  
-    if (!DOMAINDECOMP(cr) ||
-        (s_min->s.ddp_count == cr->dd->ddp_count &&
-         s_b->s.ddp_count   == cr->dd->ddp_count))
+    if (!haveDDAtomOrdering(*cr)
+        || (s_min->s.ddp_count == cr->dd->ddp_count && s_b->s.ddp_count == cr->dd->ddp_count))
      {
-        const rvec *fm  = s_min->f.rvec_array();
-        const rvec *fb  = s_b->f.rvec_array();
-        sum             = 0;
-        int         gf  = 0;
+        auto fm = s_min->f.view().force();
+        auto fb = s_b->f.view().force();
+        sum     = 0;
+        int gf  = 0;
          /* This part of code can be incorrect with DD,
           * since the atom ordering in s_b and s_min might differ.
           */
@@ -1017,7 +1221,7 @@ static real pr_beta(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
              {
                  if (!opts->nFreeze[gf][m])
                  {
-                    sum += (fb[i][m] - fm[i][m])*fb[i][m];
+                    sum += (fb[i][m] - fm[i][m]) * fb[i][m];
                  }
              }
          }
@@ -1032,72 +1236,107 @@ static real pr_beta(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
          gmx_sumd(1, &sum, cr);
      }
  
-    return sum/gmx::square(s_min->fnorm);
+    return sum / gmx::square(s_min->fnorm);
  }
  
  namespace gmx
  {
  
-void
-LegacySimulator::do_cg()
+void LegacySimulator::do_cg()
  {
-    const char        *CG = "Polak-Ribiere Conjugate Gradients";
+    const char* CG = "Polak-Ribiere Conjugate Gradients";
  
-    gmx_localtop_t     top;
-    gmx_global_stat_t  gstat;
-    t_graph           *graph;
-    double             tmp, minstep;
-    real               stepsize;
-    real               a, b, c, beta = 0.0;
-    real               epot_repl = 0;
-    real               pnorm;
-    gmx_bool           converged, foundlower;
-    rvec               mu_tot = {0};
-    gmx_bool           do_log = FALSE, do_ene = FALSE, do_x, do_f;
-    tensor             vir, pres;
-    int                number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
-    int                m, step, nminstep;
-    auto               mdatoms = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info).asParagraph().
-        appendText("Note that activating conjugate gradient energy minimization via the "
-                   "integrator .mdp option and the command gmx mdrun may "
-                   "be available in a different form in a future version of GROMACS, "
-                   "e.g. gmx minimize and an .mdp option.");
+    gmx_global_stat_t gstat;
+    double            tmp, minstep;
+    real              stepsize;
+    real              a, b, c, beta = 0.0;
+    real              epot_repl = 0;
+    real              pnorm;
+    gmx_bool          converged, foundlower;
+    rvec              mu_tot = { 0 };
+    gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
+    tensor            vir, pres;
+    int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
+    int               m, step, nminstep;
+    auto*             mdatoms = mdAtoms->mdatoms();
+
+    GMX_LOG(mdlog.info)
+            .asParagraph()
+            .appendText(
+                    "Note that activating conjugate gradient energy minimization via the "
+                    "integrator .mdp option and the command gmx mdrun may "
+                    "be available in a different form in a future version of GROMACS, "
+                    "e.g. gmx minimize and an .mdp option.");
  
      step = 0;
  
      if (MASTER(cr))
      {
          // In CG, the state is extended with a search direction
-        state_global->flags |= (1<<estCGP);
+        state_global->flags |= enumValueToBitMask(StateEntry::Cgp);
  
          // Ensure the extra per-atom state array gets allocated
          state_change_natoms(state_global, state_global->natoms);
  
          // Initialize the search direction to zero
-        for (RVec &cg_p : state_global->cg_p)
+        for (RVec& cg_p : state_global->cg_p)
          {
              cg_p = { 0, 0, 0 };
          }
      }
  
      /* Create 4 states on the stack and extract pointers that we will swap */
-    em_state_t  s0 {}, s1 {}, s2 {}, s3 {};
-    em_state_t *s_min = &s0;
-    em_state_t *s_a   = &s1;
-    em_state_t *s_b   = &s2;
-    em_state_t *s_c   = &s3;
+    em_state_t  s0{}, s1{}, s2{}, s3{};
+    em_state_t* s_min = &s0;
+    em_state_t* s_a   = &s1;
+    em_state_t* s_b   = &s2;
+    em_state_t* s_c   = &s3;
+
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
  
      /* Init em and store the local state in s_min */
-    init_em(fplog, mdlog, CG, cr, inputrec, imdSession,
+    init_em(fplog,
+            mdlog,
+            CG,
+            cr,
+            inputrec,
+            imdSession,
              pull_work,
-            state_global, top_global, s_min, &top,
-            nrnb, fr, &graph, mdAtoms, &gstat,
-            vsite, constr, nullptr);
-    gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                         StartingBehavior::NewSimulation);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, nullptr, false, mdModulesNotifier);
+            state_global,
+            top_global,
+            s_min,
+            top,
+            nrnb,
+            fr,
+            mdAtoms,
+            &gstat,
+            vsite,
+            constr,
+            nullptr);
+    const bool        simulationsShareState = false;
+    gmx_mdoutf*       outf                  = init_mdoutf(fplog,
+                                   nfile,
+                                   fnm,
+                                   mdrunOptions,
+                                   cr,
+                                   outputProvider,
+                                   mdModulesNotifiers,
+                                   inputrec,
+                                   top_global,
+                                   nullptr,
+                                   wcycle,
+                                   StartingBehavior::NewSimulation,
+                                   simulationsShareState,
+                                   ms);
+    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
+                                   top_global,
+                                   *inputrec,
+                                   pull_work,
+                                   nullptr,
+                                   false,
+                                   StartingBehavior::NewSimulation,
+                                   simulationsShareState,
+                                   mdModulesNotifiers);
  
      /* Print to log file */
      print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
@@ -1114,49 +1353,70 @@ LegacySimulator::do_cg()
          sp_header(fplog, CG, inputrec->em_tol, number_steps);
      }
  
-    EnergyEvaluator energyEvaluator {
-        fplog, mdlog, cr, ms,
-        top_global, &top,
-        inputrec, imdSession, pull_work, nrnb, wcycle, gstat,
-        vsite, constr, fcd, graph,
-        mdAtoms, fr, runScheduleWork, enerd
-    };
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd,
+                                     -1,
+                                     {} };
      /* Call the force routine and some auxiliary (neighboursearching etc.) */
      /* do_force always puts the charge groups in the box and shifts again
       * We do not unshift, so molecules are always whole in congrad.c
       */
-    energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE);
+    energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE, step);
+    observablesReducer.markAsReadyToReduce();
  
      if (MASTER(cr))
      {
          /* Copy stuff to the energy bin for easy printing etc. */
          matrix nullBox = {};
-        energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step),
-                                         mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox,
-                                         nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
-
-        energyOutput.printHeader(fplog, step, step);
-        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE,
-                                           fplog, step, step,
-                                           fcd, nullptr);
+        energyOutput.addDataAtEnergyStep(false,
+                                         false,
+                                         static_cast<double>(step),
+                                         mdatoms->tmass,
+                                         enerd,
+                                         nullptr,
+                                         nullBox,
+                                         PTCouplingArrays(),
+                                         0,
+                                         vir,
+                                         pres,
+                                         nullptr,
+                                         mu_tot,
+                                         constr);
+
+        EnergyOutput::printHeader(fplog, step, step);
+        energyOutput.printStepToEnergyFile(
+                mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, fr->fcdata.get(), nullptr);
      }
  
      /* Estimate/guess the initial stepsize */
-    stepsize = inputrec->em_stepsize/s_min->fnorm;
+    stepsize = inputrec->em_stepsize / s_min->fnorm;
  
      if (MASTER(cr))
      {
          double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fprintf(stderr, "   F-max             = %12.5e on atom %d\n",
-                s_min->fmax, s_min->a_fmax+1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n",
-                s_min->fnorm/sqrtNumAtoms);
+        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
+        fprintf(stderr, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
          fprintf(stderr, "\n");
          /* and copy to the log file too... */
-        fprintf(fplog, "   F-max             = %12.5e on atom %d\n",
-                s_min->fmax, s_min->a_fmax+1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n",
-                s_min->fnorm/sqrtNumAtoms);
+        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
+        fprintf(fplog, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
          fprintf(fplog, "\n");
      }
      /* Start the loop over CG steps.
@@ -1173,10 +1433,10 @@ LegacySimulator::do_cg()
           */
  
          /* Calculate the new direction in p, and the gradient in this direction, gpa */
-        rvec       *pm  = s_min->s.cg_p.rvec_array();
-        const rvec *sfm = s_min->f.rvec_array();
-        double      gpa = 0;
-        int         gf  = 0;
+        gmx::ArrayRef<gmx::RVec>       pm  = s_min->s.cg_p;
+        gmx::ArrayRef<const gmx::RVec> sfm = s_min->f.view().force();
+        double                         gpa = 0;
+        int                            gf  = 0;
          for (int i = 0; i < mdatoms->homenr; i++)
          {
              if (mdatoms->cFREEZE)
@@ -1187,8 +1447,8 @@ LegacySimulator::do_cg()
              {
                  if (!inputrec->opts.nFreeze[gf][m])
                  {
-                    pm[i][m] = sfm[i][m] + beta*pm[i][m];
-                    gpa     -= pm[i][m]*sfm[i][m];
+                    pm[i][m] = sfm[i][m] + beta * pm[i][m];
+                    gpa -= pm[i][m] * sfm[i][m];
                      /* f is negative gradient, thus the sign */
                  }
                  else
@@ -1210,7 +1470,7 @@ LegacySimulator::do_cg()
          /* Just in case stepsize reaches zero due to numerical precision... */
          if (stepsize <= 0)
          {
-            stepsize = inputrec->em_stepsize/pnorm;
+            stepsize = inputrec->em_stepsize / pnorm;
          }
  
          /*
@@ -1229,7 +1489,7 @@ LegacySimulator::do_cg()
          /* Calculate minimum allowed stepsize, before the average (norm)
           * relative change in coordinate is smaller than precision
           */
-        minstep = 0;
+        minstep      = 0;
          auto s_min_x = makeArrayRef(s_min->s.x);
          for (int i = 0; i < mdatoms->homenr; i++)
          {
@@ -1240,8 +1500,8 @@ LegacySimulator::do_cg()
                  {
                      tmp = 1.0;
                  }
-                tmp      = pm[i][m]/tmp;
-                minstep += tmp*tmp;
+                tmp = pm[i][m] / tmp;
+                minstep += tmp * tmp;
              }
          }
          /* Add up from all CPUs */
@@ -1250,7 +1510,7 @@ LegacySimulator::do_cg()
              gmx_sumd(1, &minstep, cr);
          }
  
-        minstep = GMX_REAL_EPS/sqrt(minstep/(3*top_global->natoms));
+        minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global.natoms));
  
          if (stepsize < minstep)
          {
@@ -1262,9 +1522,8 @@ LegacySimulator::do_cg()
          do_x = do_per_step(step, inputrec->nstxout);
          do_f = do_per_step(step, inputrec->nstfout);
  
-        write_em_traj(fplog, cr, outf, do_x, do_f, nullptr,
-                      top_global, inputrec, step,
-                      s_min, state_global, observablesHistory);
+        write_em_traj(
+                fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, step, s_min, state_global, observablesHistory);
  
          /* Take a step downhill.
           * In theory, we should minimize the function along this direction.
@@ -1287,31 +1546,43 @@ LegacySimulator::do_cg()
          a         = 0.0;
          c         = a + stepsize; /* reference position along line is zero */
  
-        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
+        if (haveDDAtomOrdering(*cr) && s_min->s.ddp_count < cr->dd->ddp_count)
          {
-            em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, imdSession,
+            em_dd_partition_system(fplog,
+                                   mdlog,
+                                   step,
+                                   cr,
+                                   top_global,
+                                   inputrec,
+                                   imdSession,
                                     pull_work,
-                                   s_min, &top, mdAtoms, fr, vsite, constr,
-                                   nrnb, wcycle);
+                                   s_min,
+                                   top,
+                                   mdAtoms,
+                                   fr,
+                                   vsite,
+                                   constr,
+                                   nrnb,
+                                   wcycle);
          }
  
          /* Take a trial step (new coords in s_c) */
-        do_em_step(cr, inputrec, mdatoms, s_min, c, &s_min->s.cg_p, s_c,
-                   constr, -1);
+        do_em_step(cr, inputrec, mdatoms, s_min, c, s_min->s.cg_p.constArrayRefWithPadding(), s_c, constr, -1);
  
          neval++;
          /* Calculate energy for the trial step */
-        energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE);
+        energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE, step);
+        observablesReducer.markAsReadyToReduce();
  
          /* Calc derivative along line */
-        const rvec *pc  = s_c->s.cg_p.rvec_array();
-        const rvec *sfc = s_c->f.rvec_array();
-        double      gpc = 0;
+        const rvec*                    pc  = s_c->s.cg_p.rvec_array();
+        gmx::ArrayRef<const gmx::RVec> sfc = s_c->f.view().force();
+        double                         gpc = 0;
          for (int i = 0; i < mdatoms->homenr; i++)
          {
              for (m = 0; m < DIM; m++)
              {
-                gpc -= pc[i][m]*sfc[i][m]; /* f is negative gradient, thus the sign */
+                gpc -= pc[i][m] * sfc[i][m]; /* f is negative gradient, thus the sign */
              }
          }
          /* Sum the gradient along the line across CPUs */
@@ -1321,7 +1592,7 @@ LegacySimulator::do_cg()
          }
  
          /* This is the max amount of increase in energy we tolerate */
-        tmp = std::sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
+        tmp = std::sqrt(GMX_REAL_EPS) * fabs(s_a->epot);
  
          /* Accept the step if the energy is lower, or if it is not significantly higher
           * and the line derivative is still negative.
@@ -1347,12 +1618,10 @@ LegacySimulator::do_cg()
               * to find a smaller value in the interval. Take smaller step next time!
               */
              foundlower = FALSE;
-            stepsize  *= 0.618034;
+            stepsize *= 0.618034;
          }
  
  
-
-
          /* OK, if we didn't find a lower value we will have to locate one now - there must
           * be one in the interval [a=0,c].
           * The same thing is valid here, though: Don't spend dozens of iterations to find
@@ -1377,11 +1646,11 @@ LegacySimulator::do_cg()
                   */
                  if (gpa < 0 && gpc > 0)
                  {
-                    b = a + gpa*(a-c)/(gpc-gpa);
+                    b = a + gpa * (a - c) / (gpc - gpa);
                  }
                  else
                  {
-                    b = 0.5*(a+c);
+                    b = 0.5 * (a + c);
                  }
  
                  /* safeguard if interpolation close to machine accuracy causes errors:
@@ -1389,37 +1658,49 @@ LegacySimulator::do_cg()
                   */
                  if (b <= a || b >= c)
                  {
-                    b = 0.5*(a+c);
+                    b = 0.5 * (a + c);
                  }
  
-                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
+                if (haveDDAtomOrdering(*cr) && s_min->s.ddp_count != cr->dd->ddp_count)
                  {
                      /* Reload the old state */
-                    em_dd_partition_system(fplog, mdlog, -1, cr, top_global, inputrec, imdSession,
+                    em_dd_partition_system(fplog,
+                                           mdlog,
+                                           -1,
+                                           cr,
+                                           top_global,
+                                           inputrec,
+                                           imdSession,
                                             pull_work,
-                                           s_min, &top, mdAtoms, fr, vsite, constr,
-                                           nrnb, wcycle);
+                                           s_min,
+                                           top,
+                                           mdAtoms,
+                                           fr,
+                                           vsite,
+                                           constr,
+                                           nrnb,
+                                           wcycle);
                  }
  
                  /* Take a trial step to this new point - new coords in s_b */
-                do_em_step(cr, inputrec, mdatoms, s_min, b, &s_min->s.cg_p, s_b,
-                           constr, -1);
+                do_em_step(cr, inputrec, mdatoms, s_min, b, s_min->s.cg_p.constArrayRefWithPadding(), s_b, constr, -1);
  
                  neval++;
                  /* Calculate energy for the trial step */
-                energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE);
+                energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE, step);
+                observablesReducer.markAsReadyToReduce();
  
                  /* p does not change within a step, but since the domain decomposition
                   * might change, we have to use cg_p of s_b here.
                   */
-                const rvec *pb  = s_b->s.cg_p.rvec_array();
-                const rvec *sfb = s_b->f.rvec_array();
-                gpb             = 0;
+                const rvec*                    pb  = s_b->s.cg_p.rvec_array();
+                gmx::ArrayRef<const gmx::RVec> sfb = s_b->f.view().force();
+                gpb                                = 0;
                  for (int i = 0; i < mdatoms->homenr; i++)
                  {
                      for (m = 0; m < DIM; m++)
                      {
-                        gpb -= pb[i][m]*sfb[i][m]; /* f is negative gradient, thus the sign */
+                        gpb -= pb[i][m] * sfb[i][m]; /* f is negative gradient, thus the sign */
                      }
                  }
                  /* Sum the gradient along the line across CPUs */
@@ -1430,8 +1711,7 @@ LegacySimulator::do_cg()
  
                  if (debug)
                  {
-                    fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
-                            s_a->epot, s_b->epot, s_c->epot, gpb);
+                    fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", s_a->epot, s_b->epot, s_c->epot, gpb);
                  }
  
                  epot_repl = s_b->epot;
@@ -1457,12 +1737,9 @@ LegacySimulator::do_cg()
                   * Never run more than 20 steps, no matter what.
                   */
                  nminstep++;
-            }
-            while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
-                   (nminstep < 20));
+            } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && (nminstep < 20));
  
-            if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
-                nminstep >= 20)
+            if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot) * GMX_REAL_EPS || nminstep >= 20)
              {
                  /* OK. We couldn't find a significantly lower energy.
                   * If beta==0 this was steepest descent, and then we give up.
@@ -1488,8 +1765,7 @@ LegacySimulator::do_cg()
              {
                  if (debug)
                  {
-                    fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
-                            s_c->epot, s_a->epot);
+                    fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", s_c->epot, s_a->epot);
                  }
                  swap_em_state(&s_b, &s_c);
                  gpb = gpc;
@@ -1498,20 +1774,17 @@ LegacySimulator::do_cg()
              {
                  if (debug)
                  {
-                    fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
-                            s_a->epot, s_c->epot);
+                    fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", s_a->epot, s_c->epot);
                  }
                  swap_em_state(&s_b, &s_a);
                  gpb = gpa;
              }
-
          }
          else
          {
              if (debug)
              {
-                fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
-                        s_c->epot);
+                fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", s_c->epot);
              }
              swap_em_state(&s_b, &s_c);
              gpb = gpc;
@@ -1551,16 +1824,31 @@ LegacySimulator::do_cg()
              if (mdrunOptions.verbose)
              {
                  double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
-                        step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
-                        s_min->fmax, s_min->a_fmax+1);
+                fprintf(stderr,
+                        "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
+                        step,
+                        s_min->epot,
+                        s_min->fnorm / sqrtNumAtoms,
+                        s_min->fmax,
+                        s_min->a_fmax + 1);
                  fflush(stderr);
              }
              /* Store the new (lower) energies */
              matrix nullBox = {};
-            energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step),
-                                             mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox,
-                                             nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
+            energyOutput.addDataAtEnergyStep(false,
+                                             false,
+                                             static_cast<double>(step),
+                                             mdatoms->tmass,
+                                             enerd,
+                                             nullptr,
+                                             nullBox,
+                                             PTCouplingArrays(),
+                                             0,
+                                             vir,
+                                             pres,
+                                             nullptr,
+                                             mu_tot,
+                                             constr);
  
              do_log = do_per_step(step, inputrec->nstlog);
              do_ene = do_per_step(step, inputrec->nstenergy);
@@ -1569,15 +1857,21 @@ LegacySimulator::do_cg()
  
              if (do_log)
              {
-                energyOutput.printHeader(fplog, step, step);
+                EnergyOutput::printHeader(fplog, step, step);
              }
-            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                                               do_log ? fplog : nullptr, step, step,
-                                               fcd, nullptr);
+            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
+                                               do_ene,
+                                               FALSE,
+                                               FALSE,
+                                               do_log ? fplog : nullptr,
+                                               step,
+                                               step,
+                                               fr->fcdata.get(),
+                                               nullptr);
          }
  
          /* Send energies and positions to the IMD client if bIMD is TRUE. */
-        if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0))
+        if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x, 0))
          {
              imdSession->sendPositionsAndEnergies();
          }
@@ -1586,20 +1880,18 @@ LegacySimulator::do_cg()
           * If we have reached machine precision, converged is already set to true.
           */
          converged = converged || (s_min->fmax < inputrec->em_tol);
-
-    }   /* End of the loop */
+        observablesReducer.markAsReadyToReduce();
+    } /* End of the loop */
  
      if (converged)
      {
          step--; /* we never took that last step in this case */
-
      }
      if (s_min->fmax > inputrec->em_tol)
      {
          if (MASTER(cr))
          {
-            warn_step(fplog, inputrec->em_tol, s_min->fmax,
-                      step-1 == number_steps, FALSE);
+            warn_step(fplog, inputrec->em_tol, s_min->fmax, step - 1 == number_steps, FALSE);
          }
          converged = FALSE;
      }
@@ -1612,14 +1904,20 @@ LegacySimulator::do_cg()
          if (!do_log)
          {
              /* Write final value to log since we didn't do anything the last step */
-            energyOutput.printHeader(fplog, step, step);
+            EnergyOutput::printHeader(fplog, step, step);
          }
          if (!do_ene || !do_log)
          {
              /* Write final energy file entries */
-            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                                               !do_log ? fplog : nullptr, step, step,
-                                               fcd, nullptr);
+            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
+                                               !do_ene,
+                                               FALSE,
+                                               FALSE,
+                                               !do_log ? fplog : nullptr,
+                                               step,
+                                               step,
+                                               fr->fcdata.get(),
+                                               nullptr);
          }
      }
  
@@ -1642,18 +1940,15 @@ LegacySimulator::do_cg()
      do_x = !do_per_step(step, inputrec->nstxout);
      do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
  
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, step,
-                  s_min, state_global, observablesHistory);
+    write_em_traj(
+            fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, step, s_min, state_global, observablesHistory);
  
  
      if (MASTER(cr))
      {
          double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
-                        s_min, sqrtNumAtoms);
-        print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
-                        s_min, sqrtNumAtoms);
+        print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
+        print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
  
          fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
      }
@@ -1665,35 +1960,25 @@ LegacySimulator::do_cg()
  }
  
  
-void
-LegacySimulator::do_lbfgs()
+void LegacySimulator::do_lbfgs()
  {
-    static const char *LBFGS = "Low-Memory BFGS Minimizer";
+    static const char* LBFGS = "Low-Memory BFGS Minimizer";
      em_state_t         ems;
-    gmx_localtop_t     top;
      gmx_global_stat_t  gstat;
-    t_graph           *graph;
-    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
-    double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
-    real              *rho, *alpha, *p, *s, **dx, **dg;
-    real               a, b, c, maxdelta, delta;
-    real               diag, Epot0;
-    real               dgdx, dgdg, sq, yr, beta;
-    gmx_bool           converged;
-    rvec               mu_tot = {0};
-    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
-    tensor             vir, pres;
-    int                start, end, number_steps;
-    int                i, k, m, n, gf, step;
-    int                mdof_flags;
-    auto               mdatoms = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info).asParagraph().
-        appendText("Note that activating L-BFGS energy minimization via the "
-                   "integrator .mdp option and the command gmx mdrun may "
-                   "be available in a different form in a future version of GROMACS, "
-                   "e.g. gmx minimize and an .mdp option.");
+    auto*              mdatoms = mdAtoms->mdatoms();
+
+    GMX_LOG(mdlog.info)
+            .asParagraph()
+            .appendText(
+                    "Note that activating L-BFGS energy minimization via the "
+                    "integrator .mdp option and the command gmx mdrun may "
+                    "be available in a different form in a future version of GROMACS, "
+                    "e.g. gmx minimize and an .mdp option.");
  
+    if (haveDDAtomOrdering(*cr))
+    {
+        gmx_fatal(FARGS, "L_BFGS is currently not supported");
+    }
      if (PAR(cr))
      {
          gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank");
@@ -1701,76 +1986,112 @@ LegacySimulator::do_lbfgs()
  
      if (nullptr != constr)
      {
-        gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
+        gmx_fatal(
+                FARGS,
+                "The combination of constraints and L-BFGS minimization is not implemented. Either "
+                "do not use constraints, or use another minimizer (e.g. steepest descent).");
      }
  
-    n        = 3*state_global->natoms;
-    nmaxcorr = inputrec->nbfgscorr;
-
-    snew(frozen, n);
+    const int n        = 3 * state_global->natoms;
+    const int nmaxcorr = inputrec->nbfgscorr;
  
-    snew(p, n);
-    snew(rho, nmaxcorr);
-    snew(alpha, nmaxcorr);
+    std::vector<real> p(n);
+    std::vector<real> rho(nmaxcorr);
+    std::vector<real> alpha(nmaxcorr);
  
-    snew(dx, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
+    std::vector<std::vector<real>> dx(nmaxcorr);
+    for (auto& dxCorr : dx)
      {
-        snew(dx[i], n);
+        dxCorr.resize(n);
      }
  
-    snew(dg, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
+    std::vector<std::vector<real>> dg(nmaxcorr);
+    for (auto& dgCorr : dg)
      {
-        snew(dg[i], n);
+        dgCorr.resize(n);
      }
  
-    step  = 0;
-    neval = 0;
+    int step  = 0;
+    int neval = 0;
+
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
  
      /* Init em */
-    init_em(fplog, mdlog, LBFGS, cr, inputrec, imdSession,
+    init_em(fplog,
+            mdlog,
+            LBFGS,
+            cr,
+            inputrec,
+            imdSession,
              pull_work,
-            state_global, top_global, &ems, &top,
-            nrnb, fr, &graph, mdAtoms, &gstat,
-            vsite, constr, nullptr);
-    gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                         StartingBehavior::NewSimulation);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, nullptr, false, mdModulesNotifier);
+            state_global,
+            top_global,
+            &ems,
+            top,
+            nrnb,
+            fr,
+            mdAtoms,
+            &gstat,
+            vsite,
+            constr,
+            nullptr);
+    const bool        simulationsShareState = false;
+    gmx_mdoutf*       outf                  = init_mdoutf(fplog,
+                                   nfile,
+                                   fnm,
+                                   mdrunOptions,
+                                   cr,
+                                   outputProvider,
+                                   mdModulesNotifiers,
+                                   inputrec,
+                                   top_global,
+                                   nullptr,
+                                   wcycle,
+                                   StartingBehavior::NewSimulation,
+                                   simulationsShareState,
+                                   ms);
+    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
+                                   top_global,
+                                   *inputrec,
+                                   pull_work,
+                                   nullptr,
+                                   false,
+                                   StartingBehavior::NewSimulation,
+                                   simulationsShareState,
+                                   mdModulesNotifiers);
  
-    start = 0;
-    end   = mdatoms->homenr;
+    const int start = 0;
+    const int end   = mdatoms->homenr;
  
      /* We need 4 working states */
-    em_state_t  s0 {}, s1 {}, s2 {}, s3 {};
-    em_state_t *sa   = &s0;
-    em_state_t *sb   = &s1;
-    em_state_t *sc   = &s2;
-    em_state_t *last = &s3;
+    em_state_t  s0{}, s1{}, s2{}, s3{};
+    em_state_t* sa   = &s0;
+    em_state_t* sb   = &s1;
+    em_state_t* sc   = &s2;
+    em_state_t* last = &s3;
      /* Initialize by copying the state from ems (we could skip x and f here) */
-    *sa              = ems;
-    *sb              = ems;
-    *sc              = ems;
+    *sa = ems;
+    *sb = ems;
+    *sc = ems;
  
      /* Print to log file */
      print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
  
-    do_log = do_ene = do_x = do_f = TRUE;
-
      /* Max number of steps */
-    number_steps = inputrec->nsteps;
+    const int number_steps = inputrec->nsteps;
  
      /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
-    gf = 0;
-    for (i = start; i < end; i++)
+    std::vector<bool> frozen(n);
+    int               gf = 0;
+    for (int i = start; i < end; i++)
      {
          if (mdatoms->cFREEZE)
          {
              gf = mdatoms->cFREEZE[i];
          }
-        for (m = 0; m < DIM; m++)
+        for (int m = 0; m < DIM; m++)
          {
-            frozen[3*i+m] = (inputrec->opts.nFreeze[gf][m] != 0);
+            frozen[3 * i + m] = (inputrec->opts.nFreeze[gf][m] != 0);
          }
      }
      if (MASTER(cr))
@@ -1784,9 +2105,7 @@ LegacySimulator::do_lbfgs()
  
      if (vsite)
      {
-        construct_vsites(vsite, state_global->x.rvec_array(), 1, nullptr,
-                         top.idef.iparams, top.idef.il,
-                         fr->ePBC, fr->bMolPBC, cr, state_global->box);
+        vsite->construct(state_global->x, {}, state_global->box, VSiteOperation::Positions);
      }
  
      /* Call the force routine and some auxiliary (neighboursearching etc.) */
@@ -1794,27 +2113,52 @@ LegacySimulator::do_lbfgs()
       * We do not unshift, so molecules are always whole
       */
      neval++;
-    EnergyEvaluator energyEvaluator {
-        fplog, mdlog, cr, ms,
-        top_global, &top,
-        inputrec, imdSession, pull_work, nrnb, wcycle, gstat,
-        vsite, constr, fcd, graph,
-        mdAtoms, fr, runScheduleWork, enerd
-    };
-    energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE);
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd };
+    rvec            mu_tot;
+    tensor          vir;
+    tensor          pres;
+    energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE, step);
  
      if (MASTER(cr))
      {
          /* Copy stuff to the energy bin for easy printing etc. */
          matrix nullBox = {};
-        energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step),
-                                         mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox,
-                                         nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
-
-        energyOutput.printHeader(fplog, step, step);
-        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE,
-                                           fplog, step, step,
-                                           fcd, nullptr);
+        energyOutput.addDataAtEnergyStep(false,
+                                         false,
+                                         static_cast<double>(step),
+                                         mdatoms->tmass,
+                                         enerd,
+                                         nullptr,
+                                         nullBox,
+                                         PTCouplingArrays(),
+                                         0,
+                                         vir,
+                                         pres,
+                                         nullptr,
+                                         mu_tot,
+                                         constr);
+
+        EnergyOutput::printHeader(fplog, step, step);
+        energyOutput.printStepToEnergyFile(
+                mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, fr->fcdata.get(), nullptr);
      }
  
      /* Set the initial step.
@@ -1828,21 +2172,21 @@ LegacySimulator::do_lbfgs()
          double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
          fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
          fprintf(stderr, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n", ems.fnorm/sqrtNumAtoms);
+        fprintf(stderr, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
          fprintf(stderr, "\n");
          /* and copy to the log file too... */
          fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
          fprintf(fplog, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n", ems.fnorm/sqrtNumAtoms);
+        fprintf(fplog, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
          fprintf(fplog, "\n");
      }
  
      // Point is an index to the memory of search directions, where 0 is the first one.
-    point = 0;
+    int point = 0;
  
      // Set initial search direction to the force (-gradient), or 0 for frozen particles.
-    real *fInit = static_cast<real *>(ems.f.rvec_array()[0]);
-    for (i = 0; i < n; i++)
+    real* fInit = static_cast<real*>(ems.f.view().force().data()[0]);
+    for (int i = 0; i < n; i++)
      {
          if (!frozen[i])
          {
@@ -1858,25 +2202,28 @@ LegacySimulator::do_lbfgs()
      // (the main efficiency in the algorithm comes from changing directions), but
      // we still need an initial value, so estimate it as the inverse of the norm
      // so we take small steps where the potential fluctuates a lot.
-    stepsize  = 1.0/ems.fnorm;
+    double stepsize = 1.0 / ems.fnorm;
  
      /* Start the loop over BFGS steps.
       * Each successful step is counted, and we continue until
       * we either converge or reach the max number of steps.
       */
  
-    ncorr = 0;
+    bool do_log = true;
+    bool do_ene = true;
+
+    int ncorr = 0;
  
      /* Set the gradient from the force */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
+    bool converged = false;
+    for (int step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
      {
  
          /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
+        const bool do_x = do_per_step(step, inputrec->nstxout);
+        const bool do_f = do_per_step(step, inputrec->nstfout);
  
-        mdof_flags = 0;
+        int mdof_flags = 0;
          if (do_x)
          {
              mdof_flags |= MDOF_X;
@@ -1892,52 +2239,64 @@ LegacySimulator::do_lbfgs()
              mdof_flags |= MDOF_IMD;
          }
  
-        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
-                                         top_global->natoms, step, static_cast<real>(step), &ems.s,
-                                         state_global, observablesHistory, ems.f);
+        gmx::WriteCheckpointDataHolder checkpointDataHolder;
+        mdoutf_write_to_trajectory_files(fplog,
+                                         cr,
+                                         outf,
+                                         mdof_flags,
+                                         top_global.natoms,
+                                         step,
+                                         static_cast<real>(step),
+                                         &ems.s,
+                                         state_global,
+                                         observablesHistory,
+                                         ems.f.view().force(),
+                                         &checkpointDataHolder);
  
          /* Do the linesearching in the direction dx[point][0..(n-1)] */
  
          /* make s a pointer to current search direction - point=0 first time we get here */
-        s = dx[point];
+        gmx::ArrayRef<const real> s = dx[point];
  
-        real *xx = static_cast<real *>(ems.s.x.rvec_array()[0]);
-        real *ff = static_cast<real *>(ems.f.rvec_array()[0]);
+        const real* xx = static_cast<real*>(ems.s.x.rvec_array()[0]);
+        const real* ff = static_cast<real*>(ems.f.view().force().data()[0]);
  
          // calculate line gradient in position A
-        for (gpa = 0, i = 0; i < n; i++)
+        double gpa = 0;
+        for (int i = 0; i < n; i++)
          {
-            gpa -= s[i]*ff[i];
+            gpa -= s[i] * ff[i];
          }
  
          /* Calculate minimum allowed stepsize along the line, before the average (norm)
           * relative change in coordinate is smaller than precision
           */
-        for (minstep = 0, i = 0; i < n; i++)
+        double minstep = 0;
+        for (int i = 0; i < n; i++)
          {
-            tmp = fabs(xx[i]);
+            double tmp = fabs(xx[i]);
              if (tmp < 1.0)
              {
                  tmp = 1.0;
              }
-            tmp      = s[i]/tmp;
-            minstep += tmp*tmp;
+            tmp = s[i] / tmp;
+            minstep += tmp * tmp;
          }
-        minstep = GMX_REAL_EPS/sqrt(minstep/n);
+        minstep = GMX_REAL_EPS / sqrt(minstep / n);
  
          if (stepsize < minstep)
          {
-            converged = TRUE;
+            converged = true;
              break;
          }
  
          // Before taking any steps along the line, store the old position
-        *last       = ems;
-        real *lastx = static_cast<real *>(last->s.x.data()[0]);
-        real *lastf = static_cast<real *>(last->f.data()[0]);
-        Epot0       = ems.epot;
+        *last            = ems;
+        real*      lastx = static_cast<real*>(last->s.x.data()[0]);
+        real*      lastf = static_cast<real*>(last->f.view().force().data()[0]);
+        const real Epot0 = ems.epot;
  
-        *sa         = ems;
+        *sa = ems;
  
          /* Take a step downhill.
           * In theory, we should find the actual minimum of the function in this
@@ -1966,22 +2325,24 @@ LegacySimulator::do_lbfgs()
  
          // State "A" is the first position along the line.
          // reference position along line is initially zero
-        a          = 0.0;
+        real a = 0;
  
          // Check stepsize first. We do not allow displacements
          // larger than emstep.
          //
+        real c;
+        real maxdelta;
          do
          {
              // Pick a new position C by adding stepsize to A.
-            c        = a + stepsize;
+            c = a + stepsize;
  
              // Calculate what the largest change in any individual coordinate
              // would be (translation along line * gradient along line)
              maxdelta = 0;
-            for (i = 0; i < n; i++)
+            for (int i = 0; i < n; i++)
              {
-                delta = c*s[i];
+                real delta = c * s[i];
                  if (delta > maxdelta)
                  {
                      maxdelta = delta;
@@ -1992,25 +2353,25 @@ LegacySimulator::do_lbfgs()
              {
                  stepsize *= 0.1;
              }
-        }
-        while (maxdelta > inputrec->em_stepsize);
+        } while (maxdelta > inputrec->em_stepsize);
  
          // Take a trial step and move the coordinate array xc[] to position C
-        real *xc = static_cast<real *>(sc->s.x.rvec_array()[0]);
-        for (i = 0; i < n; i++)
+        real* xc = static_cast<real*>(sc->s.x.rvec_array()[0]);
+        for (int i = 0; i < n; i++)
          {
-            xc[i] = lastx[i] + c*s[i];
+            xc[i] = lastx[i] + c * s[i];
          }
  
          neval++;
          // Calculate energy for the trial step in position C
-        energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE);
+        energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE, step);
  
          // Calc line gradient in position C
-        real *fc = static_cast<real *>(sc->f.rvec_array()[0]);
-        for (gpc = 0, i = 0; i < n; i++)
+        real*  fc  = static_cast<real*>(sc->f.view().force()[0]);
+        double gpc = 0;
+        for (int i = 0; i < n; i++)
          {
-            gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
+            gpc -= s[i] * fc[i]; /* f is negative gradient, thus the sign */
          }
          /* Sum the gradient along the line across CPUs */
          if (PAR(cr))
@@ -2021,11 +2382,11 @@ LegacySimulator::do_lbfgs()
          // This is the max amount of increase in energy we tolerate.
          // By allowing VERY small changes (close to numerical precision) we
          // frequently find even better (lower) final energies.
-        tmp = std::sqrt(GMX_REAL_EPS)*fabs(sa->epot);
+        double tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot);
  
          // Accept the step if the energy is lower in the new position C (compared to A),
          // or if it is not significantly higher and the line derivative is still negative.
-        foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
+        bool foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
          // If true, great, we found a better energy. We no longer try to alter the
          // stepsize, but simply accept this new better position. The we select a new
          // search direction instead, which will be much more efficient than continuing
@@ -2041,6 +2402,7 @@ LegacySimulator::do_lbfgs()
          // than with the stepsize, so no need to modify it. For the next search direction
          // it will be reset to 1/fnorm anyway.
  
+        double step_taken;
          if (!foundlower)
          {
              // OK, if we didn't find a lower value we will have to locate one now - there must
@@ -2051,21 +2413,22 @@ LegacySimulator::do_lbfgs()
              // I also have a safeguard for potentially really pathological functions so we never
              // take more than 20 steps before we give up.
              // If we already found a lower value we just skip this step and continue to the update.
-            real fnorm = 0;
-            nminstep   = 0;
+            real fnorm    = 0;
+            int  nminstep = 0;
              do
              {
                  // Select a new trial point B in the interval [A,C].
                  // If the derivatives at points a & c have different sign we interpolate to zero,
                  // otherwise just do a bisection since there might be multiple minima/maxima
                  // inside the interval.
+                real b;
                  if (gpa < 0 && gpc > 0)
                  {
-                    b = a + gpa*(a-c)/(gpc-gpa);
+                    b = a + gpa * (a - c) / (gpc - gpa);
                  }
                  else
                  {
-                    b = 0.5*(a+c);
+                    b = 0.5 * (a + c);
                  }
  
                  /* safeguard if interpolation close to machine accuracy causes errors:
@@ -2073,27 +2436,27 @@ LegacySimulator::do_lbfgs()
                   */
                  if (b <= a || b >= c)
                  {
-                    b = 0.5*(a+c);
+                    b = 0.5 * (a + c);
                  }
  
                  // Take a trial step to point B
-                real *xb = static_cast<real *>(sb->s.x.rvec_array()[0]);
-                for (i = 0; i < n; i++)
+                real* xb = static_cast<real*>(sb->s.x.rvec_array()[0]);
+                for (int i = 0; i < n; i++)
                  {
-                    xb[i] = lastx[i] + b*s[i];
+                    xb[i] = lastx[i] + b * s[i];
                  }
  
                  neval++;
                  // Calculate energy for the trial step in point B
-                energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE);
+                energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE, step);
                  fnorm = sb->fnorm;
  
                  // Calculate gradient in point B
-                real *fb = static_cast<real *>(sb->f.rvec_array()[0]);
-                for (gpb = 0, i = 0; i < n; i++)
+                real*  fb  = static_cast<real*>(sb->f.view().force()[0]);
+                double gpb = 0;
+                for (int i = 0; i < n; i++)
                  {
-                    gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
-
+                    gpb -= s[i] * fb[i]; /* f is negative gradient, thus the sign */
                  }
                  /* Sum the gradient along the line across CPUs */
                  if (PAR(cr))
@@ -2106,14 +2469,14 @@ LegacySimulator::do_lbfgs()
                  if (gpb > 0)
                  {
                      /* Replace c endpoint with b */
-                    c   = b;
+                    c = b;
                      /* copy state b to c */
                      *sc = *sb;
                  }
                  else
                  {
                      /* Replace a endpoint with b */
-                    a   = b;
+                    a = b;
                      /* copy state b to a */
                      *sa = *sb;
                  }
@@ -2124,8 +2487,7 @@ LegacySimulator::do_lbfgs()
                   * Never run more than 20 steps, no matter what.
                   */
                  nminstep++;
-            }
-            while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20));
+            } while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20));
  
              if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20)
              {
@@ -2136,7 +2498,7 @@ LegacySimulator::do_lbfgs()
                  if (ncorr == 0)
                  {
                      /* Converged */
-                    converged = TRUE;
+                    converged = true;
                      break;
                  }
                  else
@@ -2144,12 +2506,12 @@ LegacySimulator::do_lbfgs()
                      /* Reset memory */
                      ncorr = 0;
                      /* Search in gradient direction */
-                    for (i = 0; i < n; i++)
+                    for (int i = 0; i < n; i++)
                      {
                          dx[point][i] = ff[i];
                      }
                      /* Reset stepsize */
-                    stepsize = 1.0/fnorm;
+                    stepsize = 1.0 / fnorm;
                      continue;
                  }
              }
@@ -2168,7 +2530,6 @@ LegacySimulator::do_lbfgs()
                  ems        = *sa;
                  step_taken = a;
              }
-
          }
          else
          {
@@ -2188,23 +2549,23 @@ LegacySimulator::do_lbfgs()
              ncorr++;
          }
  
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
          {
-            dg[point][i]  = lastf[i]-ff[i];
+            dg[point][i] = lastf[i] - ff[i];
              dx[point][i] *= step_taken;
          }
  
-        dgdg = 0;
-        dgdx = 0;
-        for (i = 0; i < n; i++)
+        real dgdg = 0;
+        real dgdx = 0;
+        for (int i = 0; i < n; i++)
          {
-            dgdg += dg[point][i]*dg[point][i];
-            dgdx += dg[point][i]*dx[point][i];
+            dgdg += dg[point][i] * dg[point][i];
+            dgdx += dg[point][i] * dx[point][i];
          }
  
-        diag = dgdx/dgdg;
+        const real diag = dgdx / dgdg;
  
-        rho[point] = 1.0/dgdx;
+        rho[point] = 1.0 / dgdx;
          point++;
  
          if (point >= nmaxcorr)
@@ -2213,56 +2574,56 @@ LegacySimulator::do_lbfgs()
          }
  
          /* Update */
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
          {
              p[i] = ff[i];
          }
  
-        cp = point;
+        int cp = point;
  
          /* Recursive update. First go back over the memory points */
-        for (k = 0; k < ncorr; k++)
+        for (int k = 0; k < ncorr; k++)
          {
              cp--;
              if (cp < 0)
              {
-                cp = ncorr-1;
+                cp = ncorr - 1;
              }
  
-            sq = 0;
-            for (i = 0; i < n; i++)
+            real sq = 0;
+            for (int i = 0; i < n; i++)
              {
-                sq += dx[cp][i]*p[i];
+                sq += dx[cp][i] * p[i];
              }
  
-            alpha[cp] = rho[cp]*sq;
+            alpha[cp] = rho[cp] * sq;
  
-            for (i = 0; i < n; i++)
+            for (int i = 0; i < n; i++)
              {
-                p[i] -= alpha[cp]*dg[cp][i];
+                p[i] -= alpha[cp] * dg[cp][i];
              }
          }
  
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
          {
              p[i] *= diag;
          }
  
          /* And then go forward again */
-        for (k = 0; k < ncorr; k++)
+        for (int k = 0; k < ncorr; k++)
          {
-            yr = 0;
-            for (i = 0; i < n; i++)
+            real yr = 0;
+            for (int i = 0; i < n; i++)
              {
-                yr += p[i]*dg[cp][i];
+                yr += p[i] * dg[cp][i];
              }
  
-            beta = rho[cp]*yr;
-            beta = alpha[cp]-beta;
+            real beta = rho[cp] * yr;
+            beta      = alpha[cp] - beta;
  
-            for (i = 0; i < n; i++)
+            for (int i = 0; i < n; i++)
              {
-                p[i] += beta*dx[cp][i];
+                p[i] += beta * dx[cp][i];
              }
  
              cp++;
@@ -2272,7 +2633,7 @@ LegacySimulator::do_lbfgs()
              }
          }
  
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
          {
              if (!frozen[i])
              {
@@ -2290,15 +2651,31 @@ LegacySimulator::do_lbfgs()
              if (mdrunOptions.verbose)
              {
                  double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
-                        step, ems.epot, ems.fnorm/sqrtNumAtoms, ems.fmax, ems.a_fmax + 1);
+                fprintf(stderr,
+                        "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
+                        step,
+                        ems.epot,
+                        ems.fnorm / sqrtNumAtoms,
+                        ems.fmax,
+                        ems.a_fmax + 1);
                  fflush(stderr);
              }
              /* Store the new (lower) energies */
              matrix nullBox = {};
-            energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step),
-                                             mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox,
-                                             nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
+            energyOutput.addDataAtEnergyStep(false,
+                                             false,
+                                             static_cast<double>(step),
+                                             mdatoms->tmass,
+                                             enerd,
+                                             nullptr,
+                                             nullBox,
+                                             PTCouplingArrays(),
+                                             0,
+                                             vir,
+                                             pres,
+                                             nullptr,
+                                             mu_tot,
+                                             constr);
  
              do_log = do_per_step(step, inputrec->nstlog);
              do_ene = do_per_step(step, inputrec->nstenergy);
@@ -2307,15 +2684,21 @@ LegacySimulator::do_lbfgs()
  
              if (do_log)
              {
-                energyOutput.printHeader(fplog, step, step);
+                EnergyOutput::printHeader(fplog, step, step);
              }
-            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                                               do_log ? fplog : nullptr, step, step,
-                                               fcd, nullptr);
+            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
+                                               do_ene,
+                                               FALSE,
+                                               FALSE,
+                                               do_log ? fplog : nullptr,
+                                               step,
+                                               step,
+                                               fr->fcdata.get(),
+                                               nullptr);
          }
  
          /* Send x and E to IMD client, if bIMD is TRUE. */
-        if (imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0) && MASTER(cr))
+        if (imdSession->run(step, TRUE, state_global->box, state_global->x, 0) && MASTER(cr))
          {
              imdSession->sendPositionsAndEnergies();
          }
@@ -2327,20 +2710,18 @@ LegacySimulator::do_lbfgs()
           * If we have reached machine precision, converged is already set to true.
           */
          converged = converged || (ems.fmax < inputrec->em_tol);
-
-    }   /* End of the loop */
+        observablesReducer.markAsReadyToReduce();
+    } /* End of the loop */
  
      if (converged)
      {
          step--; /* we never took that last step in this case */
-
      }
      if (ems.fmax > inputrec->em_tol)
      {
          if (MASTER(cr))
          {
-            warn_step(fplog, inputrec->em_tol, ems.fmax,
-                      step-1 == number_steps, FALSE);
+            warn_step(fplog, inputrec->em_tol, ems.fmax, step - 1 == number_steps, FALSE);
          }
          converged = FALSE;
      }
@@ -2350,13 +2731,19 @@ LegacySimulator::do_lbfgs()
       */
      if (!do_log) /* Write final value to log since we didn't do anythin last step */
      {
-        energyOutput.printHeader(fplog, step, step);
+        EnergyOutput::printHeader(fplog, step, step);
      }
      if (!do_ene || !do_log) /* Write final energy file entries */
      {
-        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                                           !do_log ? fplog : nullptr, step, step,
-                                           fcd, nullptr);
+        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
+                                           !do_ene,
+                                           FALSE,
+                                           FALSE,
+                                           !do_log ? fplog : nullptr,
+                                           step,
+                                           step,
+                                           fr->fcdata.get(),
+                                           nullptr);
      }
  
      /* Print some stuff... */
@@ -2372,19 +2759,16 @@ LegacySimulator::do_lbfgs()
       * However, we should only do it if we did NOT already write this step
       * above (which we did if do_x or do_f was true).
       */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = !do_per_step(step, inputrec->nstfout);
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, step,
-                  &ems, state_global, observablesHistory);
+    const bool do_x = !do_per_step(step, inputrec->nstxout);
+    const bool do_f = !do_per_step(step, inputrec->nstfout);
+    write_em_traj(
+            fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, step, &ems, state_global, observablesHistory);
  
      if (MASTER(cr))
      {
          double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
-                        number_steps, &ems, sqrtNumAtoms);
-        print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
-                        number_steps, &ems, sqrtNumAtoms);
+        print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
+        print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
  
          fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
      }
@@ -2395,43 +2779,78 @@ LegacySimulator::do_lbfgs()
      walltime_accounting_set_nsteps_done(walltime_accounting, step);
  }
  
-void
-LegacySimulator::do_steep()
+void LegacySimulator::do_steep()
  {
-    const char       *SD  = "Steepest Descents";
-    gmx_localtop_t    top;
+    const char*       SD = "Steepest Descents";
      gmx_global_stat_t gstat;
-    t_graph          *graph;
      real              stepsize;
      real              ustep;
      gmx_bool          bDone, bAbort, do_x, do_f;
      tensor            vir, pres;
-    rvec              mu_tot = {0};
+    rvec              mu_tot = { 0 };
      int               nsteps;
      int               count          = 0;
      int               steps_accepted = 0;
-    auto              mdatoms        = mdAtoms->mdatoms();
+    auto*             mdatoms        = mdAtoms->mdatoms();
  
-    GMX_LOG(mdlog.info).asParagraph().
-        appendText("Note that activating steepest-descent energy minimization via the "
-                   "integrator .mdp option and the command gmx mdrun may "
-                   "be available in a different form in a future version of GROMACS, "
-                   "e.g. gmx minimize and an .mdp option.");
+    GMX_LOG(mdlog.info)
+            .asParagraph()
+            .appendText(
+                    "Note that activating steepest-descent energy minimization via the "
+                    "integrator .mdp option and the command gmx mdrun may "
+                    "be available in a different form in a future version of GROMACS, "
+                    "e.g. gmx minimize and an .mdp option.");
  
      /* Create 2 states on the stack and extract pointers that we will swap */
-    em_state_t  s0 {}, s1 {};
-    em_state_t *s_min = &s0;
-    em_state_t *s_try = &s1;
+    em_state_t  s0{}, s1{};
+    em_state_t* s_min = &s0;
+    em_state_t* s_try = &s1;
+
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
  
      /* Init em and store the local state in s_try */
-    init_em(fplog, mdlog, SD, cr, inputrec, imdSession,
+    init_em(fplog,
+            mdlog,
+            SD,
+            cr,
+            inputrec,
+            imdSession,
              pull_work,
-            state_global, top_global, s_try, &top,
-            nrnb, fr, &graph, mdAtoms, &gstat,
-            vsite, constr, nullptr);
-    gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                         StartingBehavior::NewSimulation);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, nullptr, false, mdModulesNotifier);
+            state_global,
+            top_global,
+            s_try,
+            top,
+            nrnb,
+            fr,
+            mdAtoms,
+            &gstat,
+            vsite,
+            constr,
+            nullptr);
+    const bool        simulationsShareState = false;
+    gmx_mdoutf*       outf                  = init_mdoutf(fplog,
+                                   nfile,
+                                   fnm,
+                                   mdrunOptions,
+                                   cr,
+                                   outputProvider,
+                                   mdModulesNotifiers,
+                                   inputrec,
+                                   top_global,
+                                   nullptr,
+                                   wcycle,
+                                   StartingBehavior::NewSimulation,
+                                   simulationsShareState,
+                                   ms);
+    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
+                                   top_global,
+                                   *inputrec,
+                                   pull_work,
+                                   nullptr,
+                                   false,
+                                   StartingBehavior::NewSimulation,
+                                   simulationsShareState,
+                                   mdModulesNotifiers);
  
      /* Print to log file  */
      print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
@@ -2454,13 +2873,25 @@ LegacySimulator::do_steep()
      {
          sp_header(fplog, SD, inputrec->em_tol, nsteps);
      }
-    EnergyEvaluator energyEvaluator {
-        fplog, mdlog, cr, ms,
-        top_global, &top,
-        inputrec, imdSession, pull_work, nrnb, wcycle, gstat,
-        vsite, constr, fcd, graph,
-        mdAtoms, fr, runScheduleWork, enerd
-    };
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd };
  
      /**** HERE STARTS THE LOOP ****
       * count is the counter for the number of steps
@@ -2479,15 +2910,13 @@ LegacySimulator::do_steep()
          bool validStep = true;
          if (count > 0)
          {
-            validStep =
-                do_em_step(cr, inputrec, mdatoms,
-                           s_min, stepsize, &s_min->f, s_try,
-                           constr, count);
+            validStep = do_em_step(
+                    cr, inputrec, mdatoms, s_min, stepsize, s_min->f.view().forceWithPadding(), s_try, constr, count);
          }
  
          if (validStep)
          {
-            energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);
+            energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0, count);
          }
          else
          {
@@ -2497,7 +2926,7 @@ LegacySimulator::do_steep()
  
          if (MASTER(cr))
          {
-            energyOutput.printHeader(fplog, count, count);
+            EnergyOutput::printHeader(fplog, count, count);
          }
  
          if (count == 0)
@@ -2510,28 +2939,42 @@ LegacySimulator::do_steep()
          {
              if (mdrunOptions.verbose)
              {
-                fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
-                        count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
-                        ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r');
+                fprintf(stderr,
+                        "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
+                        count,
+                        ustep,
+                        s_try->epot,
+                        s_try->fmax,
+                        s_try->a_fmax + 1,
+                        ((count == 0) || (s_try->epot < s_min->epot)) ? '\n' : '\r');
                  fflush(stderr);
              }
  
-            if ( (count == 0) || (s_try->epot < s_min->epot) )
+            if ((count == 0) || (s_try->epot < s_min->epot))
              {
                  /* Store the new (lower) energies  */
                  matrix nullBox = {};
-                energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(count),
-                                                 mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox,
-                                                 nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
+                energyOutput.addDataAtEnergyStep(false,
+                                                 false,
+                                                 static_cast<double>(count),
+                                                 mdatoms->tmass,
+                                                 enerd,
+                                                 nullptr,
+                                                 nullBox,
+                                                 PTCouplingArrays(),
+                                                 0,
+                                                 vir,
+                                                 pres,
+                                                 nullptr,
+                                                 mu_tot,
+                                                 constr);
  
                  imdSession->fillEnergyRecord(count, TRUE);
  
                  const bool do_dr = do_per_step(steps_accepted, inputrec->nstdisreout);
                  const bool do_or = do_per_step(steps_accepted, inputrec->nstorireout);
-                energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE,
-                                                   do_dr, do_or,
-                                                   fplog, count, count,
-                                                   fcd, nullptr);
+                energyOutput.printStepToEnergyFile(
+                        mdoutf_get_fp_ene(outf), TRUE, do_dr, do_or, fplog, count, count, fr->fcdata.get(), nullptr);
                  fflush(fplog);
              }
          }
@@ -2541,7 +2984,7 @@ LegacySimulator::do_steep()
           * or if we did random steps!
           */
  
-        if ( (count == 0) || (s_try->epot < s_min->epot) )
+        if ((count == 0) || (s_try->epot < s_min->epot))
          {
              steps_accepted++;
  
@@ -2560,22 +3003,33 @@ LegacySimulator::do_steep()
              /* Write to trn, if necessary */
              do_x = do_per_step(steps_accepted, inputrec->nstxout);
              do_f = do_per_step(steps_accepted, inputrec->nstfout);
-            write_em_traj(fplog, cr, outf, do_x, do_f, nullptr,
-                          top_global, inputrec, count,
-                          s_min, state_global, observablesHistory);
+            write_em_traj(
+                    fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, count, s_min, state_global, observablesHistory);
          }
          else
          {
              /* If energy is not smaller make the step smaller...  */
              ustep *= 0.5;
  
-            if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
+            if (haveDDAtomOrdering(*cr) && s_min->s.ddp_count != cr->dd->ddp_count)
              {
                  /* Reload the old state */
-                em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession,
+                em_dd_partition_system(fplog,
+                                       mdlog,
+                                       count,
+                                       cr,
+                                       top_global,
+                                       inputrec,
+                                       imdSession,
                                         pull_work,
-                                       s_min, &top, mdAtoms, fr, vsite, constr,
-                                       nrnb, wcycle);
+                                       s_min,
+                                       top,
+                                       mdAtoms,
+                                       fr,
+                                       vsite,
+                                       constr,
+                                       nrnb,
+                                       wcycle);
              }
          }
  
@@ -2586,7 +3040,7 @@ LegacySimulator::do_steep()
          if (!bDone)
          {
              /* Determine new step  */
-            stepsize = ustep/s_min->fmax;
+            stepsize = ustep / s_min->fmax;
          }
  
          /* Check if stepsize is too small, with 1 nm as a characteristic length */
@@ -2598,100 +3052,143 @@ LegacySimulator::do_steep()
          {
              if (MASTER(cr))
              {
-                warn_step(fplog, inputrec->em_tol, s_min->fmax,
-                          count == nsteps, constr != nullptr);
+                warn_step(fplog, inputrec->em_tol, s_min->fmax, count == nsteps, constr != nullptr);
              }
              bAbort = TRUE;
          }
  
          /* Send IMD energies and positions, if bIMD is TRUE. */
-        if (imdSession->run(count, TRUE, state_global->box,
-                            MASTER(cr) ? state_global->x.rvec_array() : nullptr,
-                            0) &&
-            MASTER(cr))
+        if (imdSession->run(count,
+                            TRUE,
+                            MASTER(cr) ? state_global->box : nullptr,
+                            MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>(),
+                            0)
+            && MASTER(cr))
          {
              imdSession->sendPositionsAndEnergies();
          }
  
          count++;
-    }   /* End of the loop  */
+        observablesReducer.markAsReadyToReduce();
+    } /* End of the loop  */
  
      /* Print some data...  */
      if (MASTER(cr))
      {
          fprintf(stderr, "\nwriting lowest energy coordinates.\n");
      }
-    write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout != 0, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, count,
-                  s_min, state_global, observablesHistory);
+    write_em_traj(fplog,
+                  cr,
+                  outf,
+                  TRUE,
+                  inputrec->nstfout != 0,
+                  ftp2fn(efSTO, nfile, fnm),
+                  top_global,
+                  inputrec,
+                  count,
+                  s_min,
+                  state_global,
+                  observablesHistory);
  
      if (MASTER(cr))
      {
          double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
  
-        print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
-                        s_min, sqrtNumAtoms);
-        print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
-                        s_min, sqrtNumAtoms);
+        print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
+        print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
      }
  
      finish_em(cr, outf, walltime_accounting, wcycle);
  
      /* To print the actual number of steps we needed somewhere */
-    inputrec->nsteps = count;
+    {
+        // TODO: Avoid changing inputrec (#3854)
+        auto* nonConstInputrec   = const_cast<t_inputrec*>(inputrec);
+        nonConstInputrec->nsteps = count;
+    }
  
      walltime_accounting_set_nsteps_done(walltime_accounting, count);
  }
  
-void
-LegacySimulator::do_nm()
+void LegacySimulator::do_nm()
  {
-    const char          *NM = "Normal Mode Analysis";
-    int                  nnodes;
-    gmx_localtop_t       top;
-    gmx_global_stat_t    gstat;
-    t_graph             *graph;
-    tensor               vir, pres;
-    rvec                 mu_tot = {0};
-    rvec                *dfdx;
-    gmx_bool             bSparse; /* use sparse matrix storage format */
-    size_t               sz;
-    gmx_sparsematrix_t * sparse_matrix           = nullptr;
-    real           *     full_matrix             = nullptr;
+    const char*         NM = "Normal Mode Analysis";
+    int                 nnodes;
+    gmx_global_stat_t   gstat;
+    tensor              vir, pres;
+    rvec                mu_tot = { 0 };
+    rvec*               dfdx;
+    gmx_bool            bSparse; /* use sparse matrix storage format */
+    size_t              sz;
+    gmx_sparsematrix_t* sparse_matrix = nullptr;
+    real*               full_matrix   = nullptr;
  
      /* added with respect to mdrun */
-    int                       row, col;
-    real                      der_range = 10.0*std::sqrt(GMX_REAL_EPS);
-    real                      x_min;
-    bool                      bIsMaster = MASTER(cr);
-    auto                      mdatoms   = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info).asParagraph().
-        appendText("Note that activating normal-mode analysis via the integrator "
-                   ".mdp option and the command gmx mdrun may "
-                   "be available in a different form in a future version of GROMACS, "
-                   "e.g. gmx normal-modes.");
+    int   row, col;
+    real  der_range = 10.0 * std::sqrt(GMX_REAL_EPS);
+    real  x_min;
+    bool  bIsMaster = MASTER(cr);
+    auto* mdatoms   = mdAtoms->mdatoms();
+
+    GMX_LOG(mdlog.info)
+            .asParagraph()
+            .appendText(
+                    "Note that activating normal-mode analysis via the integrator "
+                    ".mdp option and the command gmx mdrun may "
+                    "be available in a different form in a future version of GROMACS, "
+                    "e.g. gmx normal-modes.");
  
      if (constr != nullptr)
      {
-        gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");
+        gmx_fatal(
+                FARGS,
+                "Constraints present with Normal Mode Analysis, this combination is not supported");
      }
  
-    gmx_shellfc_t *shellfc;
+    gmx_shellfc_t* shellfc;
+
+    em_state_t state_work{};
  
-    em_state_t     state_work {};
+    fr->longRangeNonbondeds->updateAfterPartition(*mdAtoms->mdatoms());
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
  
      /* Init em and store the local state in state_minimum */
-    init_em(fplog, mdlog, NM, cr, inputrec, imdSession,
+    init_em(fplog,
+            mdlog,
+            NM,
+            cr,
+            inputrec,
+            imdSession,
              pull_work,
-            state_global, top_global, &state_work, &top,
-            nrnb, fr, &graph, mdAtoms, &gstat,
-            vsite, constr, &shellfc);
-    gmx_mdoutf            *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                              StartingBehavior::NewSimulation);
+            state_global,
+            top_global,
+            &state_work,
+            top,
+            nrnb,
+            fr,
+            mdAtoms,
+            &gstat,
+            vsite,
+            constr,
+            &shellfc);
+    const bool  simulationsShareState = false;
+    gmx_mdoutf* outf                  = init_mdoutf(fplog,
+                                   nfile,
+                                   fnm,
+                                   mdrunOptions,
+                                   cr,
+                                   outputProvider,
+                                   mdModulesNotifiers,
+                                   inputrec,
+                                   top_global,
+                                   nullptr,
+                                   wcycle,
+                                   StartingBehavior::NewSimulation,
+                                   simulationsShareState,
+                                   ms);
  
      std::vector<int>       atom_index = get_atom_index(top_global);
-    std::vector<gmx::RVec> fneg(atom_index.size(), {0, 0, 0});
+    std::vector<gmx::RVec> fneg(atom_index.size(), { 0, 0, 0 });
      snew(dfdx, atom_index.size());
  
  #if !GMX_DOUBLE
@@ -2713,13 +3210,15 @@ LegacySimulator::do_nm()
       */
      if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0)
      {
-        GMX_LOG(mdlog.warning).appendText("Non-cutoff electrostatics used, forcing full Hessian format.");
+        GMX_LOG(mdlog.warning)
+                .appendText("Non-cutoff electrostatics used, forcing full Hessian format.");
          bSparse = FALSE;
      }
      else if (atom_index.size() < 1000)
      {
-        GMX_LOG(mdlog.warning).appendTextFormatted("Small system size (N=%zu), using full Hessian format.",
-                                                   atom_index.size());
+        GMX_LOG(mdlog.warning)
+                .appendTextFormatted("Small system size (N=%zu), using full Hessian format.",
+                                     atom_index.size());
          bSparse = FALSE;
      }
      else
@@ -2729,44 +3228,62 @@ LegacySimulator::do_nm()
      }
  
      /* Number of dimensions, based on real atoms, that is not vsites or shell */
-    sz = DIM*atom_index.size();
+    sz = DIM * atom_index.size();
  
      fprintf(stderr, "Allocating Hessian memory...\n\n");
  
      if (bSparse)
      {
-        sparse_matrix = gmx_sparsematrix_init(sz);
+        sparse_matrix                       = gmx_sparsematrix_init(sz);
          sparse_matrix->compressed_symmetric = TRUE;
      }
      else
      {
-        snew(full_matrix, sz*sz);
+        snew(full_matrix, sz * sz);
      }
  
      /* Write start time and temperature */
      print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
  
      /* fudge nr of steps to nr of atoms */
-    inputrec->nsteps = atom_index.size()*2;
+    {
+        // TODO: Avoid changing inputrec (#3854)
+        auto* nonConstInputrec   = const_cast<t_inputrec*>(inputrec);
+        nonConstInputrec->nsteps = atom_index.size() * 2;
+    }
  
      if (bIsMaster)
      {
-        fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n",
-                *(top_global->name), inputrec->nsteps);
+        fprintf(stderr,
+                "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n",
+                *(top_global.name),
+                inputrec->nsteps);
      }
  
      nnodes = cr->nnodes;
  
      /* Make evaluate_energy do a single node force calculation */
      cr->nnodes = 1;
-    EnergyEvaluator energyEvaluator {
-        fplog, mdlog, cr, ms,
-        top_global, &top,
-        inputrec, imdSession, pull_work, nrnb, wcycle, gstat,
-        vsite, constr, fcd, graph,
-        mdAtoms, fr, runScheduleWork, enerd
-    };
-    energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE);
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd };
+    energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE, 0);
      cr->nnodes = nnodes;
  
      /* if forces are not small, warn user */
@@ -2775,11 +3292,12 @@ LegacySimulator::do_nm()
      GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax);
      if (state_work.fmax > 1.0e-3)
      {
-        GMX_LOG(mdlog.warning).appendText(
-                "The force is probably not small enough to "
-                "ensure that you are at a minimum.\n"
-                "Be aware that negative eigenvalues may occur\n"
-                "when the resulting matrix is diagonalized.");
+        GMX_LOG(mdlog.warning)
+                .appendText(
+                        "The force is probably not small enough to "
+                        "ensure that you are at a minimum.\n"
+                        "Be aware that negative eigenvalues may occur\n"
+                        "when the resulting matrix is diagonalized.");
      }
  
      /***********************************************************
@@ -2794,15 +3312,15 @@ LegacySimulator::do_nm()
      /* Steps are divided one by one over the nodes */
      bool bNS          = true;
      auto state_work_x = makeArrayRef(state_work.s.x);
-    auto state_work_f = makeArrayRef(state_work.f);
+    auto state_work_f = state_work.f.view().force();
      for (index aid = cr->nodeid; aid < ssize(atom_index); aid += nnodes)
      {
          size_t atom = atom_index[aid];
          for (size_t d = 0; d < DIM; d++)
          {
-            int64_t     step        = 0;
-            int         force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
-            double      t           = 0;
+            int64_t step        = 0;
+            int     force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
+            double  t           = 0;
  
              x_min = state_work_x[atom][d];
  
@@ -2833,22 +3351,21 @@ LegacySimulator::do_nm()
                                          pull_work,
                                          bNS,
                                          force_flags,
-                                        &top,
+                                        top,
                                          constr,
                                          enerd,
-                                        fcd,
                                          state_work.s.natoms,
                                          state_work.s.x.arrayRefWithPadding(),
                                          state_work.s.v.arrayRefWithPadding(),
                                          state_work.s.box,
                                          state_work.s.lambda,
                                          &state_work.s.hist,
-                                        state_work.f.arrayRefWithPadding(),
+                                        &state_work.f.view(),
                                          vir,
-                                        mdatoms,
+                                        *mdatoms,
+                                        fr->longRangeNonbondeds.get(),
                                          nrnb,
                                          wcycle,
-                                        graph,
                                          shellfc,
                                          fr,
                                          runScheduleWork,
@@ -2861,14 +3378,14 @@ LegacySimulator::do_nm()
                  }
                  else
                  {
-                    energyEvaluator.run(&state_work, mu_tot, vir, pres, aid*2+dx, FALSE);
+                    energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE, step);
                  }
  
                  cr->nnodes = nnodes;
  
                  if (dx == 0)
                  {
-                    std::copy(state_work_f.begin(), state_work_f.begin()+atom_index.size(), fneg.begin());
+                    std::copy(state_work_f.begin(), state_work_f.begin() + atom_index.size(), fneg.begin());
                  }
              }
  
@@ -2879,52 +3396,48 @@ LegacySimulator::do_nm()
              {
                  for (size_t k = 0; (k < DIM); k++)
                  {
-                    dfdx[j][k] =
-                        -(state_work_f[atom_index[j]][k] - fneg[j][k])/(2*der_range);
+                    dfdx[j][k] = -(state_work_f[atom_index[j]][k] - fneg[j][k]) / (2 * der_range);
                  }
              }
  
              if (!bIsMaster)
              {
  #if GMX_MPI
-#define mpi_type GMX_MPI_REAL
-                MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr),
-                         cr->nodeid, cr->mpi_comm_mygroup);
+#    define mpi_type GMX_MPI_REAL
+                MPI_Send(dfdx[0], atom_index.size() * DIM, mpi_type, MASTER(cr), cr->nodeid, cr->mpi_comm_mygroup);
  #endif
              }
              else
              {
-                for (index node = 0; (node < nnodes && aid+node < ssize(atom_index)); node++)
+                for (index node = 0; (node < nnodes && aid + node < ssize(atom_index)); node++)
                  {
                      if (node > 0)
                      {
  #if GMX_MPI
                          MPI_Status stat;
-                        MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node,
-                                 cr->mpi_comm_mygroup, &stat);
-#undef mpi_type
+                        MPI_Recv(dfdx[0], atom_index.size() * DIM, mpi_type, node, node, cr->mpi_comm_mygroup, &stat);
+#    undef mpi_type
  #endif
                      }
  
-                    row = (aid + node)*DIM + d;
+                    row = (aid + node) * DIM + d;
  
                      for (size_t j = 0; j < atom_index.size(); j++)
                      {
                          for (size_t k = 0; k < DIM; k++)
                          {
-                            col = j*DIM + k;
+                            col = j * DIM + k;
  
                              if (bSparse)
                              {
                                  if (col >= row && dfdx[j][k] != 0.0)
                                  {
-                                    gmx_sparsematrix_increment_value(sparse_matrix,
-                                                                     row, col, dfdx[j][k]);
+                                    gmx_sparsematrix_increment_value(sparse_matrix, row, col, dfdx[j][k]);
                                  }
                              }
                              else
                              {
-                                full_matrix[row*sz+col] = dfdx[j][k];
+                                full_matrix[row * sz + col] = dfdx[j][k];
                              }
                          }
                      }
@@ -2939,8 +3452,9 @@ LegacySimulator::do_nm()
          /* write progress */
          if (bIsMaster && mdrunOptions.verbose)
          {
-            fprintf(stderr, "\rFinished step %d out of %td",
-                    std::min<int>(atom+nnodes, atom_index.size()),
+            fprintf(stderr,
+                    "\rFinished step %d out of %td",
+                    std::min<int>(atom + nnodes, atom_index.size()),
                      ssize(atom_index));
              fflush(stderr);
          }
@@ -2954,7 +3468,7 @@ LegacySimulator::do_nm()
  
      finish_em(cr, outf, walltime_accounting, wcycle);
  
-    walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2);
+    walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size() * 2);
  }
  
  } // namespace gmx