Merge release-2019 into master
[alexxy/gromacs.git] src/gromacs/mdrun/runner.cpp
index 370cff95f94c657027b79afd16368f194ac41918..79ebf9fd33fc50042233446d27e68de5c780b7f6 100644
 #include <cstring>
 
 #include <algorithm>
+#include <memory>
 
 #include "gromacs/commandline/filenm.h"
-#include "gromacs/compat/make_unique.h"
 #include "gromacs/domdec/domdec.h"
 #include "gromacs/domdec/domdec_struct.h"
 #include "gromacs/domdec/localatomsetmanager.h"
-#include "gromacs/ewald/ewald-utils.h"
+#include "gromacs/domdec/partition.h"
+#include "gromacs/ewald/ewald_utils.h"
 #include "gromacs/ewald/pme.h"
-#include "gromacs/ewald/pme-gpu-program.h"
+#include "gromacs/ewald/pme_gpu_program.h"
 #include "gromacs/fileio/checkpoint.h"
 #include "gromacs/fileio/gmxfio.h"
 #include "gromacs/fileio/oenv.h"
 #include "gromacs/hardware/cpuinfo.h"
 #include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/printhardware.h"
-#include "gromacs/listed-forces/disre.h"
-#include "gromacs/listed-forces/gpubonded.h"
-#include "gromacs/listed-forces/orires.h"
+#include "gromacs/imd/imd.h"
+#include "gromacs/listed_forces/disre.h"
+#include "gromacs/listed_forces/gpubonded.h"
+#include "gromacs/listed_forces/orires.h"
 #include "gromacs/math/functions.h"
 #include "gromacs/math/utilities.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/mdlib/boxdeformation.h"
+#include "gromacs/mdlib/broadcaststructs.h"
 #include "gromacs/mdlib/calc_verletbuf.h"
+#include "gromacs/mdlib/dispersioncorrection.h"
+#include "gromacs/mdlib/enerdata_utils.h"
+#include "gromacs/mdlib/force.h"
 #include "gromacs/mdlib/forcerec.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
 #include "gromacs/mdlib/makeconstraints.h"
 #include "gromacs/mdlib/md_support.h"
 #include "gromacs/mdlib/mdatoms.h"
-#include "gromacs/mdlib/mdrun.h"
 #include "gromacs/mdlib/membed.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
-#include "gromacs/mdlib/nbnxn_search.h"
-#include "gromacs/mdlib/nbnxn_tuning.h"
 #include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdlib/qmmm.h"
 #include "gromacs/mdlib/sighandler.h"
-#include "gromacs/mdlib/sim_util.h"
 #include "gromacs/mdlib/stophandler.h"
-#include "gromacs/mdrun/legacymdrunoptions.h"
-#include "gromacs/mdrun/logging.h"
-#include "gromacs/mdrun/multisim.h"
+#include "gromacs/mdrun/mdmodules.h"
 #include "gromacs/mdrun/simulationcontext.h"
-#include "gromacs/mdrunutility/mdmodules.h"
+#include "gromacs/mdrunutility/handlerestart.h"
+#include "gromacs/mdrunutility/logging.h"
+#include "gromacs/mdrunutility/multisim.h"
+#include "gromacs/mdrunutility/printtime.h"
 #include "gromacs/mdrunutility/threadaffinity.h"
 #include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/enerdata.h"
 #include "gromacs/mdtypes/fcdata.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/mdrunoptions.h"
 #include "gromacs/mdtypes/observableshistory.h"
 #include "gromacs/mdtypes/state.h"
+#include "gromacs/nbnxm/gpu_data_mgmt.h"
+#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/pairlist_tuning.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/pulling/output.h"
 #include "gromacs/pulling/pull.h"
 #include "gromacs/taskassignment/resourcedivision.h"
 #include "gromacs/taskassignment/taskassignment.h"
 #include "gromacs/taskassignment/usergpuids.h"
+#include "gromacs/timing/gpu_timing.h"
 #include "gromacs/timing/wallcycle.h"
+#include "gromacs/timing/wallcyclereporting.h"
 #include "gromacs/topology/mtop_util.h"
 #include "gromacs/trajectory/trajectoryframe.h"
 #include "gromacs/utility/basenetwork.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/stringutil.h"
 
-#include "integrator.h"
+#include "legacysimulator.h"
 #include "replicaexchange.h"
 
 #if GMX_FAHCORE
 namespace gmx
 {
 
+/*! \brief Log if development feature flags are encountered
+ *
+ * The use of dev features indicated by environment variables is logged
+ * in order to ensure that runs with such features enabled can be identified
+ * from their log and standard output.
+ *
+ * \param[in]  mdlog        Logger object.
+ */
+static void reportDevelopmentFeatures(const gmx::MDLogger &mdlog)
+{
+    const bool enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
+    const bool useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
+
+    if (enableGpuBufOps)
+    {
+        GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
+                "NOTE: This run uses the 'GPU buffer ops' feature, enabled by the GMX_USE_GPU_BUFFER_OPS environment variable.");
+    }
+
+    if (useGpuUpdateConstrain)
+    {
+        GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
+                "NOTE: This run uses the 'GPU update/constraints' feature, enabled by the GMX_UPDATE_CONSTRAIN_GPU environment variable.");
+    }
+}
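
The same getenv-plus-GMX_LOG pattern extends to further flags. A minimal sketch, assuming a placeholder variable name (GMX_HYPOTHETICAL_FEATURE is used only for illustration and is not an existing GROMACS variable):

static void reportHypotheticalFeature(const gmx::MDLogger &mdlog)
{
    // Placeholder environment variable; mirrors the reporting pattern used above.
    if (getenv("GMX_HYPOTHETICAL_FEATURE") != nullptr)
    {
        GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
                "NOTE: This run uses the 'hypothetical' feature, enabled by the GMX_HYPOTHETICAL_FEATURE environment variable.");
    }
}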
+
 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
  *
  * Used to ensure that the master thread does not modify mdrunner during copy
@@ -163,13 +197,13 @@ static void threadMpiMdrunnerAccessBarrier()
 
 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 {
-    auto newRunner = Mdrunner();
+    auto newRunner = Mdrunner(std::make_unique<MDModules>());
 
     // All runners in the same process share a restraint manager resource because it is
     // part of the interface to the client code, which is associated only with the
     // original thread. Handles to the same resources can be obtained by copy.
     {
-        newRunner.restraintManager_ = compat::make_unique<RestraintManager>(*restraintManager_);
+        newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
     }
 
     // Copy original cr pointer before master thread can pass the thread barrier
@@ -192,7 +226,8 @@ Mdrunner Mdrunner::cloneOnSpawnedThread() const
     newRunner.replExParams        = replExParams;
     newRunner.pforce              = pforce;
     newRunner.ms                  = ms;
-    newRunner.stopHandlerBuilder_ = compat::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
+    newRunner.startingBehavior    = startingBehavior;
+    newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 
     threadMpiMdrunnerAccessBarrier();
 
@@ -239,7 +274,7 @@ t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
 
 #if GMX_THREAD_MPI
     /* now spawn new threads that start mdrunner_start_fn(), while
-       the main thread returns, we set thread affinity later */
+       the main thread returns. Thread affinity is handled later. */
     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
     {
@@ -280,8 +315,8 @@ static void prepare_verlet_scheme(FILE                           *fplog,
         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 
-        real               rlist_new;
-        calc_verlet_buffer_size(mtop, det(box), ir, ir->nstlist, ir->nstlist - 1, -1, &listSetup, nullptr, &rlist_new);
+        const real         rlist_new =
+            calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
 
         if (rlist_new != ir->rlist)
         {
@@ -417,10 +452,144 @@ static TaskTarget findTaskTarget(const char *optionString)
     return returnValue;
 }
 
+//! Finish run, aggregate data to print performance info.
+static void finish_run(FILE *fplog,
+                       const gmx::MDLogger &mdlog,
+                       const t_commrec *cr,
+                       const t_inputrec *inputrec,
+                       t_nrnb nrnb[], gmx_wallcycle_t wcycle,
+                       gmx_walltime_accounting_t walltime_accounting,
+                       nonbonded_verlet_t *nbv,
+                       const gmx_pme_t *pme,
+                       gmx_bool bWriteStat)
+{
+    double  delta_t  = 0;
+    double  nbfs     = 0, mflop = 0;
+    double  elapsed_time,
+            elapsed_time_over_all_ranks,
+            elapsed_time_over_all_threads,
+            elapsed_time_over_all_threads_over_all_ranks;
+    /* Control whether it is valid to print a report. Only the
+       simulation master may print, but it should not do so if the run
+       terminated e.g. before a scheduled reset step. This is
+       complicated by the fact that PME ranks are unaware of the
+       reason why they were sent a pmerecvqxFINISH. To avoid
+       communication deadlocks, we always do the communication for the
+       report, even if we've decided not to write the report, because
+       how long it takes to finish the run is not important when we've
+       decided not to report on the simulation performance.
+
+       Further, we only report performance for dynamical integrators,
+       because those are the only ones for which we plan to
+       consider doing any optimizations. */
+    bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
+
+    if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
+    {
+        GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
+        printReport = false;
+    }
+
+    t_nrnb                  *nrnb_tot;
+    std::unique_ptr<t_nrnb>  nrnbTotalStorage;
+    if (cr->nnodes > 1)
+    {
+        nrnbTotalStorage = std::make_unique<t_nrnb>();
+        nrnb_tot         = nrnbTotalStorage.get();
+#if GMX_MPI
+        MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+#endif
+    }
+    else
+    {
+        nrnb_tot = nrnb;
+    }
+
+    elapsed_time                  = walltime_accounting_get_time_since_reset(walltime_accounting);
+    elapsed_time_over_all_threads = walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
+    if (cr->nnodes > 1)
+    {
+#if GMX_MPI
+        /* reduce elapsed_time over all MPI ranks in the current simulation */
+        MPI_Allreduce(&elapsed_time,
+                      &elapsed_time_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+        elapsed_time_over_all_ranks /= cr->nnodes;
+        /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
+         * current simulation. */
+        MPI_Allreduce(&elapsed_time_over_all_threads,
+                      &elapsed_time_over_all_threads_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+#endif
+    }
+    else
+    {
+        elapsed_time_over_all_ranks                  = elapsed_time;
+        elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
+    }
+
+    if (printReport)
+    {
+        print_flop(fplog, nrnb_tot, &nbfs, &mflop);
+    }
+
+    if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
+    {
+        print_dd_statistics(cr, inputrec, fplog);
+    }
+
+    /* TODO Move the responsibility for any scaling by thread counts
+     * to the code that handled the thread region, so that there's a
+     * mechanism to keep cycle counting working during the transition
+     * to task parallelism. */
+    int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
+    int nthreads_pme = gmx_omp_nthreads_get(emntPME);
+    wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
+    auto cycle_sum(wallcycle_sum(cr, wcycle));
+
+    if (printReport)
+    {
+        auto                    nbnxn_gpu_timings = (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
+        gmx_wallclock_gpu_pme_t pme_gpu_timings   = {};
+
+        if (pme_gpu_task_enabled(pme))
+        {
+            pme_gpu_get_timings(pme, &pme_gpu_timings);
+        }
+        wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
+                        elapsed_time_over_all_ranks,
+                        wcycle, cycle_sum,
+                        nbnxn_gpu_timings,
+                        &pme_gpu_timings);
+
+        if (EI_DYNAMICS(inputrec->eI))
+        {
+            delta_t = inputrec->delta_t;
+        }
+
+        if (fplog)
+        {
+            print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
+                       elapsed_time_over_all_ranks,
+                       walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
+                       delta_t, nbfs, mflop);
+        }
+        if (bWriteStat)
+        {
+            print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
+                       elapsed_time_over_all_ranks,
+                       walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
+                       delta_t, nbfs, mflop);
+        }
+    }
+}
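
The report logic above relies on every rank entering the reductions even when the report is suppressed. A minimal, self-contained sketch of that "always communicate, conditionally print" pattern in plain MPI (the names comm, isMaster, printReport and localTime are placeholders, not GROMACS API):

#include <cstdio>
#include <mpi.h>

static void reportAverageTime(MPI_Comm comm, bool isMaster, bool printReport,
                              double localTime)
{
    // All ranks reduce, so no rank is left blocked in MPI_Allreduce when
    // the master decides not to print.
    double summedTime = 0;
    int    numRanks   = 1;
    MPI_Comm_size(comm, &numRanks);
    MPI_Allreduce(&localTime, &summedTime, 1, MPI_DOUBLE, MPI_SUM, comm);
    if (isMaster && printReport)
    {
        std::printf("Average wall time: %g s\n", summedTime / numRanks);
    }
}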
+
 int Mdrunner::mdrunner()
 {
     matrix                    box;
-    t_nrnb                   *nrnb;
     t_forcerec               *fr               = nullptr;
     t_fcdata                 *fcd              = nullptr;
     real                      ewaldcoeff_q     = 0;
@@ -428,15 +597,11 @@ int Mdrunner::mdrunner()
     int                       nChargePerturbed = -1, nTypePerturbed = 0;
     gmx_wallcycle_t           wcycle;
     gmx_walltime_accounting_t walltime_accounting = nullptr;
-    int                       rc;
-    int64_t                   reset_counters;
-    int                       nthreads_pme = 1;
-    gmx_membed_t *            membed       = nullptr;
-    gmx_hw_info_t            *hwinfo       = nullptr;
+    gmx_membed_t *            membed              = nullptr;
+    gmx_hw_info_t            *hwinfo              = nullptr;
 
     /* CAUTION: threads may be started later on in this function, so
        cr doesn't reflect the final parallel state right now */
-    std::unique_ptr<gmx::MDModules> mdModules(new gmx::MDModules);
     t_inputrec                      inputrecInstance;
     t_inputrec                     *inputrec = &inputrecInstance;
     gmx_mtop_t                      mtop;
@@ -447,30 +612,11 @@ int Mdrunner::mdrunner()
     // Handle task-assignment related user options.
     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
-    std::vector<int>    gpuIdsAvailable;
-    try
-    {
-        gpuIdsAvailable = parseUserGpuIds(hw_opt.gpuIdsAvailable);
-        // TODO We could put the GPU IDs into a std::map to find
-        // duplicates, but for the small numbers of IDs involved, this
-        // code is simple and fast.
-        for (size_t i = 0; i != gpuIdsAvailable.size(); ++i)
-        {
-            for (size_t j = i+1; j != gpuIdsAvailable.size(); ++j)
-            {
-                if (gpuIdsAvailable[i] == gpuIdsAvailable[j])
-                {
-                    GMX_THROW(InvalidInputError(formatString("The string of available GPU device IDs '%s' may not contain duplicate device IDs", hw_opt.gpuIdsAvailable.c_str())));
-                }
-            }
-        }
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 
     std::vector<int> userGpuTaskAssignment;
     try
     {
-        userGpuTaskAssignment = parseUserGpuIds(hw_opt.userGpuTaskAssignment);
+        userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
     }
     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
@@ -487,126 +633,45 @@ int Mdrunner::mdrunner()
     // to check that the old log file matches what the checkpoint file
     // expects. Otherwise, we should start to write log output now if
     // there is a file ready for it.
-    if (logFileHandle != nullptr && !mdrunOptions.continuationOptions.appendFiles)
+    if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
     {
         fplog = gmx_fio_getfp(logFileHandle);
     }
     gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
     gmx::MDLogger    mdlog(logOwner.logger());
 
-    // TODO The thread-MPI master rank makes a working
-    // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
-    // after the threads have been launched. This works because no use
-    // is made of that communicator until after the execution paths
-    // have rejoined. But it is likely that we can improve the way
-    // this is expressed, e.g. by expressly running detection only the
-    // master rank for thread-MPI, rather than relying on the mutex
-    // and reference count.
+    // report any development features that may be enabled by environment variables
+    reportDevelopmentFeatures(mdlog);
+
+    // With thread-MPI, the communicator changes after threads are
+    // launched, so this is rebuilt for the master rank at that
+    // time. The non-master ranks are fine to keep the one made here.
     PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 
-    gmx_print_detected_hardware(fplog, cr, ms, mdlog, hwinfo);
+    gmx_print_detected_hardware(fplog, isMasterSimMasterRank(ms, MASTER(cr)), mdlog, hwinfo);
 
-    std::vector<int> gpuIdsToUse;
-    auto             compatibleGpus = getCompatibleGpus(hwinfo->gpu_info);
-    if (gpuIdsAvailable.empty())
-    {
-        gpuIdsToUse = compatibleGpus;
-    }
-    else
-    {
-        for (const auto &availableGpuId : gpuIdsAvailable)
-        {
-            bool availableGpuIsCompatible = false;
-            for (const auto &compatibleGpuId : compatibleGpus)
-            {
-                if (availableGpuId == compatibleGpuId)
-                {
-                    availableGpuIsCompatible = true;
-                    break;
-                }
-            }
-            if (!availableGpuIsCompatible)
-            {
-                gmx_fatal(FARGS, "You limited the set of compatible GPUs to a set that included ID #%d, but that ID is not for a compatible GPU. List only compatible GPUs.", availableGpuId);
-            }
-            gpuIdsToUse.push_back(availableGpuId);
-        }
-    }
+    std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
 
-    if (fplog != nullptr)
-    {
-        /* Print references after all software/hardware printing */
-        please_cite(fplog, "Abraham2015");
-        please_cite(fplog, "Pall2015");
-        please_cite(fplog, "Pronk2013");
-        please_cite(fplog, "Hess2008b");
-        please_cite(fplog, "Spoel2005a");
-        please_cite(fplog, "Lindahl2001a");
-        please_cite(fplog, "Berendsen95a");
-        writeSourceDoi(fplog);
-    }
+    // Print citation requests after all software/hardware printing
+    pleaseCiteGromacs(fplog);
 
     std::unique_ptr<t_state> globalState;
 
     if (SIMMASTER(cr))
     {
         /* Only the master rank has the global state */
-        globalState = compat::make_unique<t_state>();
+        globalState = std::make_unique<t_state>();
 
         /* Read (nearly) all data required for the simulation */
         read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), inputrec, globalState.get(), &mtop);
-
-        /* In rerun, set velocities to zero if present */
-        if (doRerun && ((globalState->flags & (1 << estV)) != 0))
-        {
-            // rerun does not use velocities
-            GMX_LOG(mdlog.info).asParagraph().appendText(
-                    "Rerun trajectory contains velocities. Rerun does only evaluate "
-                    "potential energy and forces. The velocities will be ignored.");
-            for (int i = 0; i < globalState->natoms; i++)
-            {
-                clear_rvec(globalState->v[i]);
-            }
-            globalState->flags &= ~(1 << estV);
-        }
-
-        if (inputrec->cutoff_scheme != ecutsVERLET)
-        {
-            if (nstlist_cmdline > 0)
-            {
-                gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme");
-            }
-
-            if (!compatibleGpus.empty())
-            {
-                GMX_LOG(mdlog.warning).asParagraph().appendText(
-                        "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
-                        "      To use a GPU, set the mdp option: cutoff-scheme = Verlet");
-            }
-        }
     }
 
     /* Check and update the hardware options for internal consistency */
-    check_and_update_hw_opt_1(mdlog, &hw_opt, cr, domdecOptions.numPmeRanks);
-
-    /* Early check for externally set process affinity. */
-    gmx_check_thread_affinity_set(mdlog, cr,
-                                  &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+    checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks);
 
     if (GMX_THREAD_MPI && SIMMASTER(cr))
     {
-        if (domdecOptions.numPmeRanks > 0 && hw_opt.nthreads_tmpi <= 0)
-        {
-            gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks");
-        }
-
-        /* Since the master knows the cut-off scheme, update hw_opt for this.
-         * This is done later for normal MPI and also once more with tMPI
-         * for all tMPI ranks.
-         */
-        check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme);
-
         bool useGpuForNonbonded = false;
         bool useGpuForPme       = false;
         try
@@ -618,7 +683,6 @@ int Mdrunner::mdrunner()
             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
                     canUseGpuForNonbonded,
-                    inputrec->cutoff_scheme == ecutsVERLET,
                     gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
                     hw_opt.nthreads_tmpi);
             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
@@ -683,12 +747,10 @@ int Mdrunner::mdrunner()
         // handle. If unsuitable, we will notice that during task
         // assignment.
         bool gpusWereDetected      = hwinfo->ngpu_compatible_tot > 0;
-        bool usingVerletScheme     = inputrec->cutoff_scheme == ecutsVERLET;
         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
                                                                 emulateGpuNonbonded,
                                                                 canUseGpuForNonbonded,
-                                                                usingVerletScheme,
                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
                                                                 gpusWereDetected);
         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
@@ -697,7 +759,7 @@ int Mdrunner::mdrunner()
                                                     gpusWereDetected);
         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
         useGpuForBonded =
-            decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme, usingVerletScheme,
+            decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
                                             bondedTarget, canUseGpuForBonded,
                                             EVDW_PME(inputrec->vdwtype),
                                             EEL_PME_EWALD(inputrec->coulombtype),
@@ -722,17 +784,17 @@ int Mdrunner::mdrunner()
     // TODO: hide restraint implementation details from Mdrunner.
     // There is nothing unique about restraints at this point as far as the
     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
-    // factory functions from the SimulationContext on which to call mdModules->add().
+    // factory functions from the SimulationContext on which to call mdModules_->add().
     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
     for (auto && restraint : restraintManager_->getRestraints())
     {
         auto module = RestraintMDModule::create(restraint,
                                                 restraint->sites());
-        mdModules->add(std::move(module));
+        mdModules_->add(std::move(module));
     }
 
     // TODO: Error handling
-    mdModules->assignOptionsToModules(*inputrec->params, nullptr);
+    mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 
     if (fplog != nullptr)
     {
@@ -742,6 +804,20 @@ int Mdrunner::mdrunner()
 
     if (SIMMASTER(cr))
     {
+        /* In rerun, set velocities to zero if present */
+        if (doRerun && ((globalState->flags & (1 << estV)) != 0))
+        {
+            // rerun does not use velocities
+            GMX_LOG(mdlog.info).asParagraph().appendText(
+                    "Rerun trajectory contains velocities. Rerun only evaluates "
+                    "potential energy and forces. The velocities will be ignored.");
+            for (int i = 0; i < globalState->natoms; i++)
+            {
+                clear_rvec(globalState->v[i]);
+            }
+            globalState->flags &= ~(1 << estV);
+        }
+
         /* now make sure the state is initialized and propagated */
         set_state_entries(globalState.get(), inputrec);
     }
@@ -753,7 +829,7 @@ int Mdrunner::mdrunner()
     {
         if (!MASTER(cr))
         {
-            globalState = compat::make_unique<t_state>();
+            globalState = std::make_unique<t_state>();
         }
         broadcastStateWithoutDynamics(cr, globalState.get());
     }
@@ -784,11 +860,6 @@ int Mdrunner::mdrunner()
         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
     }
 
-    if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr))
-    {
-        gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank");
-    }
-
     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
     {
         if (domdecOptions.numPmeRanks > 0)
@@ -846,15 +917,11 @@ int Mdrunner::mdrunner()
 
     ObservablesHistory   observablesHistory = {};
 
-    ContinuationOptions &continuationOptions = mdrunOptions.continuationOptions;
-
-    if (continuationOptions.startedFromCheckpoint)
+    if (startingBehavior != StartingBehavior::NewSimulation)
     {
         /* Check if checkpoint file exists before doing continuation.
          * This way we can use identical input options for the first and subsequent runs...
          */
-        gmx_bool bReadEkin;
-
         if (mdrunOptions.numStepsCommandline > -2)
         {
             /* Temporarily set the number of steps to unlimited to avoid
@@ -868,17 +935,10 @@ int Mdrunner::mdrunner()
                         logFileHandle,
                         cr, domdecOptions.numCells,
                         inputrec, globalState.get(),
-                        &bReadEkin, &observablesHistory,
-                        continuationOptions.appendFiles,
-                        continuationOptions.appendFilesOptionSet,
+                        &observablesHistory,
                         mdrunOptions.reproducible);
 
-        if (bReadEkin)
-        {
-            continuationOptions.haveReadEkin = true;
-        }
-
-        if (continuationOptions.appendFiles && logFileHandle)
+        if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
         {
             // Now we can start normal logging to the truncated log file.
             fplog    = gmx_fio_getfp(logFileHandle);
@@ -907,12 +967,13 @@ int Mdrunner::mdrunner()
         gmx_bcast(sizeof(box), box, cr);
     }
 
-    /* Update rlist and nstlist. */
-    if (inputrec->cutoff_scheme == ecutsVERLET)
+    if (inputrec->cutoff_scheme != ecutsVERLET)
     {
-        prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
-                              useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
+        gmx_fatal(FARGS, "This group-scheme .tpr file can no longer be run by mdrun. Please update to the Verlet scheme, or use an earlier version of GROMACS if necessary.");
     }
+    /* Update rlist and nstlist. */
+    prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
+                          useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
 
     LocalAtomSetManager atomSets;
 
@@ -966,9 +1027,13 @@ int Mdrunner::mdrunner()
     fflush(stderr);
 #endif
 
-    /* Check and update hw_opt for the cut-off scheme */
-    check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme);
-
+    // mdrun -pin auto honors any affinity setting that already
+    // exists. It is nice to provide feedback about whether that
+    // existing affinity setting came from OpenMP or something else,
+    // so we run this check both before and after we initialize the
+    // OpenMP support.
+    gmx_check_thread_affinity_set(mdlog,
+                                  &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
     /* Check and update the number of OpenMP threads requested */
     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
                                             pmeRunMode, mtop);
@@ -978,17 +1043,16 @@ int Mdrunner::mdrunner()
                           physicalNodeComm.size_,
                           hw_opt.nthreads_omp,
                           hw_opt.nthreads_omp_pme,
-                          !thisRankHasDuty(cr, DUTY_PP),
-                          inputrec->cutoff_scheme == ecutsVERLET);
+                          !thisRankHasDuty(cr, DUTY_PP));
 
-    // Enable FP exception detection for the Verlet scheme, but not in
+    // Enable FP exception detection, but not in
     // Release mode and not for compilers with known buggy FP
     // exception support (clang with any optimization) or suspected
     // buggy FP exception support (gcc 7.* with optimization).
 #if !defined NDEBUG && \
     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
     && defined __OPTIMIZE__)
-    const bool bEnableFPE = inputrec->cutoff_scheme == ecutsVERLET;
+    const bool bEnableFPE = true;
 #else
     const bool bEnableFPE = false;
 #endif
@@ -1038,11 +1102,11 @@ int Mdrunner::mdrunner()
             }
             else if (nonbondedTarget == TaskTarget::Gpu)
             {
-                gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected.");
+                gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because no GPU is detected.");
             }
             else if (bondedTarget == TaskTarget::Gpu)
             {
-                gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because there is none detected.");
+                gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because no GPU is detected.");
             }
         }
     }
@@ -1057,7 +1121,7 @@ int Mdrunner::mdrunner()
             }
             else if (pmeTarget == TaskTarget::Gpu)
             {
-                gmx_fatal(FARGS, "Cannot run PME on a GPU because there is none detected.");
+                gmx_fatal(FARGS, "Cannot run PME on a GPU because no GPU is detected.");
             }
         }
     }
@@ -1148,34 +1212,23 @@ int Mdrunner::mdrunner()
         }
     }
 
-    /* getting number of PP/PME threads
+    /* Get the number of PP/PME threads on this MPI / tMPI rank.
        PME: env variable should be read only on one node to make sure it is
        identical everywhere;
      */
-    nthreads_pme = gmx_omp_nthreads_get(emntPME);
-
-    int numThreadsOnThisRank;
-    /* threads on this MPI process or TMPI thread */
-    if (thisRankHasDuty(cr, DUTY_PP))
-    {
-        numThreadsOnThisRank = gmx_omp_nthreads_get(emntNonbonded);
-    }
-    else
-    {
-        numThreadsOnThisRank = nthreads_pme;
-    }
-
+    const int numThreadsOnThisRank =
+        thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) : gmx_omp_nthreads_get(emntPME);
     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
                                   *hwinfo->hardwareTopology,
                                   physicalNodeComm, mdlog);
 
-    if (hw_opt.thread_affinity != threadaffOFF)
+    if (hw_opt.threadAffinity != ThreadAffinity::Off)
     {
         /* Before setting affinity, check whether the affinity has changed
          * - which indicates that probably the OpenMP library has changed it
          * since we first checked.
          */
-        gmx_check_thread_affinity_set(mdlog, cr,
+        gmx_check_thread_affinity_set(mdlog,
                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
 
         int numThreadsOnThisNode, intraNodeThreadOffset;
@@ -1199,7 +1252,7 @@ int Mdrunner::mdrunner()
     {
         /* Master synchronizes its value of reset_counters with all nodes
          * including PME only nodes */
-        reset_counters = wcycle_get_reset_counters(wcycle);
+        int64_t reset_counters = wcycle_get_reset_counters(wcycle);
         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
         wcycle_set_reset_counters(wcycle, reset_counters);
     }
@@ -1222,12 +1275,12 @@ int Mdrunner::mdrunner()
     std::unique_ptr<MDAtoms>     mdAtoms;
     std::unique_ptr<gmx_vsite_t> vsite;
 
-    snew(nrnb, 1);
+    t_nrnb nrnb;
     if (thisRankHasDuty(cr, DUTY_PP))
     {
         /* Initiate forcerecord */
-        fr                 = mk_forcerec();
-        fr->forceProviders = mdModules->initForceProviders();
+        fr                 = new t_forcerec;
+        fr->forceProviders = mdModules_->initForceProviders();
         init_forcerec(fplog, mdlog, fr, fcd,
                       inputrec, &mtop, cr, box,
                       opt2fn("-table", filenames.size(), filenames.data()),
@@ -1236,7 +1289,8 @@ int Mdrunner::mdrunner()
                       *hwinfo, nonbondedDeviceInfo,
                       useGpuForBonded,
                       FALSE,
-                      pforce);
+                      pforce,
+                      wcycle);
 
         /* Initialize the mdAtoms structure.
          * mdAtoms is not filled with atom data,
@@ -1326,10 +1380,10 @@ int Mdrunner::mdrunner()
                 pmedata = gmx_pme_init(cr,
                                        getNumPmeDomains(cr->dd),
                                        inputrec,
-                                       mtop.natoms, nChargePerturbed != 0, nTypePerturbed != 0,
+                                       nChargePerturbed != 0, nTypePerturbed != 0,
                                        mdrunOptions.reproducible,
                                        ewaldcoeff_q, ewaldcoeff_lj,
-                                       nthreads_pme,
+                                       gmx_omp_nthreads_get(emntPME),
                                        pmeRunMode, nullptr,
                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
             }
@@ -1348,6 +1402,7 @@ int Mdrunner::mdrunner()
         signal_handler_install();
     }
 
+    pull_t *pull_work = nullptr;
     if (thisRankHasDuty(cr, DUTY_PP))
     {
         /* Assumes uniform use of the number of OpenMP threads */
@@ -1356,18 +1411,18 @@ int Mdrunner::mdrunner()
         if (inputrec->bPull)
         {
             /* Initialize pull code */
-            inputrec->pull_work =
+            pull_work =
                 init_pull(fplog, inputrec->pull, inputrec,
                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
             {
-                initPullHistory(inputrec->pull_work, &observablesHistory);
+                initPullHistory(pull_work, &observablesHistory);
             }
             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
             {
-                init_pull_output_files(inputrec->pull_work,
+                init_pull_output_files(pull_work,
                                        filenames.size(), filenames.data(), oenv,
-                                       continuationOptions);
+                                       startingBehavior);
             }
         }
 
@@ -1384,15 +1439,19 @@ int Mdrunner::mdrunner()
                                         globalState.get(),
                                         &mtop,
                                         oenv,
-                                        mdrunOptions);
+                                        mdrunOptions,
+                                        startingBehavior);
         }
 
+        t_swap *swap = nullptr;
         if (inputrec->eSwapCoords != eswapNO)
         {
             /* Initialize ion swapping code */
-            init_swapcoords(fplog, inputrec, opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
-                            &mtop, globalState.get(), &observablesHistory,
-                            cr, &atomSets, oenv, mdrunOptions);
+            swap = init_swapcoords(fplog, inputrec,
+                                   opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
+                                   &mtop, globalState.get(), &observablesHistory,
+                                   cr, &atomSets, oenv, mdrunOptions,
+                                   startingBehavior);
         }
 
         /* Let makeConstraints know whether we have essential dynamics constraints.
@@ -1400,9 +1459,18 @@ int Mdrunner::mdrunner()
          */
         bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr
                                     || observablesHistory.edsamHistory);
-        auto constr              = makeConstraints(mtop, *inputrec, doEssentialDynamics,
+        auto constr              = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics,
                                                    fplog, *mdAtoms->mdatoms(),
-                                                   cr, ms, nrnb, wcycle, fr->bMolPBC);
+                                                   cr, ms, &nrnb, wcycle, fr->bMolPBC);
+
+        /* Energy terms and groups */
+        gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), inputrec->fepvals->n_lambda);
+
+        /* Set up interactive MD (IMD) */
+        auto imdSession = makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
+                                         MASTER(cr) ? globalState->x.rvec_array() : nullptr,
+                                         filenames.size(), filenames.data(), oenv, mdrunOptions.imdOptions,
+                                         startingBehavior);
 
         if (DOMAINDECOMP(cr))
         {
@@ -1422,41 +1490,43 @@ int Mdrunner::mdrunner()
         // understood.
         PpForceWorkload ppForceWorkload;
 
-        GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to integrator.");
+        GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
         /* Now do whatever the user wants us to do (how flexible...) */
-        Integrator integrator {
+        LegacySimulator simulator {
             fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(),
             oenv,
             mdrunOptions,
+            startingBehavior,
             vsite.get(), constr.get(),
             enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
             deform.get(),
-            mdModules->outputProvider(),
-            inputrec, &mtop,
+            mdModules_->outputProvider(),
+            inputrec, imdSession.get(), pull_work, swap, &mtop,
             fcd,
             globalState.get(),
             &observablesHistory,
-            mdAtoms.get(), nrnb, wcycle, fr,
+            mdAtoms.get(), &nrnb, wcycle, fr,
+            &enerd,
             &ppForceWorkload,
             replExParams,
             membed,
             walltime_accounting,
             std::move(stopHandlerBuilder_)
         };
-        integrator.run(inputrec->eI, doRerun);
+        simulator.run(inputrec->eI, doRerun);
 
         if (inputrec->bPull)
         {
-            finish_pull(inputrec->pull_work);
+            finish_pull(pull_work);
         }
-
+        finish_swapcoords(swap);
     }
     else
     {
         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
         /* do PME only */
         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
-        gmx_pmeonly(pmedata, cr, nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
+        gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
     }
 
     wallcycle_stop(wcycle, ewcRUN);
@@ -1465,12 +1535,15 @@ int Mdrunner::mdrunner()
      * if rerunMD, don't write last frame again
      */
     finish_run(fplog, mdlog, cr,
-               inputrec, nrnb, wcycle, walltime_accounting,
-               fr ? fr->nbv : nullptr,
+               inputrec, &nrnb, wcycle, walltime_accounting,
+               fr ? fr->nbv.get() : nullptr,
                pmedata,
                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
 
-    // Free PME data
+    // Clean up the cycle counter
+    wallcycle_destroy(wcycle);
+
+    // Free PME data
     if (pmedata)
     {
         gmx_pme_destroy(pmedata);
@@ -1483,13 +1556,13 @@ int Mdrunner::mdrunner()
     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
     mdAtoms.reset(nullptr);
     globalState.reset(nullptr);
-    mdModules.reset(nullptr);   // destruct force providers here as they might also use the GPU
+    mdModules_.reset(nullptr);   // destruct force providers here as they might also use the GPU
 
     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
-    free_gpu_resources(fr, physicalNodeComm);
+    free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
     free_gpu(nonbondedDeviceInfo);
     free_gpu(pmeDeviceInfo);
-    done_forcerec(fr, mtop.molblock.size(), mtop.groups.grps[egcENER].nr);
+    done_forcerec(fr, mtop.molblock.size());
     sfree(fcd);
 
     if (doMembed)
@@ -1497,12 +1570,9 @@ int Mdrunner::mdrunner()
         free_membed(membed);
     }
 
-    gmx_hardware_info_free();
-
     /* Does what it says */
     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
     walltime_accounting_destroy(walltime_accounting);
-    sfree(nrnb);
 
     // Ensure log file content is written
     if (logFileHandle)
@@ -1517,7 +1587,7 @@ int Mdrunner::mdrunner()
         gmx_fedisableexcept();
     }
 
-    rc = static_cast<int>(gmx_get_stop_condition());
+    auto rc = static_cast<int>(gmx_get_stop_condition());
 
 #if GMX_THREAD_MPI
     /* we need to join all threads. The sub-threads join when they
@@ -1525,11 +1595,11 @@ int Mdrunner::mdrunner()
        wait for that. */
     if (PAR(cr) && MASTER(cr))
     {
-        done_commrec(cr);
         tMPI_Finalize();
     }
+    // TODO: free commrec in MPI simulations
+    done_commrec(cr);
 #endif
-
     return rc;
 }
 
@@ -1561,6 +1631,11 @@ void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller,
                                  std::move(name));
 }
 
+Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules)
+    : mdModules_(std::move(mdModules))
+{
+}
+
 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
 
 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
@@ -1570,11 +1645,13 @@ class Mdrunner::BuilderImplementation
 {
     public:
         BuilderImplementation() = delete;
-        explicit BuilderImplementation(SimulationContext* context);
+        BuilderImplementation(std::unique_ptr<MDModules> mdModules,
+                              SimulationContext        * context);
         ~BuilderImplementation();
 
         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
-                                                    real                forceWarningThreshold);
+                                                    real                forceWarningThreshold,
+                                                    StartingBehavior    startingBehavior);
 
         void addDomdec(const DomdecOptions &options);
 
@@ -1603,6 +1680,7 @@ class Mdrunner::BuilderImplementation
         Mdrunner build();
 
     private:
+
         // Default parameters copied from runner.h
         // \todo Clarify source(s) of default parameters.
 
@@ -1626,6 +1704,12 @@ class Mdrunner::BuilderImplementation
         //! Print a warning if any force is larger than this (in kJ/mol nm).
         real forceWarningThreshold_ = -1;
 
+        //! Whether the simulation will start afresh, or restart with/without appending.
+        StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
+
+        //! The modules that comprise the functionality of mdrun.
+        std::unique_ptr<MDModules> mdModules_;
+
         /*! \brief  Non-owning pointer to SimulationContext (owned and managed by client)
          *
          * \internal
@@ -1665,7 +1749,9 @@ class Mdrunner::BuilderImplementation
         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
 };
 
-Mdrunner::BuilderImplementation::BuilderImplementation(SimulationContext* context) :
+Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules,
+                                                       SimulationContext        * context) :
+    mdModules_(std::move(mdModules)),
     context_(context)
 {
     GMX_ASSERT(context_, "Bug found. It should not be possible to construct builder without a valid context.");
@@ -1674,11 +1760,13 @@ Mdrunner::BuilderImplementation::BuilderImplementation(SimulationContext* contex
 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
 
 Mdrunner::BuilderImplementation &
-Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions &options,
-                                                      real                forceWarningThreshold)
+Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions    &options,
+                                                      const real             forceWarningThreshold,
+                                                      const StartingBehavior startingBehavior)
 {
     mdrunOptions_          = options;
     forceWarningThreshold_ = forceWarningThreshold;
+    startingBehavior_      = startingBehavior;
     return *this;
 }
 
@@ -1699,17 +1787,19 @@ void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangePa
 
 void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim)
 {
-    multisim_ = compat::make_unique<gmx_multisim_t*>(multisim);
+    multisim_ = std::make_unique<gmx_multisim_t*>(multisim);
 }
 
 Mdrunner Mdrunner::BuilderImplementation::build()
 {
-    auto newRunner = Mdrunner();
+    auto newRunner = Mdrunner(std::move(mdModules_));
 
     GMX_ASSERT(context_, "Bug found. It should not be possible to call build() without a valid context.");
 
-    newRunner.mdrunOptions    = mdrunOptions_;
-    newRunner.domdecOptions   = domdecOptions_;
+    newRunner.mdrunOptions          = mdrunOptions_;
+    newRunner.pforce                = forceWarningThreshold_;
+    newRunner.startingBehavior      = startingBehavior_;
+    newRunner.domdecOptions         = domdecOptions_;
 
     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
     newRunner.hw_opt          = hardwareOptions_;
@@ -1776,7 +1866,7 @@ Mdrunner Mdrunner::BuilderImplementation::build()
         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
     }
 
-    newRunner.restraintManager_ = compat::make_unique<gmx::RestraintManager>();
+    newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
 
     if (stopHandlerBuilder_)
     {
@@ -1784,7 +1874,7 @@ Mdrunner Mdrunner::BuilderImplementation::build()
     }
     else
     {
-        newRunner.stopHandlerBuilder_ = compat::make_unique<StopHandlerBuilder>();
+        newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
     }
 
     return newRunner;
@@ -1832,17 +1922,19 @@ void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<Stop
     stopHandlerBuilder_ = std::move(builder);
 }
 
-MdrunnerBuilder::MdrunnerBuilder(compat::not_null<SimulationContext*> context) :
-    impl_ {gmx::compat::make_unique<Mdrunner::BuilderImplementation>(context)}
+MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
+                                 compat::not_null<SimulationContext*> context) :
+    impl_ {std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context)}
 {
 }
 
 MdrunnerBuilder::~MdrunnerBuilder() = default;
 
-MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions &options,
-                                                      real                forceWarningThreshold)
+MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions    &options,
+                                                      real                   forceWarningThreshold,
+                                                      const StartingBehavior startingBehavior)
 {
-    impl_->setExtraMdrunOptions(options, forceWarningThreshold);
+    impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
     return *this;
 }
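
A minimal sketch of how client code might drive the extended builder interface, assuming an existing SimulationContext named simulationContext and MdrunOptions named options, that compat::make_not_null is available from the compat pointers header, that the public builder forwards build() to the implementation shown above, and that the remaining required add*() calls (e.g. addBondedTaskAssignment(), per the check in build()) are also made:

// Sketch only: simulationContext and options are assumed to exist.
auto builder = MdrunnerBuilder(std::make_unique<MDModules>(),
                               compat::make_not_null(&simulationContext));
builder.addSimulationMethod(options, /* forceWarningThreshold = */ -1,
                            StartingBehavior::NewSimulation);
// ... remaining required add*() calls, e.g. addBondedTaskAssignment() ...
Mdrunner runner = builder.build();
runner.mdrunner();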