* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-
+/*! \internal \file
+ *
+ * \brief Implements the MD runner routine calling all integrators.
+ *
+ * \author David van der Spoel <david.vanderspoel@icm.uu.se>
+ * \ingroup module_mdlib
+ */
#include "gmxpre.h"
+#include "runner.h"
+
#include "config.h"
#include <assert.h>
#include <algorithm>
+#include "gromacs/commandline/filenm.h"
#include "gromacs/domdec/domdec.h"
+#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/essentialdynamics/edsam.h"
#include "gromacs/ewald/pme.h"
+#include "gromacs/fileio/checkpoint.h"
+#include "gromacs/fileio/oenv.h"
#include "gromacs/fileio/tpxio.h"
-#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
-#include "gromacs/legacyheaders/checkpoint.h"
-#include "gromacs/legacyheaders/constr.h"
-#include "gromacs/legacyheaders/copyrite.h"
-#include "gromacs/legacyheaders/disre.h"
-#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/gmx_detect_hardware.h"
-#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
-#include "gromacs/legacyheaders/gmx_thread_affinity.h"
-#include "gromacs/legacyheaders/inputrec.h"
-#include "gromacs/legacyheaders/main.h"
-#include "gromacs/legacyheaders/md_logging.h"
-#include "gromacs/legacyheaders/md_support.h"
-#include "gromacs/legacyheaders/mdatoms.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/oenv.h"
-#include "gromacs/legacyheaders/orires.h"
-#include "gromacs/legacyheaders/qmmm.h"
-#include "gromacs/legacyheaders/sighandler.h"
-#include "gromacs/legacyheaders/txtdump.h"
-#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/gmxlib/md_logging.h"
+#include "gromacs/gmxlib/network.h"
+#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/detecthardware.h"
+#include "gromacs/listed-forces/disre.h"
+#include "gromacs/listed-forces/orires.h"
#include "gromacs/math/calculate-ewald-splitting-coefficient.h"
+#include "gromacs/math/functions.h"
+#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/calc_verletbuf.h"
+#include "gromacs/mdlib/constr.h"
+#include "gromacs/mdlib/force.h"
+#include "gromacs/mdlib/forcerec.h"
+#include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdlib/integrator.h"
+#include "gromacs/mdlib/main.h"
+#include "gromacs/mdlib/md_support.h"
+#include "gromacs/mdlib/mdatoms.h"
+#include "gromacs/mdlib/mdrun.h"
+#include "gromacs/mdlib/minimize.h"
#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/mdlib/qmmm.h"
+#include "gromacs/mdlib/sighandler.h"
+#include "gromacs/mdlib/sim_util.h"
+#include "gromacs/mdlib/tpi.h"
+#include "gromacs/mdrunutility/threadaffinity.h"
+#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/state.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/pulling/pull_rotation.h"
-#include "gromacs/swap/swapcoords.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/mtop_util.h"
+#include "gromacs/trajectory/trajectoryframe.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/pleasecite.h"
#include "gromacs/utility/smalloc.h"
#include "deform.h"
-#include "membed.h"
+#include "md.h"
#include "repl_ex.h"
#include "resource-division.h"
#include "corewrap.h"
#endif
-typedef struct {
- gmx_integrator_t *func;
-} gmx_intp_t;
-
-/* The array should match the eI array in include/types/enums.h */
-const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md}, {do_md}};
-
+//! First step used in pressure scaling
gmx_int64_t deform_init_init_step_tpx;
+//! Initial box for pressure scaling
matrix deform_init_box_tpx;
+//! MPI variable for use in pressure scaling
tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
-
-#ifdef GMX_THREAD_MPI
+#if GMX_THREAD_MPI
/* The minimum number of atoms per tMPI thread. With fewer atoms than this,
* the number of threads will get lowered.
*/
struct mdrunner_arglist
{
- gmx_hw_opt_t hw_opt;
- FILE *fplog;
- t_commrec *cr;
- int nfile;
- const t_filenm *fnm;
- output_env_t oenv;
- gmx_bool bVerbose;
- gmx_bool bCompact;
- int nstglobalcomm;
- ivec ddxyz;
- int dd_node_order;
- real rdd;
- real rconstr;
- const char *dddlb_opt;
- real dlb_scale;
- const char *ddcsx;
- const char *ddcsy;
- const char *ddcsz;
- const char *nbpu_opt;
- int nstlist_cmdline;
- gmx_int64_t nsteps_cmdline;
- int nstepout;
- int resetstep;
- int nmultisim;
- int repl_ex_nst;
- int repl_ex_nex;
- int repl_ex_seed;
- real pforce;
- real cpt_period;
- real max_hours;
- int imdport;
- unsigned long Flags;
+ gmx_hw_opt_t hw_opt;
+ FILE *fplog;
+ t_commrec *cr;
+ int nfile;
+ const t_filenm *fnm;
+ const gmx_output_env_t *oenv;
+ gmx_bool bVerbose;
+ int nstglobalcomm;
+ ivec ddxyz;
+ int dd_rank_order;
+ int npme;
+ real rdd;
+ real rconstr;
+ const char *dddlb_opt;
+ real dlb_scale;
+ const char *ddcsx;
+ const char *ddcsy;
+ const char *ddcsz;
+ const char *nbpu_opt;
+ int nstlist_cmdline;
+ gmx_int64_t nsteps_cmdline;
+ int nstepout;
+ int resetstep;
+ int nmultisim;
+ int repl_ex_nst;
+ int repl_ex_nex;
+ int repl_ex_seed;
+ real pforce;
+ real cpt_period;
+ real max_hours;
+ int imdport;
+ unsigned long Flags;
};
a commrec. */
static void mdrunner_start_fn(void *arg)
{
- struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg;
- struct mdrunner_arglist mc = *mda; /* copy the arg list to make sure
- that it's thread-local. This doesn't
- copy pointed-to items, of course,
- but those are all const. */
- t_commrec *cr; /* we need a local version of this */
- FILE *fplog = NULL;
- t_filenm *fnm;
+ try
+ {
+ struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg;
+ struct mdrunner_arglist mc = *mda; /* copy the arg list to make sure
+ that it's thread-local. This doesn't
+ copy pointed-to items, of course,
+ but those are all const. */
+ t_commrec *cr; /* we need a local version of this */
+ FILE *fplog = NULL;
+ t_filenm *fnm;
- fnm = dup_tfn(mc.nfile, mc.fnm);
+ fnm = dup_tfn(mc.nfile, mc.fnm);
- cr = reinitialize_commrec_for_this_thread(mc.cr);
+ cr = reinitialize_commrec_for_this_thread(mc.cr);
- if (MASTER(cr))
- {
- fplog = mc.fplog;
- }
+ if (MASTER(cr))
+ {
+ fplog = mc.fplog;
+ }
- mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
- mc.bVerbose, mc.bCompact, mc.nstglobalcomm,
- mc.ddxyz, mc.dd_node_order, mc.rdd,
- mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
- mc.ddcsx, mc.ddcsy, mc.ddcsz,
- mc.nbpu_opt, mc.nstlist_cmdline,
- mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
- mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
- mc.cpt_period, mc.max_hours, mc.imdport, mc.Flags);
+ gmx::mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
+ mc.bVerbose, mc.nstglobalcomm,
+ mc.ddxyz, mc.dd_rank_order, mc.npme, mc.rdd,
+ mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
+ mc.ddcsx, mc.ddcsy, mc.ddcsz,
+ mc.nbpu_opt, mc.nstlist_cmdline,
+ mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
+ mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
+ mc.cpt_period, mc.max_hours, mc.imdport, mc.Flags);
+ }
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
+
/* called by mdrunner() to start a specific number of threads (including
the main thread) for thread-parallel runs. This in turn calls mdrunner()
for each thread.
All options besides nthreads are the same as for mdrunner(). */
static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,
FILE *fplog, t_commrec *cr, int nfile,
- const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
- gmx_bool bCompact, int nstglobalcomm,
- ivec ddxyz, int dd_node_order, real rdd, real rconstr,
+ const t_filenm fnm[], const gmx_output_env_t *oenv, gmx_bool bVerbose,
+ int nstglobalcomm,
+ ivec ddxyz, int dd_rank_order, int npme,
+ real rdd, real rconstr,
const char *dddlb_opt, real dlb_scale,
const char *ddcsx, const char *ddcsy, const char *ddcsz,
const char *nbpu_opt, int nstlist_cmdline,
mda->fnm = fnmn;
mda->oenv = oenv;
mda->bVerbose = bVerbose;
- mda->bCompact = bCompact;
mda->nstglobalcomm = nstglobalcomm;
mda->ddxyz[XX] = ddxyz[XX];
mda->ddxyz[YY] = ddxyz[YY];
mda->ddxyz[ZZ] = ddxyz[ZZ];
- mda->dd_node_order = dd_node_order;
+ mda->dd_rank_order = dd_rank_order;
+ mda->npme = npme;
mda->rdd = rdd;
mda->rconstr = rconstr;
mda->dddlb_opt = dddlb_opt;
#endif /* GMX_THREAD_MPI */
-/* We determine the extra cost of the non-bonded kernels compared to
+/*! \brief Cost of non-bonded kernels
+ *
+ * We determine the extra cost of the non-bonded kernels compared to
* a reference nstlist value of 10 (which is the default in grompp).
*/
static const int nbnxnReferenceNstlist = 10;
-/* The values to try when switching */
+//! The values to try when switching
const int nstlist_try[] = { 20, 25, 40 };
+//! Number of elements in the neighborsearch list trials.
#define NNSTL sizeof(nstlist_try)/sizeof(nstlist_try[0])
/* Increase nstlist until the non-bonded cost increases more than listfac_ok,
* but never more than listfac_max.
* nstlist will not be increased enough to reach optimal performance.
*/
/* CPU: pair-search is about a factor 1.5 slower than the non-bonded kernel */
+//! Max OK performance ratio between force calc and neighbor searching
static const float nbnxn_cpu_listfac_ok = 1.05;
+//! Too high performance ratio between force calc and neighbor searching
static const float nbnxn_cpu_listfac_max = 1.09;
/* GPU: pair-search is a factor 1.5-3 slower than the non-bonded kernel */
+//! Max OK performance ratio between force calc and neighbor searching
static const float nbnxn_gpu_listfac_ok = 1.20;
+//! Too high performance ratio between force calc and neighbor searching
static const float nbnxn_gpu_listfac_max = 1.30;
-/* Try to increase nstlist when using the Verlet cut-off scheme */
+/*! \brief Try to increase nstlist when using the Verlet cut-off scheme */
static void increase_nstlist(FILE *fp, t_commrec *cr,
t_inputrec *ir, int nstlist_cmdline,
const gmx_mtop_t *mtop, matrix box,
const char *box_err = "Can not increase nstlist because the box is too small";
const char *dd_err = "Can not increase nstlist because of domain decomposition limitations";
char buf[STRLEN];
- const float oneThird = 1.0f / 3.0f;
if (nstlist_cmdline <= 0)
{
/* Determine the pair list size increase due to zero interactions */
rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j,
mtop->natoms/det(box));
- rlist_ok = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_ok, oneThird) - rlist_inc;
- rlist_max = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_max, oneThird) - rlist_inc;
+ rlist_ok = (rlistWithReferenceNstlist + rlist_inc)*std::cbrt(listfac_ok) - rlist_inc;
+ rlist_max = (rlistWithReferenceNstlist + rlist_inc)*std::cbrt(listfac_max) - rlist_inc;
if (debug)
{
fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n",
calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new);
/* Does rlist fit in the box? */
- bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC, box));
+ bBox = (gmx::square(rlist_new) < max_cutoff2(ir->ePBC, box));
bDD = TRUE;
if (bBox && DOMAINDECOMP(cr))
{
fprintf(fp, "%s\n\n", buf);
}
ir->rlist = rlist_new;
- ir->rlistlong = rlist_new;
}
}
+/*! \brief Initialize variables for Verlet scheme simulation */
static void prepare_verlet_scheme(FILE *fplog,
t_commrec *cr,
t_inputrec *ir,
ls.cluster_size_i, ls.cluster_size_j);
}
ir->rlist = rlist_new;
- ir->rlistlong = rlist_new;
}
}
}
}
-/* Override the value in inputrec with value passed on the command line (if any) */
+/*! \brief Override the nsteps value in inputrec
+ *
+ * with value passed on the command line (if any)
+ */
static void override_nsteps_cmdline(FILE *fplog,
gmx_int64_t nsteps_cmdline,
t_inputrec *ir,
/* Do nothing if nsteps_cmdline == -2 */
}
+namespace gmx
+{
+
+//! \brief Return the correct integrator function.
+static integrator_t *my_integrator(unsigned int ei)
+{
+ switch (ei)
+ {
+ case eiMD:
+ case eiBD:
+ case eiSD1:
+ case eiVV:
+ case eiVVAK:
+ if (!EI_DYNAMICS(ei))
+ {
+ GMX_THROW(APIError("do_md integrator would be called for a non-dynamical integrator"));
+ }
+ return do_md;
+ case eiSteep:
+ return do_steep;
+ case eiCG:
+ return do_cg;
+ case eiNM:
+ return do_nm;
+ case eiLBFGS:
+ return do_lbfgs;
+ case eiTPI:
+ case eiTPIC:
+ if (!EI_TPI(ei))
+ {
+ GMX_THROW(APIError("do_tpi integrator would be called for a non-TPI integrator"));
+ }
+ return do_tpi;
+ case eiSD2_REMOVED:
+ GMX_THROW(NotImplementedError("SD2 integrator has been removed"));
+ default:
+ GMX_THROW(APIError("Non existing integrator selected"));
+ }
+}
+
int mdrunner(gmx_hw_opt_t *hw_opt,
FILE *fplog, t_commrec *cr, int nfile,
- const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
- gmx_bool bCompact, int nstglobalcomm,
- ivec ddxyz, int dd_node_order, real rdd, real rconstr,
+ const t_filenm fnm[], const gmx_output_env_t *oenv, gmx_bool bVerbose,
+ int nstglobalcomm,
+ ivec ddxyz, int dd_rank_order, int npme, real rdd, real rconstr,
const char *dddlb_opt, real dlb_scale,
const char *ddcsx, const char *ddcsy, const char *ddcsz,
const char *nbpu_opt, int nstlist_cmdline,
gmx_constr_t constr;
int nChargePerturbed = -1, nTypePerturbed = 0, status;
gmx_wallcycle_t wcycle;
- gmx_bool bReadEkin;
gmx_walltime_accounting_t walltime_accounting = NULL;
int rc;
gmx_int64_t reset_counters;
gmx_edsam_t ed = NULL;
int nthreads_pme = 1;
- int nthreads_pp = 1;
- gmx_membed_t membed = NULL;
gmx_hw_info_t *hwinfo = NULL;
/* The master rank decides early on bUseGPU and broadcasts this later */
- gmx_bool bUseGPU = FALSE;
+ gmx_bool bUseGPU = FALSE;
/* CAUTION: threads may be started later on in this function, so
cr doesn't reflect the final parallel state right now */
if (SIMMASTER(cr))
{
/* Read (nearly) all data required for the simulation */
- read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, NULL, mtop);
+ read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, mtop);
if (inputrec->cutoff_scheme == ecutsVERLET)
{
gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet");
}
-#ifdef GMX_TARGET_BGQ
+#if GMX_TARGET_BGQ
md_print_warn(cr, fplog,
"NOTE: There is no SIMD implementation of the group scheme kernels on\n"
" BlueGene/Q. You will observe better performance from using the\n"
" Verlet cut-off scheme.\n");
#endif
}
-
- if (inputrec->eI == eiSD2)
- {
- md_print_warn(cr, fplog, "The stochastic dynamics integrator %s is deprecated, since\n"
- "it is slower than integrator %s and is slightly less accurate\n"
- "with constraints. Use the %s integrator.",
- ei_names[inputrec->eI], ei_names[eiSD1], ei_names[eiSD1]);
- }
}
/* Check and update the hardware options for internal consistency */
- check_and_update_hw_opt_1(hw_opt, cr);
+ check_and_update_hw_opt_1(hw_opt, cr, npme);
/* Early check for externally set process affinity. */
gmx_check_thread_affinity_set(fplog, cr,
hw_opt, hwinfo->nthreads_hw_avail, FALSE);
-#ifdef GMX_THREAD_MPI
+#if GMX_THREAD_MPI
if (SIMMASTER(cr))
{
- if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
+ if (npme > 0 && hw_opt->nthreads_tmpi <= 0)
{
gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks");
}
t_commrec *cr_old = cr;
/* now start the threads. */
cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
- oenv, bVerbose, bCompact, nstglobalcomm,
- ddxyz, dd_node_order, rdd, rconstr,
+ oenv, bVerbose, nstglobalcomm,
+ ddxyz, dd_rank_order, npme, rdd, rconstr,
dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
nbpu_opt, nstlist_cmdline,
nsteps_cmdline, nstepout, resetstep, nmultisim,
#endif
/* END OF CAUTION: cr is now reliable */
- /* g_membed initialisation *
- * Because we change the mtop, init_membed is called before the init_parallel *
- * (in case we ever want to make it run in parallel) */
- if (opt2bSet("-membed", nfile, fnm))
- {
- if (MASTER(cr))
- {
- fprintf(stderr, "Initializing membed");
- }
- membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period);
- }
-
if (PAR(cr))
{
/* now broadcast everything to the non-master nodes/threads: */
/* A parallel command line option consistency check that we can
only do after any threads have started. */
if (!PAR(cr) &&
- (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
+ (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || npme > 0))
{
gmx_fatal(FARGS,
"The -dd or -npme option request a parallel simulation, "
-#ifndef GMX_MPI
+#if !GMX_MPI
"but %s was compiled without threads or MPI enabled"
#else
-#ifdef GMX_THREAD_MPI
+#if GMX_THREAD_MPI
"but the number of MPI-threads (option -ntmpi) is not set or is 1"
#else
"but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec"
if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
{
- if (cr->npmenodes > 0)
+ if (npme > 0)
{
- gmx_fatal_collective(FARGS, cr, NULL,
+ gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
"PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
}
- cr->npmenodes = 0;
+ npme = 0;
}
- if (bUseGPU && cr->npmenodes < 0)
+ if (bUseGPU && npme < 0)
{
/* With GPUs we don't automatically use PME-only ranks. PME ranks can
* improve performance with many threads per GPU, since our OpenMP
* scaling is bad, but it's difficult to automate the setup.
*/
- cr->npmenodes = 0;
+ npme = 0;
}
#ifdef GMX_FAHCORE
init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires),
state);
- if (DEFORM(*inputrec))
+ if (inputrecDeform(inputrec))
{
/* Store the deform reference box before reading the checkpoint */
if (SIMMASTER(cr))
tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
}
- if (opt2bSet("-cpi", nfile, fnm))
+ if (Flags & MD_STARTFROMCPT)
{
/* Check if checkpoint file exists before doing continuation.
* This way we can use identical input options for the first and subsequent runs...
*/
- if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) )
- {
- load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,
- cr, ddxyz,
- inputrec, state, &bReadEkin,
- (Flags & MD_APPENDFILES),
- (Flags & MD_APPENDFILESSET));
+ gmx_bool bReadEkin;
- if (bReadEkin)
- {
- Flags |= MD_READ_EKIN;
- }
+ load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,
+ cr, ddxyz, &npme,
+ inputrec, state, &bReadEkin,
+ (Flags & MD_APPENDFILES),
+ (Flags & MD_APPENDFILESSET));
+
+ if (bReadEkin)
+ {
+ Flags |= MD_READ_EKIN;
}
}
gmx_bcast(sizeof(box), box, cr);
}
+ // TODO This should move to do_md(), because it only makes sense
+ // with dynamical integrators, but there is no test coverage and
+ // it interacts with constraints, somehow.
/* Essential dynamics */
if (opt2bSet("-ei", nfile, fnm))
{
if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
inputrec->eI == eiNM))
{
- cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr,
+ cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, npme,
+ dd_rank_order,
+ rdd, rconstr,
dddlb_opt, dlb_scale,
ddcsx, ddcsy, ddcsz,
mtop, inputrec,
box, state->x,
&ddbox, &npme_major, &npme_minor);
-
- make_dd_communicators(fplog, cr, dd_node_order);
-
- /* Set overallocation to avoid frequent reallocation of arrays */
- set_over_alloc_dd(TRUE);
}
else
{
/* Initialize per-physical-node MPI process/thread ID and counters. */
gmx_init_intranode_counters(cr);
-#ifdef GMX_MPI
+#if GMX_MPI
if (MULTISIM(cr))
{
md_print_info(cr, fplog,
}
md_print_info(cr, fplog, "Using %d MPI %s\n",
cr->nnodes,
-#ifdef GMX_THREAD_MPI
+#if GMX_THREAD_MPI
cr->nnodes == 1 ? "thread" : "threads"
#else
cr->nnodes == 1 ? "process" : "processes"
inputrec->cutoff_scheme == ecutsVERLET);
#ifndef NDEBUG
- if (integrator[inputrec->eI].func != do_tpi &&
+ if (EI_TPI(inputrec->eI) &&
inputrec->cutoff_scheme == ecutsVERLET)
{
gmx_feenableexcept();
PME: env variable should be read only on one node to make sure it is
identical everywhere;
*/
- /* TODO nthreads_pp is only used for pinning threads.
- * This is a temporary solution until we have a hw topology library.
- */
- nthreads_pp = gmx_omp_nthreads_get(emntNonbonded);
nthreads_pme = gmx_omp_nthreads_get(emntPME);
- wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme);
+ wcycle = wallcycle_init(fplog, resetstep, cr);
if (PAR(cr))
{
fr = mk_forcerec();
fr->hwinfo = hwinfo;
fr->gpu_opt = &hw_opt->gpu_opt;
- init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box,
+ init_forcerec(fplog, fr, fcd, inputrec, mtop, cr, box,
opt2fn("-table", nfile, fnm),
- opt2fn("-tabletf", nfile, fnm),
opt2fn("-tablep", nfile, fnm),
opt2fn("-tableb", nfile, fnm),
nbpu_opt,
FALSE,
pforce);
- /* version for PCA_NOT_READ_NODE (see md.c) */
- /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
- "nofile","nofile","nofile","nofile",FALSE,pforce);
- */
-
/* Initialize QM-MM */
if (fr->bQMMM)
{
}
- if (integrator[inputrec->eI].func == do_md)
+ if (EI_DYNAMICS(inputrec->eI))
{
/* Turn on signal handling on all nodes */
/*
if (inputrec->bRot)
{
/* Initialize enforced rotation code */
- init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv,
+ init_rot(fplog, inputrec, nfile, fnm, cr, state->x, state->box, mtop, oenv,
bVerbose, Flags);
}
- if (inputrec->eSwapCoords != eswapNO)
- {
- /* Initialize ion swapping code */
- init_swapcoords(fplog, bVerbose, inputrec, opt2fn_master("-swap", nfile, fnm, cr),
- mtop, state->x, state->box, &state->swapstate, cr, oenv, Flags);
- }
-
constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);
if (DOMAINDECOMP(cr))
{
GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
+ /* This call is not included in init_domain_decomposition mainly
+ * because fr->cginfo_mb is set later.
+ */
dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
Flags & MD_DDBONDCHECK, fr->cginfo_mb);
-
- set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, &ddbox);
-
- setup_dd_grid(fplog, cr->dd);
}
/* Now do whatever the user wants us to do (how flexible...) */
- integrator[inputrec->eI].func(fplog, cr, nfile, fnm,
- oenv, bVerbose, bCompact,
- nstglobalcomm,
- vsite, constr,
- nstepout, inputrec, mtop,
- fcd, state,
- mdatoms, nrnb, wcycle, ed, fr,
- repl_ex_nst, repl_ex_nex, repl_ex_seed,
- membed,
- cpt_period, max_hours,
- imdport,
- Flags,
- walltime_accounting);
+ my_integrator(inputrec->eI) (fplog, cr, nfile, fnm,
+ oenv, bVerbose,
+ nstglobalcomm,
+ vsite, constr,
+ nstepout, inputrec, mtop,
+ fcd, state,
+ mdatoms, nrnb, wcycle, ed, fr,
+ repl_ex_nst, repl_ex_nex, repl_ex_seed,
+ cpt_period, max_hours,
+ imdport,
+ Flags,
+ walltime_accounting);
- if (inputrec->bPull)
+ if (inputrec->bRot)
{
- finish_pull(inputrec->pull_work);
+ finish_rot(inputrec->rot);
}
- if (inputrec->bRot)
+ if (inputrec->bPull)
{
- finish_rot(inputrec->rot);
+ finish_pull(inputrec->pull_work);
}
}
/* Free GPU memory and context */
free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL);
- if (opt2bSet("-membed", nfile, fnm))
- {
- sfree(membed);
- }
-
gmx_hardware_info_free(hwinfo);
/* Does what it says */
done_ed(&ed);
-#ifdef GMX_THREAD_MPI
+#if GMX_THREAD_MPI
/* we need to join all threads. The sub-threads join when they
exit this function, but the master thread needs to be told to
wait for that. */
return rc;
}
+
+} // namespace gmx