Merge branch release-2016

[alexxy/gromacs.git] / src / gromacs / domdec / domdec.cpp
diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp

index 1d0d81eaf272c34d2775bdfe0e636e571549bc98..85df75f43d9599335fdb3692eaa9d6bb4404b4ee 100644 (file)
--- a/src/gromacs/domdec/domdec.cpp
+++ b/src/gromacs/domdec/domdec.cpp
@@ -70,6 +70,7 @@
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
  #include "gromacs/mdlib/mdatoms.h"
  #include "gromacs/mdlib/mdrun.h"
+#include "gromacs/mdlib/mdsetup.h"
  #include "gromacs/mdlib/nb_verlet.h"
  #include "gromacs/mdlib/nbnxn_grid.h"
  #include "gromacs/mdlib/nsgrid.h"
@@ -91,6 +92,7 @@
  #include "gromacs/topology/block.h"
  #include "gromacs/topology/idef.h"
  #include "gromacs/topology/ifunc.h"
+#include "gromacs/topology/mtop_lookup.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/topology/topology.h"
  #include "gromacs/utility/basedefinitions.h"
@@ -102,6 +104,7 @@
  #include "gromacs/utility/qsort_threadsafe.h"
  #include "gromacs/utility/real.h"
  #include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringutil.h"
  
  #include "domdec_constraints.h"
  #include "domdec_internal.h"
@@ -250,7 +253,7 @@ int ddglatnr(const gmx_domdec_t *dd, int i)
  {
      int atnr;
  
-    if (dd == NULL)
+    if (dd == nullptr)
      {
          atnr = i + 1;
      }
@@ -271,16 +274,24 @@ t_block *dd_charge_groups_global(gmx_domdec_t *dd)
      return &dd->comm->cgs_gl;
  }
  
-static bool dlbIsOn(const gmx_domdec_comm_t *comm)
+/*! \brief Returns true if the DLB state indicates that the balancer is on. */
+static bool isDlbOn(const gmx_domdec_comm_t *comm)
  {
      return (comm->dlbState == edlbsOnCanTurnOff ||
-            comm->dlbState == edlbsOnForever);
+            comm->dlbState == edlbsOnUser);
+}
+/*! \brief Returns true if the DLB state indicates that the balancer is off/disabled.
+ */
+static bool isDlbDisabled(const gmx_domdec_comm_t *comm)
+{
+    return (comm->dlbState == edlbsOffUser ||
+            comm->dlbState == edlbsOffForever);
  }
  
  static void vec_rvec_init(vec_rvec_t *v)
  {
      v->nalloc = 0;
-    v->v      = NULL;
+    v->v      = nullptr;
  }
  
  static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
@@ -301,13 +312,8 @@ void dd_store_state(gmx_domdec_t *dd, t_state *state)
          gmx_incons("The state does not the domain decomposition state");
      }
  
-    state->ncg_gl = dd->ncg_home;
-    if (state->ncg_gl > state->cg_gl_nalloc)
-    {
-        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
-        srenew(state->cg_gl, state->cg_gl_nalloc);
-    }
-    for (i = 0; i < state->ncg_gl; i++)
+    state->cg_gl.resize(dd->ncg_home);
+    for (i = 0; i < dd->ncg_home; i++)
      {
          state->cg_gl[i] = dd->index_gl[i];
      }
@@ -355,7 +361,7 @@ void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
          dim         = dd->dim[d];
          shift0[dim] = zones->izone[izone].shift0[dim];
          shift1[dim] = zones->izone[izone].shift1[dim];
-        if (dd->comm->tric_dir[dim] || (dlbIsOn(dd->comm) && d > 0))
+        if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
          {
              /* A conservative approach, this can be optimized */
              shift0[dim] -= 1;
@@ -364,6 +370,15 @@ void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
      }
  }
  
+int dd_natoms_mdatoms(const gmx_domdec_t *dd)
+{
+    /* We currently set mdatoms entries for all atoms:
+     * local + non-local + communicated for vsite + constraints
+     */
+
+    return dd->comm->nat[ddnatNR - 1];
+}
+
  int dd_natoms_vsite(const gmx_domdec_t *dd)
  {
      return dd->comm->nat[ddnatVSITE];
@@ -511,7 +526,7 @@ void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
             consider PBC in the treatment of fshift */
          bShiftForcesNeedPbc   = (dd->ci[dd->dim[d]] == 0);
          bScrew                = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
-        if (fshift == NULL && !bScrew)
+        if (fshift == nullptr && !bScrew)
          {
              bShiftForcesNeedPbc = FALSE;
          }
@@ -1050,8 +1065,8 @@ static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
  static void dd_collect_cg(gmx_domdec_t *dd,
                            t_state      *state_local)
  {
-    gmx_domdec_master_t *ma = NULL;
-    int                  buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
+    gmx_domdec_master_t *ma = nullptr;
+    int                  buf2[2], *ibuf, i, ncg_home = 0, *cg = nullptr, nat_home = 0;
  
      if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
      {
@@ -1075,8 +1090,8 @@ static void dd_collect_cg(gmx_domdec_t *dd,
  
          cgs_gl = &dd->comm->cgs_gl;
  
-        ncg_home = state_local->ncg_gl;
-        cg       = state_local->cg_gl;
+        ncg_home = state_local->cg_gl.size();
+        cg       = state_local->cg_gl.data();
          nat_home = 0;
          for (i = 0; i < ncg_home; i++)
          {
@@ -1097,7 +1112,7 @@ static void dd_collect_cg(gmx_domdec_t *dd,
      }
      else
      {
-        ibuf = NULL;
+        ibuf = nullptr;
      }
      /* Collect the charge group and atom counts on the master */
      dd_gather(dd, 2*sizeof(int), buf2, ibuf);
@@ -1132,19 +1147,19 @@ static void dd_collect_cg(gmx_domdec_t *dd,
      /* Collect the charge group indices on the master */
      dd_gatherv(dd,
                 ncg_home*sizeof(int), cg,
-               DDMASTER(dd) ? ma->ibuf : NULL,
-               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
-               DDMASTER(dd) ? ma->cg : NULL);
+               DDMASTER(dd) ? ma->ibuf : nullptr,
+               DDMASTER(dd) ? ma->ibuf+dd->nnodes : nullptr,
+               DDMASTER(dd) ? ma->cg : nullptr);
  
      dd->comm->master_cg_ddp_count = state_local->ddp_count;
  }
  
  static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
-                                    rvec *lv, rvec *v)
+                                    const rvec *lv, rvec *v)
  {
      gmx_domdec_master_t *ma;
      int                  n, i, c, a, nalloc = 0;
-    rvec                *buf = NULL;
+    rvec                *buf = nullptr;
      t_block             *cgs_gl;
  
      ma = dd->ma;
@@ -1152,8 +1167,8 @@ static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
      if (!DDMASTER(dd))
      {
  #if GMX_MPI
-        MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
-                 dd->rank, dd->mpi_comm_all);
+        MPI_Send(const_cast<void *>(static_cast<const void *>(lv)), dd->nat_home*sizeof(rvec), MPI_BYTE,
+                 DDMASTERRANK(dd), dd->rank, dd->mpi_comm_all);
  #endif
      }
      else
@@ -1217,12 +1232,12 @@ static void get_commbuffer_counts(gmx_domdec_t *dd,
  }
  
  static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
-                                   rvec *lv, rvec *v)
+                                   const rvec *lv, rvec *v)
  {
      gmx_domdec_master_t *ma;
-    int                 *rcounts = NULL, *disps = NULL;
+    int                 *rcounts = nullptr, *disps = nullptr;
      int                  n, i, c, a;
-    rvec                *buf = NULL;
+    rvec                *buf = nullptr;
      t_block             *cgs_gl;
  
      ma = dd->ma;
@@ -1254,11 +1269,15 @@ static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
      }
  }
  
-void dd_collect_vec(gmx_domdec_t *dd,
-                    t_state *state_local, rvec *lv, rvec *v)
+void dd_collect_vec(gmx_domdec_t           *dd,
+                    t_state                *state_local,
+                    const PaddedRVecVector *localVector,
+                    rvec                   *v)
  {
      dd_collect_cg(dd, state_local);
  
+    const rvec *lv = as_rvec_array(localVector->data());
+
      if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
      {
          dd_collect_vec_sendrecv(dd, lv, v);
@@ -1269,17 +1288,23 @@ void dd_collect_vec(gmx_domdec_t *dd,
      }
  }
  
+void dd_collect_vec(gmx_domdec_t           *dd,
+                    t_state                *state_local,
+                    const PaddedRVecVector *localVector,
+                    PaddedRVecVector       *vector)
+{
+    dd_collect_vec(dd, state_local, localVector, as_rvec_array(vector->data()));
+}
+
  
  void dd_collect_state(gmx_domdec_t *dd,
                        t_state *state_local, t_state *state)
  {
-    int est, i, j, nh;
-
-    nh = state->nhchainlength;
+    int nh = state->nhchainlength;
  
      if (DDMASTER(dd))
      {
-        for (i = 0; i < efptNR; i++)
+        for (int i = 0; i < efptNR; i++)
          {
              state->lambda[i] = state_local->lambda[i];
          }
@@ -1292,124 +1317,81 @@ void dd_collect_state(gmx_domdec_t *dd,
          copy_mat(state_local->fvir_prev, state->fvir_prev);
          copy_mat(state_local->pres_prev, state->pres_prev);
  
-        for (i = 0; i < state_local->ngtc; i++)
+        for (int i = 0; i < state_local->ngtc; i++)
          {
-            for (j = 0; j < nh; j++)
+            for (int j = 0; j < nh; j++)
              {
                  state->nosehoover_xi[i*nh+j]        = state_local->nosehoover_xi[i*nh+j];
                  state->nosehoover_vxi[i*nh+j]       = state_local->nosehoover_vxi[i*nh+j];
              }
              state->therm_integral[i] = state_local->therm_integral[i];
          }
-        for (i = 0; i < state_local->nnhpres; i++)
+        for (int i = 0; i < state_local->nnhpres; i++)
          {
-            for (j = 0; j < nh; j++)
+            for (int j = 0; j < nh; j++)
              {
                  state->nhpres_xi[i*nh+j]        = state_local->nhpres_xi[i*nh+j];
                  state->nhpres_vxi[i*nh+j]       = state_local->nhpres_vxi[i*nh+j];
              }
          }
+        state->baros_integral = state_local->baros_integral;
      }
-    for (est = 0; est < estNR; est++)
+    if (state_local->flags & (1 << estX))
      {
-        if (EST_DISTR(est) && (state_local->flags & (1<<est)))
-        {
-            switch (est)
-            {
-                case estX:
-                    dd_collect_vec(dd, state_local, state_local->x, state->x);
-                    break;
-                case estV:
-                    dd_collect_vec(dd, state_local, state_local->v, state->v);
-                    break;
-                case est_SDX_NOTSUPPORTED:
-                    break;
-                case estCGP:
-                    dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
-                    break;
-                case estDISRE_INITF:
-                case estDISRE_RM3TAV:
-                case estORIRE_INITF:
-                case estORIRE_DTAV:
-                    break;
-                default:
-                    gmx_incons("Unknown state entry encountered in dd_collect_state");
-            }
-        }
+        dd_collect_vec(dd, state_local, &state_local->x, &state->x);
+    }
+    if (state_local->flags & (1 << estV))
+    {
+        dd_collect_vec(dd, state_local, &state_local->v, &state->v);
+    }
+    if (state_local->flags & (1 << estCGP))
+    {
+        dd_collect_vec(dd, state_local, &state_local->cg_p, &state->cg_p);
      }
  }
  
-static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
+static void dd_resize_state(t_state *state, PaddedRVecVector *f, int natoms)
  {
-    int est;
-
      if (debug)
      {
-        fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
+        fprintf(debug, "Resizing state: currently %d, required %d\n", state->natoms, natoms);
      }
  
-    state->nalloc = over_alloc_dd(nalloc);
+    state_change_natoms(state, natoms);
  
-    for (est = 0; est < estNR; est++)
+    if (f != nullptr)
      {
-        if (EST_DISTR(est) && (state->flags & (1<<est)))
-        {
-            /* We need to allocate one element extra, since we might use
-             * (unaligned) 4-wide SIMD loads to access rvec entries.
-             */
-            switch (est)
-            {
-                case estX:
-                    srenew(state->x, state->nalloc + 1);
-                    break;
-                case estV:
-                    srenew(state->v, state->nalloc + 1);
-                    break;
-                case est_SDX_NOTSUPPORTED:
-                    break;
-                case estCGP:
-                    srenew(state->cg_p, state->nalloc + 1);
-                    break;
-                case estDISRE_INITF:
-                case estDISRE_RM3TAV:
-                case estORIRE_INITF:
-                case estORIRE_DTAV:
-                    /* No reallocation required */
-                    break;
-                default:
-                    gmx_incons("Unknown state entry encountered in dd_realloc_state");
-            }
-        }
-    }
-
-    if (f != NULL)
-    {
-        srenew(*f, state->nalloc);
+        /* We need to allocate one element extra, since we might use
+         * (unaligned) 4-wide SIMD loads to access rvec entries.
+         */
+        f->resize(natoms + 1);
      }
  }
  
-static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
-                               int nalloc)
+static void dd_check_alloc_ncg(t_forcerec       *fr,
+                               t_state          *state,
+                               PaddedRVecVector *f,
+                               int               numChargeGroups)
  {
-    if (nalloc > fr->cg_nalloc)
+    if (numChargeGroups > fr->cg_nalloc)
      {
          if (debug)
          {
-            fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
+            fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, numChargeGroups, over_alloc_dd(numChargeGroups));
          }
-        fr->cg_nalloc = over_alloc_dd(nalloc);
+        fr->cg_nalloc = over_alloc_dd(numChargeGroups);
          srenew(fr->cginfo, fr->cg_nalloc);
          if (fr->cutoff_scheme == ecutsGROUP)
          {
              srenew(fr->cg_cm, fr->cg_nalloc);
          }
      }
-    if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
+    if (fr->cutoff_scheme == ecutsVERLET)
      {
          /* We don't use charge groups, we use x in state to set up
           * the atom communication.
           */
-        dd_realloc_state(state, f, nalloc);
+        dd_resize_state(state, f, numChargeGroups);
      }
  }
  
@@ -1418,7 +1400,7 @@ static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
  {
      gmx_domdec_master_t *ma;
      int                  n, i, c, a, nalloc = 0;
-    rvec                *buf = NULL;
+    rvec                *buf = nullptr;
  
      if (DDMASTER(dd))
      {
@@ -1478,9 +1460,9 @@ static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
                                         rvec *v, rvec *lv)
  {
      gmx_domdec_master_t *ma;
-    int                 *scounts = NULL, *disps = NULL;
+    int                 *scounts = nullptr, *disps = nullptr;
      int                  n, i, c, a;
-    rvec                *buf = NULL;
+    rvec                *buf = nullptr;
  
      if (DDMASTER(dd))
      {
@@ -1519,7 +1501,11 @@ static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
  
  static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
  {
-    int i;
+    if (dfhist == nullptr)
+    {
+        return;
+    }
+
      dd_bcast(dd, sizeof(int), &dfhist->bEquil);
      dd_bcast(dd, sizeof(int), &dfhist->nlambda);
      dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
@@ -1534,7 +1520,7 @@ static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
          dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
          dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
  
-        for (i = 0; i < nlam; i++)
+        for (int i = 0; i < nlam; i++)
          {
              dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
              dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
@@ -1548,15 +1534,13 @@ static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
  
  static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
                                  t_state *state, t_state *state_local,
-                                rvec **f)
+                                PaddedRVecVector *f)
  {
-    int  i, j, nh;
-
-    nh = state->nhchainlength;
+    int nh = state->nhchainlength;
  
      if (DDMASTER(dd))
      {
-        for (i = 0; i < efptNR; i++)
+        for (int i = 0; i < efptNR; i++)
          {
              state_local->lambda[i] = state->lambda[i];
          }
@@ -1568,26 +1552,30 @@ static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
          copy_mat(state->boxv, state_local->boxv);
          copy_mat(state->svir_prev, state_local->svir_prev);
          copy_mat(state->fvir_prev, state_local->fvir_prev);
-        copy_df_history(&state_local->dfhist, &state->dfhist);
-        for (i = 0; i < state_local->ngtc; i++)
+        if (state->dfhist != nullptr)
+        {
+            copy_df_history(state_local->dfhist, state->dfhist);
+        }
+        for (int i = 0; i < state_local->ngtc; i++)
          {
-            for (j = 0; j < nh; j++)
+            for (int j = 0; j < nh; j++)
              {
                  state_local->nosehoover_xi[i*nh+j]        = state->nosehoover_xi[i*nh+j];
                  state_local->nosehoover_vxi[i*nh+j]       = state->nosehoover_vxi[i*nh+j];
              }
              state_local->therm_integral[i] = state->therm_integral[i];
          }
-        for (i = 0; i < state_local->nnhpres; i++)
+        for (int i = 0; i < state_local->nnhpres; i++)
          {
-            for (j = 0; j < nh; j++)
+            for (int j = 0; j < nh; j++)
              {
                  state_local->nhpres_xi[i*nh+j]        = state->nhpres_xi[i*nh+j];
                  state_local->nhpres_vxi[i*nh+j]       = state->nhpres_vxi[i*nh+j];
              }
          }
+        state_local->baros_integral = state->baros_integral;
      }
-    dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
+    dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda.data());
      dd_bcast(dd, sizeof(int), &state_local->fep_state);
      dd_bcast(dd, sizeof(real), &state_local->veta);
      dd_bcast(dd, sizeof(real), &state_local->vol0);
@@ -1596,46 +1584,28 @@ static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
      dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
      dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
      dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
-    dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
-    dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
-    dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
-    dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
-    dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
+    dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi.data());
+    dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi.data());
+    dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral.data());
+    dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi.data());
+    dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi.data());
  
      /* communicate df_history -- required for restarting from checkpoint */
-    dd_distribute_dfhist(dd, &state_local->dfhist);
+    dd_distribute_dfhist(dd, state_local->dfhist);
  
-    if (dd->nat_home > state_local->nalloc)
+    dd_resize_state(state_local, f, dd->nat_home);
+
+    if (state_local->flags & (1 << estX))
      {
-        dd_realloc_state(state_local, f, dd->nat_home);
+        dd_distribute_vec(dd, cgs, as_rvec_array(state->x.data()), as_rvec_array(state_local->x.data()));
      }
-    for (i = 0; i < estNR; i++)
+    if (state_local->flags & (1 << estV))
      {
-        if (EST_DISTR(i) && (state_local->flags & (1<<i)))
-        {
-            switch (i)
-            {
-                case estX:
-                    dd_distribute_vec(dd, cgs, state->x, state_local->x);
-                    break;
-                case estV:
-                    dd_distribute_vec(dd, cgs, state->v, state_local->v);
-                    break;
-                case est_SDX_NOTSUPPORTED:
-                    break;
-                case estCGP:
-                    dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
-                    break;
-                case estDISRE_INITF:
-                case estDISRE_RM3TAV:
-                case estORIRE_INITF:
-                case estORIRE_DTAV:
-                    /* Not implemented yet */
-                    break;
-                default:
-                    gmx_incons("Unknown state entry encountered in dd_distribute_state");
-            }
-        }
+        dd_distribute_vec(dd, cgs, as_rvec_array(state->v.data()), as_rvec_array(state_local->v.data()));
+    }
+    if (state_local->flags & (1 << estCGP))
+    {
+        dd_distribute_vec(dd, cgs, as_rvec_array(state->cg_p.data()), as_rvec_array(state_local->cg_p.data()));
      }
  }
  
@@ -1657,7 +1627,7 @@ static char dim2char(int dim)
  static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
                                gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
  {
-    rvec   grid_s[2], *grid_r = NULL, cx, r;
+    rvec   grid_s[2], *grid_r = nullptr, cx, r;
      char   fname[STRLEN], buf[22];
      FILE  *out;
      int    a, i, d, z, y, x;
@@ -1672,7 +1642,7 @@ static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
          snew(grid_r, 2*dd->nnodes);
      }
  
-    dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
+    dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
  
      if (DDMASTER(dd))
      {
@@ -1749,7 +1719,7 @@ void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
      char          fname[STRLEN], buf[22];
      FILE         *out;
      int           i, ii, resnr, c;
-    char         *atomname, *resname;
+    const char   *atomname, *resname;
      real          b;
      gmx_domdec_t *dd;
  
@@ -1765,10 +1735,11 @@ void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
  
      fprintf(out, "TITLE     %s\n", title);
      gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
+    int molb = 0;
      for (i = 0; i < natoms; i++)
      {
          ii = dd->gatindex[i];
-        gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
+        mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
          if (i < dd->comm->nat[ddnatZONE])
          {
              c = 0;
@@ -1994,7 +1965,7 @@ static int dd_simnode2pmenode(const gmx_domdec_t         *dd,
          /* This assumes DD cells with identical x coordinates
           * are numbered sequentially.
           */
-        if (dd->comm->pmenodes == NULL)
+        if (dd->comm->pmenodes == nullptr)
          {
              if (sim_nodeid < dd->nnodes)
              {
@@ -2022,7 +1993,7 @@ static int dd_simnode2pmenode(const gmx_domdec_t         *dd,
  void get_pme_nnodes(const gmx_domdec_t *dd,
                      int *npmenodes_x, int *npmenodes_y)
  {
-    if (dd != NULL)
+    if (dd != nullptr)
      {
          *npmenodes_x = dd->comm->npmenodes_x;
          *npmenodes_y = dd->comm->npmenodes_y;
@@ -2151,25 +2122,26 @@ static void set_zones_ncg_home(gmx_domdec_t *dd)
  }
  
  static void rebuild_cgindex(gmx_domdec_t *dd,
-                            const int *gcgs_index, t_state *state)
+                            const int *gcgs_index, const t_state *state)
  {
-    int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
+    int * gmx_restrict dd_cg_gl = dd->index_gl;
+    int * gmx_restrict cgindex  = dd->cgindex;
+    int                nat      = 0;
  
-    ind        = state->cg_gl;
-    dd_cg_gl   = dd->index_gl;
-    cgindex    = dd->cgindex;
-    nat        = 0;
+    /* Copy back the global charge group indices from state
+     * and rebuild the local charge group to atom index.
+     */
      cgindex[0] = nat;
-    for (i = 0; i < state->ncg_gl; i++)
+    for (unsigned int i = 0; i < state->cg_gl.size(); i++)
      {
          cgindex[i]  = nat;
-        cg_gl       = ind[i];
+        int cg_gl   = state->cg_gl[i];
          dd_cg_gl[i] = cg_gl;
          nat        += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
      }
-    cgindex[i] = nat;
+    cgindex[state->cg_gl.size()] = nat;
  
-    dd->ncg_home = state->ncg_gl;
+    dd->ncg_home = state->cg_gl.size();
      dd->nat_home = nat;
  
      set_zones_ncg_home(dd);
@@ -2192,7 +2164,7 @@ static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
      int         *cginfo;
      int          cg;
  
-    if (fr != NULL)
+    if (fr != nullptr)
      {
          cginfo_mb = fr->cginfo_mb;
          cginfo    = fr->cginfo;
@@ -2203,7 +2175,7 @@ static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
          }
      }
  
-    if (bLocalCG != NULL)
+    if (bLocalCG != nullptr)
      {
          for (cg = cg0; cg < cg1; cg++)
          {
@@ -2286,7 +2258,7 @@ static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
      int i, ngl, nerr;
  
      nerr = 0;
-    if (bLocalCG == NULL)
+    if (bLocalCG == nullptr)
      {
          return nerr;
      }
@@ -2812,7 +2784,7 @@ static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, const gmx_ddbox_t *ddbox,
      {
          cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
          npulse[d]       = 1;
-        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
+        if (dd->nc[d] == 1 || comm->slb_frac[d] == nullptr)
          {
              /* Uniform grid */
              cell_dx = ddbox->box_size[d]/dd->nc[d];
@@ -2904,7 +2876,7 @@ static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, const gmx_ddbox_t *ddbox,
          }
      }
  
-    if (!dlbIsOn(comm))
+    if (!isDlbOn(comm))
      {
          copy_rvec(cellsize_min, comm->cellsize_min);
      }
@@ -2912,7 +2884,7 @@ static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, const gmx_ddbox_t *ddbox,
      for (d = 0; d < comm->npmedecompdim; d++)
      {
          set_pme_maxshift(dd, &comm->ddpme[d],
-                         comm->slb_frac[dd->dim[d]] == NULL, ddbox,
+                         comm->slb_frac[dd->dim[d]] == nullptr, ddbox,
                           comm->ddpme[d].slb_dim_f);
      }
  }
@@ -3479,7 +3451,7 @@ static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
              srenew(cd->ind, np);
              for (i = cd->np_nalloc; i < np; i++)
              {
-                cd->ind[i].index  = NULL;
+                cd->ind[i].index  = nullptr;
                  cd->ind[i].nalloc = 0;
              }
              cd->np_nalloc = np;
@@ -3504,7 +3476,7 @@ static void set_dd_cell_sizes(gmx_domdec_t *dd,
      copy_rvec(comm->cell_x0, comm->old_cell_x0);
      copy_rvec(comm->cell_x1, comm->old_cell_x1);
  
-    if (dlbIsOn(comm))
+    if (isDlbOn(comm))
      {
          if (DDMASTER(dd))
          {
@@ -3545,7 +3517,7 @@ static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
          /* Without PBC we don't have restrictions on the outer cells */
          if (!(dim >= ddbox->npbcdim &&
                (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
-            dlbIsOn(comm) &&
+            isDlbOn(comm) &&
              (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
              comm->cellsize_min[dim])
          {
@@ -3559,11 +3531,11 @@ static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
          }
      }
  
-    if ((dlbIsOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
+    if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
      {
          /* Communicate the boundaries and update cell_ns_x0/1 */
          dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
-        if (dlbIsOn(dd->comm) && dd->ndim > 1)
+        if (isDlbOn(dd->comm) && dd->ndim > 1)
          {
              check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
          }
@@ -3612,7 +3584,7 @@ static void distribute_cg(FILE *fplog,
                            gmx_domdec_t *dd)
  {
      gmx_domdec_master_t *ma;
-    int                **tmp_ind = NULL, *tmp_nalloc = NULL;
+    int                **tmp_ind = nullptr, *tmp_nalloc = nullptr;
      int                  i, icg, j, k, k0, k1, d;
      matrix               tcm;
      rvec                 cg_cm;
@@ -3790,7 +3762,7 @@ static void get_cg_distribution(FILE *fplog, gmx_domdec_t *dd,
                                  t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
                                  rvec pos[])
  {
-    gmx_domdec_master_t *ma = NULL;
+    gmx_domdec_master_t *ma = nullptr;
      ivec                 npulse;
      int                  i, cg_gl;
      int                 *ibuf, buf2[2] = { 0, 0 };
@@ -3817,7 +3789,7 @@ static void get_cg_distribution(FILE *fplog, gmx_domdec_t *dd,
      }
      else
      {
-        ibuf = NULL;
+        ibuf = nullptr;
      }
      dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
  
@@ -3841,9 +3813,9 @@ static void get_cg_distribution(FILE *fplog, gmx_domdec_t *dd,
      }
  
      dd_scatterv(dd,
-                bMaster ? ma->ibuf : NULL,
-                bMaster ? ma->ibuf+dd->nnodes : NULL,
-                bMaster ? ma->cg : NULL,
+                bMaster ? ma->ibuf : nullptr,
+                bMaster ? ma->ibuf+dd->nnodes : nullptr,
+                bMaster ? ma->cg : nullptr,
                  dd->ncg_home*sizeof(int), dd->index_gl);
  
      /* Determine the home charge group sizes */
@@ -4116,39 +4088,21 @@ static void cg_move_error(FILE *fplog,
  
  static void rotate_state_atom(t_state *state, int a)
  {
-    int est;
-
-    for (est = 0; est < estNR; est++)
+    if (state->flags & (1 << estX))
      {
-        if (EST_DISTR(est) && (state->flags & (1<<est)))
-        {
-            switch (est)
-            {
-                case estX:
-                    /* Rotate the complete state; for a rectangular box only */
-                    state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
-                    state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
-                    break;
-                case estV:
-                    state->v[a][YY] = -state->v[a][YY];
-                    state->v[a][ZZ] = -state->v[a][ZZ];
-                    break;
-                case est_SDX_NOTSUPPORTED:
-                    break;
-                case estCGP:
-                    state->cg_p[a][YY] = -state->cg_p[a][YY];
-                    state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
-                    break;
-                case estDISRE_INITF:
-                case estDISRE_RM3TAV:
-                case estORIRE_INITF:
-                case estORIRE_DTAV:
-                    /* These are distances, so not affected by rotation */
-                    break;
-                default:
-                    gmx_incons("Unknown state entry encountered in rotate_state_atom");
-            }
-        }
+        /* Rotate the complete state; for a rectangular box only */
+        state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
+        state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
+    }
+    if (state->flags & (1 << estV))
+    {
+        state->v[a][YY] = -state->v[a][YY];
+        state->v[a][ZZ] = -state->v[a][ZZ];
+    }
+    if (state->flags & (1 << estCGP))
+    {
+        state->cg_p[a][YY] = -state->cg_p[a][YY];
+        state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
      }
  }
  
@@ -4232,7 +4186,7 @@ static void calc_cg_move(FILE *fplog, gmx_int64_t step,
                      if (pos_d >= limit1[d])
                      {
                          cg_move_error(fplog, dd, step, cg, d, 1,
-                                      cg_cm != state->x, limitd[d],
+                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                        cg_cm[cg], cm_new, pos_d);
                      }
                      dev[d] = 1;
@@ -4259,7 +4213,7 @@ static void calc_cg_move(FILE *fplog, gmx_int64_t step,
                      if (pos_d < limit0[d])
                      {
                          cg_move_error(fplog, dd, step, cg, d, -1,
-                                      cg_cm != state->x, limitd[d],
+                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                        cg_cm[cg], cm_new, pos_d);
                      }
                      dev[d] = -1;
@@ -4343,7 +4297,7 @@ static void calc_cg_move(FILE *fplog, gmx_int64_t step,
  
  static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                                 gmx_domdec_t *dd, ivec tric_dir,
-                               t_state *state, rvec **f,
+                               t_state *state, PaddedRVecVector *f,
                                 t_forcerec *fr,
                                 gmx_bool bCompact,
                                 t_nrnb *nrnb,
@@ -4352,16 +4306,15 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
  {
      int               *move;
      int                npbcdim;
-    int                ncg[DIM*2], nat[DIM*2];
-    int                c, i, cg, k, d, dim, dim2, dir, d2, d3;
+    int                ncg[DIM*2] = { 0 }, nat[DIM*2] = { 0 };
+    int                i, cg, k, d, dim, dim2, dir, d2, d3;
      int                mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec;
      int                sbuf[2], rbuf[2];
      int                home_pos_cg, home_pos_at, buf_pos;
      int                flag;
-    gmx_bool           bV = FALSE, bCGP = FALSE;
      real               pos_d;
      matrix             tcm;
-    rvec              *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1;
+    rvec              *cg_cm = nullptr, cell_x0, cell_x1, limitd, limit0, limit1;
      int               *cgindex;
      cginfo_mb_t       *cginfo_mb;
      gmx_domdec_comm_t *comm;
@@ -4379,29 +4332,9 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
          cg_cm = fr->cg_cm;
      }
  
-    for (i = 0; i < estNR; i++)
-    {
-        if (EST_DISTR(i))
-        {
-            switch (i)
-            {
-                case estX: /* Always present */ break;
-                case estV:   bV   = (state->flags & (1<<i)); break;
-                case est_SDX_NOTSUPPORTED: break;
-                case estCGP: bCGP = (state->flags & (1<<i)); break;
-                case estLD_RNG:
-                case estLD_RNGI:
-                case estDISRE_INITF:
-                case estDISRE_RM3TAV:
-                case estORIRE_INITF:
-                case estORIRE_DTAV:
-                    /* No processing required */
-                    break;
-                default:
-                    gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
-            }
-        }
-    }
+    // Positions are always present, so there's nothing to flag
+    bool bV   = state->flags & (1<<estV);
+    bool bCGP = state->flags & (1<<estCGP);
  
      if (dd->ncg_tot > comm->nalloc_int)
      {
@@ -4410,13 +4343,6 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
      }
      move = comm->buf_int;
  
-    /* Clear the count */
-    for (c = 0; c < dd->ndim*2; c++)
-    {
-        ncg[c] = 0;
-        nat[c] = 0;
-    }
-
      npbcdim = dd->npbcdim;
  
      for (d = 0; (d < DIM); d++)
@@ -4472,7 +4398,7 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                           cgindex,
                           ( thread   *dd->ncg_home)/nthread,
                           ((thread+1)*dd->ncg_home)/nthread,
-                         fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
+                         fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
                           move);
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
@@ -4551,7 +4477,7 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
               */
              home_pos_cg =
                  compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
-                                        nvec, state->x, comm, FALSE);
+                                        nvec, as_rvec_array(state->x.data()), comm, FALSE);
              if (bCompact)
              {
                  home_pos_cg -= *ncg_moved;
@@ -4565,16 +4491,19 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
      vec         = 0;
      home_pos_at =
          compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
-                                nvec, vec++, state->x, comm, bCompact);
+                                nvec, vec++, as_rvec_array(state->x.data()),
+                                comm, bCompact);
      if (bV)
      {
          compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
-                                nvec, vec++, state->v, comm, bCompact);
+                                nvec, vec++, as_rvec_array(state->v.data()),
+                                comm, bCompact);
      }
      if (bCGP)
      {
          compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
-                                nvec, vec++, state->cg_p, comm, bCompact);
+                                nvec, vec++, as_rvec_array(state->cg_p.data()),
+                                comm, bCompact);
      }
  
      if (bCompact)
@@ -4650,6 +4579,13 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
              nvr      += i;
          }
  
+        dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
+        if (fr->cutoff_scheme == ecutsGROUP)
+        {
+            /* Here we resize to more than necessary and shrink later */
+            dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
+        }
+
          /* Process the received charge groups */
          buf_pos = 0;
          for (cg = 0; cg < ncg_recv; cg++)
@@ -4681,7 +4617,7 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                  /* Check which direction this cg should go */
                  for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
                  {
-                    if (dlbIsOn(dd->comm))
+                    if (isDlbOn(dd->comm))
                      {
                          /* The cell boundaries for dimension d2 are not equal
                           * for each cell row of the lower dimension(s),
@@ -4762,7 +4698,6 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                  dd->index_gl[home_pos_cg]  = comm->buf_int[cg*DD_CGIBS];
                  dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
                  /* Copy the state from the buffer */
-                dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
                  if (fr->cutoff_scheme == ecutsGROUP)
                  {
                      cg_cm = fr->cg_cm;
@@ -4778,10 +4713,6 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                      comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
                  }
  
-                if (home_pos_at+nrcg > state->nalloc)
-                {
-                    dd_realloc_state(state, f, home_pos_at+nrcg);
-                }
                  for (i = 0; i < nrcg; i++)
                  {
                      copy_rvec(comm->vbuf.v[buf_pos++],
@@ -4850,6 +4781,12 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
      dd->ncg_home = home_pos_cg;
      dd->nat_home = home_pos_at;
  
+    if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
+    {
+        /* We overallocated before, we need to set the right size here */
+        dd_resize_state(state, f, dd->nat_home);
+    }
+
      if (debug)
      {
          fprintf(debug,
@@ -4892,7 +4829,7 @@ static double force_flop_count(t_nrnb *nrnb)
           * for the normal loops and again half it for water loops.
           */
          name = nrnb_str(i);
-        if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
+        if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
          {
              sum += nrnb->n[i]*0.25*cost_nrnb(i);
          }
@@ -4904,7 +4841,7 @@ static double force_flop_count(t_nrnb *nrnb)
      for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
      {
          name = nrnb_str(i);
-        if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
+        if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
          {
              sum += nrnb->n[i]*cost_nrnb(i);
          }
@@ -4951,7 +4888,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
  {
      gmx_domdec_comm_t *comm;
      domdec_load_t     *load;
-    domdec_root_t     *root = NULL;
+    domdec_root_t     *root = nullptr;
      int                d, dim, i, pos;
      float              cell_frac = 0, sbuf[DD_NLOAD_MAX];
      gmx_bool           bSepPME;
@@ -4982,7 +4919,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
              (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
          {
              load = &comm->load[d];
-            if (dlbIsOn(dd->comm))
+            if (isDlbOn(dd->comm))
              {
                  cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
              }
@@ -4991,7 +4928,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
              {
                  sbuf[pos++] = dd_force_load(comm);
                  sbuf[pos++] = sbuf[0];
-                if (dlbIsOn(dd->comm))
+                if (isDlbOn(dd->comm))
                  {
                      sbuf[pos++] = sbuf[0];
                      sbuf[pos++] = cell_frac;
@@ -5011,7 +4948,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
              {
                  sbuf[pos++] = comm->load[d+1].sum;
                  sbuf[pos++] = comm->load[d+1].max;
-                if (dlbIsOn(dd->comm))
+                if (isDlbOn(dd->comm))
                  {
                      sbuf[pos++] = comm->load[d+1].sum_m;
                      sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
@@ -5040,7 +4977,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
              if (dd->ci[dim] == dd->master_ci[dim])
              {
                  /* We are the root, process this row */
-                if (dlbIsOn(comm))
+                if (isDlbOn(comm))
                  {
                      root = comm->root[d];
                  }
@@ -5057,7 +4994,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
                      load->sum += load->load[pos++];
                      load->max  = std::max(load->max, load->load[pos]);
                      pos++;
-                    if (dlbIsOn(dd->comm))
+                    if (isDlbOn(dd->comm))
                      {
                          if (root->bLimited)
                          {
@@ -5091,7 +5028,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
                          pos++;
                      }
                  }
-                if (dlbIsOn(comm) && root->bLimited)
+                if (isDlbOn(comm) && root->bLimited)
                  {
                      load->sum_m *= dd->nc[dim];
                      load->flags |= (1<<d);
@@ -5106,7 +5043,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
          comm->load_step  += comm->cycl[ddCyclStep];
          comm->load_sum   += comm->load[0].sum;
          comm->load_max   += comm->load[0].max;
-        if (dlbIsOn(comm))
+        if (isDlbOn(comm))
          {
              for (d = 0; d < dd->ndim; d++)
              {
@@ -5131,6 +5068,21 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
      }
  }
  
+static float dd_force_load_fraction(gmx_domdec_t *dd)
+{
+    /* Return the relative performance loss on the total run time
+     * due to the force calculation load imbalance.
+     */
+    if (dd->comm->nload > 0 && dd->comm->load_step > 0)
+    {
+        return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
+    }
+    else
+    {
+        return 0;
+    }
+}
+
  static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
  {
      /* Return the relative performance loss on the total run time
@@ -5150,99 +5102,110 @@ static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
  
  static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
  {
-    char               buf[STRLEN];
-    int                npp, npme, nnodes, d, limp;
-    float              imbal, pme_f_ratio, lossf = 0, lossp = 0;
-    gmx_bool           bLim;
-    gmx_domdec_comm_t *comm;
+    gmx_domdec_comm_t *comm = dd->comm;
  
-    comm = dd->comm;
-    if (DDMASTER(dd) && comm->nload > 0)
-    {
-        npp    = dd->nnodes;
-        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
-        nnodes = npp + npme;
-        if (dd->nnodes > 1 && comm->load_sum > 0)
-        {
-            imbal  = comm->load_max*npp/comm->load_sum - 1;
-            lossf  = dd_force_imb_perf_loss(dd);
-            sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
-            fprintf(fplog, "%s", buf);
-            fprintf(stderr, "\n");
-            fprintf(stderr, "%s", buf);
-            sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
-            fprintf(fplog, "%s", buf);
-            fprintf(stderr, "%s", buf);
-        }
-        bLim = FALSE;
-        if (dlbIsOn(comm))
-        {
-            sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
-            for (d = 0; d < dd->ndim; d++)
+    /* Only the master rank prints loads and only if we measured loads */
+    if (!DDMASTER(dd) || comm->nload == 0)
+    {
+        return;
+    }
+
+    char  buf[STRLEN];
+    int   numPpRanks   = dd->nnodes;
+    int   numPmeRanks  = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
+    int   numRanks     = numPpRanks + numPmeRanks;
+    float lossFraction = 0;
+
+    /* Print the average load imbalance and performance loss */
+    if (dd->nnodes > 1 && comm->load_sum > 0)
+    {
+        float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
+        lossFraction    = dd_force_imb_perf_loss(dd);
+        fprintf(stderr, "\n");
+        sprintf(buf,
+                " Load balancing based on %d %% of the MD step time\n"
+                " Average load imbalance: %.1f %%\n"
+                " Part of the total run time spent waiting due to load imbalance: %.1f %%\n",
+                static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5),
+                imbalance*100,
+                lossFraction*100);
+        fprintf(fplog, "%s", buf);
+        fprintf(stderr, "%s", buf);
+    }
+
+    /* Print during what percentage of steps the  load balancing was limited */
+    bool dlbWasLimited = false;
+    if (isDlbOn(comm))
+    {
+        sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
+        for (int d = 0; d < dd->ndim; d++)
+        {
+            int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
+            sprintf(buf+strlen(buf), " %c %d %%",
+                    dim2char(dd->dim[d]), limitPercentage);
+            if (limitPercentage >= 50)
              {
-                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
-                sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
-                if (limp >= 50)
-                {
-                    bLim = TRUE;
-                }
+                dlbWasLimited = true;
              }
-            sprintf(buf+strlen(buf), "\n");
-            fprintf(fplog, "%s", buf);
-            fprintf(stderr, "%s", buf);
          }
-        if (npme > 0 && comm->load_mdf > 0 && comm->load_step > 0)
+        sprintf(buf + strlen(buf), "\n");
+        fprintf(fplog, "%s", buf);
+        fprintf(stderr, "%s", buf);
+    }
+
+    /* Print the performance loss due to separate PME - PP rank imbalance */
+    float lossFractionPme = 0;
+    if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
+    {
+        float pmeForceRatio = comm->load_pme/comm->load_mdf;
+        lossFractionPme     = (comm->load_pme - comm->load_mdf)/comm->load_step;
+        if (lossFractionPme <= 0)
          {
-            pme_f_ratio = comm->load_pme/comm->load_mdf;
-            lossp       = (comm->load_pme - comm->load_mdf)/comm->load_step;
-            if (lossp <= 0)
-            {
-                lossp *= (float)npme/(float)nnodes;
-            }
-            else
-            {
-                lossp *= (float)npp/(float)nnodes;
-            }
-            sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
-            fprintf(fplog, "%s", buf);
-            fprintf(stderr, "%s", buf);
-            sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
-            fprintf(fplog, "%s", buf);
-            fprintf(stderr, "%s", buf);
+            lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
          }
-        fprintf(fplog, "\n");
-        fprintf(stderr, "\n");
+        else
+        {
+            lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
+        }
+        sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
+        fprintf(fplog, "%s", buf);
+        fprintf(stderr, "%s", buf);
+        sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossFractionPme)*100);
+        fprintf(fplog, "%s", buf);
+        fprintf(stderr, "%s", buf);
+    }
+    fprintf(fplog, "\n");
+    fprintf(stderr, "\n");
  
-        if (lossf >= DD_PERF_LOSS_WARN)
+    if (lossFraction >= DD_PERF_LOSS_WARN)
+    {
+        sprintf(buf,
+                "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
+                "      in the domain decomposition.\n", lossFraction*100);
+        if (!isDlbOn(comm))
          {
-            sprintf(buf,
-                    "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
-                    "      in the domain decomposition.\n", lossf*100);
-            if (!dlbIsOn(comm))
-            {
-                sprintf(buf+strlen(buf), "      You might want to use dynamic load balancing (option -dlb.)\n");
-            }
-            else if (bLim)
-            {
-                sprintf(buf+strlen(buf), "      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
-            }
-            fprintf(fplog, "%s\n", buf);
-            fprintf(stderr, "%s\n", buf);
+            sprintf(buf+strlen(buf), "      You might want to use dynamic load balancing (option -dlb.)\n");
          }
-        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
+        else if (dlbWasLimited)
          {
-            sprintf(buf,
-                    "NOTE: %.1f %% performance was lost because the PME ranks\n"
-                    "      had %s work to do than the PP ranks.\n"
-                    "      You might want to %s the number of PME ranks\n"
-                    "      or %s the cut-off and the grid spacing.\n",
-                    fabs(lossp*100),
-                    (lossp < 0) ? "less"     : "more",
-                    (lossp < 0) ? "decrease" : "increase",
-                    (lossp < 0) ? "decrease" : "increase");
-            fprintf(fplog, "%s\n", buf);
-            fprintf(stderr, "%s\n", buf);
+            sprintf(buf+strlen(buf), "      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
          }
+        fprintf(fplog, "%s\n", buf);
+        fprintf(stderr, "%s\n", buf);
+    }
+    if (numPmeRanks > 0 && fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
+    {
+        sprintf(buf,
+                "NOTE: %.1f %% performance was lost because the PME ranks\n"
+                "      had %s work to do than the PP ranks.\n"
+                "      You might want to %s the number of PME ranks\n"
+                "      or %s the cut-off and the grid spacing.\n",
+                fabs(lossFractionPme*100),
+                (lossFractionPme < 0) ? "less"     : "more",
+                (lossFractionPme < 0) ? "decrease" : "increase",
+                (lossFractionPme < 0) ? "decrease" : "increase");
+        fprintf(fplog, "%s\n", buf);
+        fprintf(stderr, "%s\n", buf);
      }
  }
  
@@ -5304,7 +5267,7 @@ static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
          fprintf(fplog, "\n");
      }
      fprintf(fplog, "DD  step %s", gmx_step_str(step, buf));
-    if (dlbIsOn(dd->comm))
+    if (isDlbOn(dd->comm))
      {
          fprintf(fplog, "  vol min/aver %5.3f%c",
                  dd_vol_min(dd), flags ? '!' : ' ');
@@ -5322,7 +5285,7 @@ static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
  
  static void dd_print_load_verbose(gmx_domdec_t *dd)
  {
-    if (dlbIsOn(dd->comm))
+    if (isDlbOn(dd->comm))
      {
          fprintf(stderr, "vol %4.2f%c ",
                  dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
@@ -5363,7 +5326,7 @@ static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
      if (bPartOfGroup)
      {
          dd->comm->mpi_comm_load[dim_ind] = c_row;
-        if (dd->comm->dlbState != edlbsOffForever)
+        if (!isDlbDisabled(dd->comm))
          {
              if (dd->ci[dim] == dd->master_ci[dim])
              {
@@ -5601,7 +5564,7 @@ static void setup_neighbor_relations(gmx_domdec_t *dd)
          }
      }
  
-    if (dd->comm->dlbState != edlbsOffForever)
+    if (!isDlbDisabled(dd->comm))
      {
          snew(dd->comm->root, dd->ndim);
      }
@@ -5783,7 +5746,7 @@ static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
  
      if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
      {
-        ma->vbuf = NULL;
+        ma->vbuf = nullptr;
      }
      else
      {
@@ -5963,7 +5926,7 @@ static void make_dd_communicators(FILE *fplog, t_commrec *cr,
       * Real reordering is only supported on very few architectures,
       * Blue Gene is one of them.
       */
-    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
+    CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
  
      if (cr->npmenodes > 0)
      {
@@ -6026,8 +5989,8 @@ static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size
      int    i, n;
      double dbl;
  
-    slb_frac = NULL;
-    if (nc > 1 && size_string != NULL)
+    slb_frac = nullptr;
+    if (nc > 1 && size_string != nullptr)
      {
          if (fplog)
          {
@@ -6169,56 +6132,95 @@ static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
      return r;
  }
  
-static int check_dlb_support(FILE *fplog, t_commrec *cr,
-                             const char *dlb_opt, gmx_bool bRecordLoad,
-                             unsigned long Flags, const t_inputrec *ir)
+/*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
+ */
+static int forceDlbOffOrBail(int                cmdlineDlbState,
+                             const std::string &reasonStr,
+                             t_commrec         *cr,
+                             FILE              *fplog)
  {
-    int           dlbState = -1;
-    char          buf[STRLEN];
+    std::string dlbNotSupportedErr  = "Dynamic load balancing requested, but ";
+    std::string dlbDisableNote      = "NOTE: disabling dynamic load balancing as ";
+
+    if (cmdlineDlbState == edlbsOnUser)
+    {
+        gmx_fatal(FARGS, (dlbNotSupportedErr + reasonStr).c_str());
+    }
+    else if (cmdlineDlbState == edlbsOffCanTurnOn)
+    {
+        dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
+    }
+    return edlbsOffForever;
+}
+
+/*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
+ *
+ * This function parses the parameters of "-dlb" command line option setting
+ * corresponding state values. Then it checks the consistency of the determined
+ * state with other run parameters and settings. As a result, the initial state
+ * may be altered or an error may be thrown if incompatibility of options is detected.
+ *
+ * \param [in] fplog       Pointer to mdrun log file.
+ * \param [in] cr          Pointer to MPI communication object.
+ * \param [in] dlb_opt     Pointer to the '-dlb' command line argument's option.
+ * \param [in] bRecordLoad True if the load balancer is recording load information.
+ * \param [in] Flags       Simulation flags passed from main.
+ * \param [in] ir          Pointer mdrun to input parameters.
+ * \returns                DLB initial/startup state.
+ */
+static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
+                                    const char *dlb_opt, gmx_bool bRecordLoad,
+                                    unsigned long Flags, const t_inputrec *ir)
+{
+    int dlbState = -1;
  
      switch (dlb_opt[0])
      {
          case 'a': dlbState = edlbsOffCanTurnOn; break;
-        case 'n': dlbState = edlbsOffForever;   break;
-        case 'y': dlbState = edlbsOnForever;    break;
+        case 'n': dlbState = edlbsOffUser;      break;
+        case 'y': dlbState = edlbsOnUser;       break;
          default: gmx_incons("Unknown dlb_opt");
      }
  
+    /* Reruns don't support DLB: bail or override auto mode */
      if (Flags & MD_RERUN)
      {
-        return edlbsOffForever;
+        std::string reasonStr = "it is not supported in reruns.";
+        return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
      }
  
+    /* Unsupported integrators */
      if (!EI_DYNAMICS(ir->eI))
      {
-        if (dlbState == edlbsOnForever)
-        {
-            sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
-            dd_warning(cr, fplog, buf);
-        }
-
-        return edlbsOffForever;
+        auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
+        return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
      }
  
+    /* Without cycle counters we can't time work to balance on */
      if (!bRecordLoad)
      {
-        dd_warning(cr, fplog, "NOTE: Cycle counters unsupported or not enabled in kernel. Cannot use dynamic load balancing.\n");
-        return edlbsOffForever;
+        std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
+        return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
      }
  
      if (Flags & MD_REPRODUCIBLE)
      {
+        std::string reasonStr = "you started a reproducible run.";
          switch (dlbState)
          {
+            case edlbsOffUser:
+                break;
              case edlbsOffForever:
+                GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
                  break;
              case edlbsOffCanTurnOn:
+                return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
+                break;
              case edlbsOnCanTurnOff:
-                dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
-                dlbState = edlbsOffForever;
+                GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
                  break;
-            case edlbsOnForever:
-                dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
+            case edlbsOnUser:
+                return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
                  break;
              default:
                  gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
@@ -6234,7 +6236,7 @@ static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
      int dim;
  
      dd->ndim = 0;
-    if (getenv("GMX_DD_ORDER_ZYX") != NULL)
+    if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
      {
          /* Decomposition order z,y,x */
          if (fplog)
@@ -6277,7 +6279,7 @@ static gmx_domdec_comm_t *init_dd_comm()
      }
  
      comm->nalloc_int = 0;
-    comm->buf_int    = NULL;
+    comm->buf_int    = nullptr;
  
      vec_rvec_init(&comm->vbuf);
  
@@ -6325,12 +6327,12 @@ static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
      dd->bScrewPBC = (ir->ePBC == epbcSCREW);
  
      dd->pme_recv_f_alloc = 0;
-    dd->pme_recv_f_buf   = NULL;
+    dd->pme_recv_f_buf   = nullptr;
  
      /* Initialize to GPU share count to 0, might change later */
      comm->nrank_gpu_shared = 0;
  
-    comm->dlbState         = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
+    comm->dlbState         = determineInitialDlbState(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
      dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
      /* To consider turning DLB on after 2*nstlist steps we need to check
       * at partitioning count 3. Thus we need to increase the first count by 2.
@@ -6520,13 +6522,14 @@ static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
      }
      else
      {
-        set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
+        set_ddbox_cr(cr, nullptr, ir, box, &comm->cgs_gl, x, ddbox);
  
          /* We need to choose the optimal DD grid and possibly PME nodes */
          real limit =
              dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
                             nPmeRanks,
-                           comm->dlbState != edlbsOffForever, dlb_scale,
+                           !isDlbDisabled(comm),
+                           dlb_scale,
                             comm->cellsize_limit, comm->cutoff,
                             comm->bInterCGBondeds);
  
@@ -6536,7 +6539,7 @@ static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
              gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
              sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
                      !bC ? "-rdd" : "-rcon",
-                    comm->dlbState != edlbsOffForever ? " or -dds" : "",
+                    comm->dlbState != edlbsOffUser ? " or -dds" : "",
                      bC ? " or your LINCS settings" : "");
  
              gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
@@ -6588,7 +6591,7 @@ static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
           */
          if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
              comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
-            getenv("GMX_PMEONEDD") == NULL)
+            getenv("GMX_PMEONEDD") == nullptr)
          {
              comm->npmedecompdim = 2;
              comm->npmenodes_x   = dd->nc[XX];
@@ -6631,7 +6634,7 @@ static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
      *npme_y = comm->npmenodes_y;
  
      snew(comm->slb_frac, DIM);
-    if (comm->dlbState == edlbsOffForever)
+    if (isDlbDisabled(comm))
      {
          comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
          comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
@@ -6640,7 +6643,7 @@ static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
  
      if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
      {
-        if (comm->bBondComm || comm->dlbState != edlbsOffForever)
+        if (comm->bBondComm || !isDlbDisabled(comm))
          {
              /* Set the bonded communication distance to halfway
               * the minimum and the maximum,
@@ -6648,7 +6651,7 @@ static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
               */
              real acs           = average_cellsize_min(dd, ddbox);
              comm->cutoff_mbody = 0.5*(r_bonded + acs);
-            if (comm->dlbState != edlbsOffForever)
+            if (!isDlbDisabled(comm))
              {
                  /* Check if this does not limit the scaling */
                  comm->cutoff_mbody = std::min(comm->cutoff_mbody, dlb_scale*acs);
@@ -6721,15 +6724,15 @@ static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
          cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
      }
  
+    /* Turn off DLB if we're too close to the cell size limit. */
      if (cellsize_min < comm->cellsize_limit*1.05)
      {
-        char buf[STRLEN];
-        sprintf(buf, "step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, but the minimum cell size is smaller than 1.05 times the cell size limit. Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
-        dd_warning(cr, fplog, buf);
+        auto str = gmx::formatString("step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, "
+                                     "but the minimum cell size is smaller than 1.05 times the cell size limit."
+                                     "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
+        dd_warning(cr, fplog, str.c_str());
  
-        /* Change DLB from "auto" to "no". */
          comm->dlbState = edlbsOffForever;
-
          return;
      }
  
@@ -6832,8 +6835,8 @@ void dd_init_bondeds(FILE *fplog,
      else
      {
          /* Only communicate atoms based on cut-off */
-        comm->cglink   = NULL;
-        comm->bLocalCG = NULL;
+        comm->cglink   = nullptr;
+        comm->bLocalCG = nullptr;
      }
  }
  
@@ -6848,7 +6851,7 @@ static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
      real               limit, shrink;
      char               buf[64];
  
-    if (fplog == NULL)
+    if (fplog == nullptr)
      {
          return;
      }
@@ -6940,7 +6943,7 @@ static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
                      std::max(comm->cutoff, comm->cutoff_mbody));
              fprintf(fplog, "%40s  %-7s %6.3f nm\n",
                      "multi-body bonded interactions", "(-rdd)",
-                    (comm->bBondComm || dlbIsOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
+                    (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
          }
          if (bInterCGVsites)
          {
@@ -7059,7 +7062,7 @@ static void set_cell_limits_dlb(gmx_domdec_t      *dd,
      {
          comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
      }
-    if (dlbIsOn(comm))
+    if (isDlbOn(comm))
      {
          set_dlb_limits(dd);
      }
@@ -7110,12 +7113,12 @@ static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
      {
          fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
      }
-    if (comm->dlbState != edlbsOffForever)
+    if (!isDlbDisabled(comm))
      {
          set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
      }
  
-    print_dd_settings(fplog, dd, mtop, ir, dlbIsOn(comm), dlb_scale, ddbox);
+    print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
      if (comm->dlbState == edlbsOffCanTurnOn)
      {
          if (fplog)
@@ -7253,7 +7256,7 @@ static gmx_bool test_dd_cutoff(t_commrec *cr,
      dd = cr->dd;
  
      set_ddbox(dd, FALSE, cr, ir, state->box,
-              TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
+              TRUE, &dd->comm->cgs_gl, as_rvec_array(state->x.data()), &ddbox);
  
      LocallyLimited = 0;
  
@@ -7269,8 +7272,7 @@ static gmx_bool test_dd_cutoff(t_commrec *cr,
  
          np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
  
-        if (dd->comm->dlbState != edlbsOffForever && dim < ddbox.npbcdim &&
-            dd->comm->cd[d].np_dlb > 0)
+        if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
          {
              if (np > dd->comm->cd[d].np_dlb)
              {
@@ -7288,12 +7290,12 @@ static gmx_bool test_dd_cutoff(t_commrec *cr,
          }
      }
  
-    if (dd->comm->dlbState != edlbsOffForever)
+    if (!isDlbDisabled(dd->comm))
      {
          /* If DLB is not active yet, we don't need to check the grid jumps.
           * Actually we shouldn't, because then the grid jump data is not set.
           */
-        if (dlbIsOn(dd->comm) &&
+        if (isDlbOn(dd->comm) &&
              check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
          {
              LocallyLimited = 1;
@@ -7408,7 +7410,7 @@ static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
  
  gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
  {
-    return dlbIsOn(dd->comm);
+    return isDlbOn(dd->comm);
  }
  
  gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
@@ -7596,7 +7598,7 @@ set_dd_corners(const gmx_domdec_t *dd,
          c->c[1][0] = comm->cell_x0[dim1];
          /* All rows can see this row */
          c->c[1][1] = comm->cell_x0[dim1];
-        if (dlbIsOn(dd->comm))
+        if (isDlbOn(dd->comm))
          {
              c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
              if (bDistMB)
@@ -7615,7 +7617,7 @@ set_dd_corners(const gmx_domdec_t *dd,
              {
                  c->c[2][j] = comm->cell_x0[dim2];
              }
-            if (dlbIsOn(dd->comm))
+            if (isDlbOn(dd->comm))
              {
                  /* Use the maximum of the i-cells that see a j-cell */
                  for (i = 0; i < zones->nizone; i++)
@@ -7650,7 +7652,7 @@ set_dd_corners(const gmx_domdec_t *dd,
               */
              c->cr1[0] = comm->cell_x1[dim1];
              c->cr1[3] = comm->cell_x1[dim1];
-            if (dlbIsOn(dd->comm))
+            if (isDlbOn(dd->comm))
              {
                  c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
                  if (bDistMB)
@@ -7932,7 +7934,8 @@ get_zone_pulse_cgs(gmx_domdec_t *dd,
  
  static void setup_dd_communication(gmx_domdec_t *dd,
                                     matrix box, gmx_ddbox_t *ddbox,
-                                   t_forcerec *fr, t_state *state, rvec **f)
+                                   t_forcerec *fr,
+                                   t_state *state, PaddedRVecVector *f)
  {
      int                    dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
      int                    nzone, nzone_send, zone, zonei, cg0, cg1;
@@ -7947,7 +7950,7 @@ static void setup_dd_communication(gmx_domdec_t *dd,
      real                   r_comm2, r_bcomm2;
      dd_corners_t           corners;
      ivec                   tric_dist;
-    rvec                  *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
+    rvec                  *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr, *recv_vr;
      real                   skew_fac2_d, skew_fac_01;
      rvec                   sf2_round;
      int                    nsend, nat;
@@ -7979,11 +7982,11 @@ static void setup_dd_communication(gmx_domdec_t *dd,
              cg_cm = fr->cg_cm;
              break;
          case ecutsVERLET:
-            cg_cm = state->x;
+            cg_cm = as_rvec_array(state->x.data());
              break;
          default:
              gmx_incons("unimplemented");
-            cg_cm = NULL;
+            cg_cm = nullptr;
      }
  
      for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
@@ -8002,7 +8005,7 @@ static void setup_dd_communication(gmx_domdec_t *dd,
      bBondComm = comm->bBondComm;
  
      /* Do we need to determine extra distances for multi-body bondeds? */
-    bDistMB = (comm->bInterCGMultiBody && dlbIsOn(dd->comm) && dd->ndim > 1);
+    bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
  
      /* Do we need to determine extra distances for only two-body bondeds? */
      bDist2B = (bBondComm && !bDistMB);
@@ -8317,7 +8320,7 @@ static void setup_dd_communication(gmx_domdec_t *dd,
              }
              else
              {
-                cg_cm = state->x;
+                cg_cm = as_rvec_array(state->x.data());
              }
              /* Communicate cg_cm */
              if (cd->bInPlace)
@@ -8395,7 +8398,7 @@ static void setup_dd_communication(gmx_domdec_t *dd,
           * So we pass NULL for the forcerec.
           */
          dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
-                      NULL, comm->bLocalCG);
+                      nullptr, comm->bLocalCG);
      }
  
      if (debug)
@@ -8438,7 +8441,7 @@ static void set_zones_size(gmx_domdec_t *dd,
      zones = &comm->zones;
  
      /* Do we need to determine extra distances for multi-body bondeds? */
-    bDistMB = (comm->bInterCGMultiBody && dlbIsOn(dd->comm) && dd->ndim > 1);
+    bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
  
      for (z = zone_start; z < zone_end; z++)
      {
@@ -8458,7 +8461,7 @@ static void set_zones_size(gmx_domdec_t *dd,
              /* With a staggered grid we have different sizes
               * for non-shifted dimensions.
               */
-            if (dlbIsOn(dd->comm) && zones->shift[z][dim] == 0)
+            if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
              {
                  if (d == 1)
                  {
@@ -8487,7 +8490,7 @@ static void set_zones_size(gmx_domdec_t *dd,
              if (zones->shift[z][dim] > 0)
              {
                  dim = dd->dim[d];
-                if (!dlbIsOn(dd->comm) || d == 0)
+                if (!isDlbOn(dd->comm) || d == 0)
                  {
                      zones->size[z].x0[dim] = comm->cell_x1[dim];
                      zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
@@ -8721,7 +8724,7 @@ static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort
  {
      int a, atot, cg, cg0, cg1, i;
  
-    if (cgindex == NULL)
+    if (cgindex == nullptr)
      {
          /* Avoid the useless loop of the atoms within a cg */
          order_vec_cg(ncg, sort, v, buf);
@@ -8940,7 +8943,7 @@ static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state
      }
      else
      {
-        cgindex = NULL;
+        cgindex = nullptr;
      }
  
      /* Remove the charge groups which are no longer at home here */
@@ -8952,37 +8955,19 @@ static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state
      }
  
      /* Reorder the state */
-    for (i = 0; i < estNR; i++)
+    if (state->flags & (1 << estX))
      {
-        if (EST_DISTR(i) && (state->flags & (1<<i)))
-        {
-            switch (i)
-            {
-                case estX:
-                    order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
-                    break;
-                case estV:
-                    order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
-                    break;
-                case est_SDX_NOTSUPPORTED:
-                    break;
-                case estCGP:
-                    order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
-                    break;
-                case estLD_RNG:
-                case estLD_RNGI:
-                case estDISRE_INITF:
-                case estDISRE_RM3TAV:
-                case estORIRE_INITF:
-                case estORIRE_DTAV:
-                    /* No ordering required */
-                    break;
-                default:
-                    gmx_incons("Unknown state entry encountered in dd_sort_state");
-                    break;
-            }
-        }
+        order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->x.data()), vbuf);
+    }
+    if (state->flags & (1 << estV))
+    {
+        order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->v.data()), vbuf);
+    }
+    if (state->flags & (1 << estCGP))
+    {
+        order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->cg_p.data()), vbuf);
      }
+
      if (fr->cutoff_scheme == ecutsGROUP)
      {
          /* Reorder cgcm */
@@ -9086,7 +9071,7 @@ void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
  
      gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
  
-    if (fplog == NULL)
+    if (fplog == nullptr)
      {
          return;
      }
@@ -9141,7 +9126,7 @@ void dd_partition_system(FILE                *fplog,
                           const gmx_mtop_t    *top_global,
                           const t_inputrec    *ir,
                           t_state             *state_local,
-                         rvec               **f,
+                         PaddedRVecVector    *f,
                           t_mdatoms           *mdatoms,
                           gmx_localtop_t      *top_local,
                           t_forcerec          *fr,
@@ -9198,7 +9183,7 @@ void dd_partition_system(FILE                *fplog,
  
      bNStGlobalComm = (step % nstglobalcomm == 0);
  
-    if (!dlbIsOn(comm))
+    if (!isDlbOn(comm))
      {
          bDoDLB = FALSE;
      }
@@ -9249,7 +9234,7 @@ void dd_partition_system(FILE                *fplog,
              }
              comm->n_load_collect++;
  
-            if (dlbIsOn(comm))
+            if (isDlbOn(comm))
              {
                  if (DDMASTER(dd))
                  {
@@ -9371,10 +9356,10 @@ void dd_partition_system(FILE                *fplog,
          ncgindex_set = 0;
  
          set_ddbox(dd, bMasterState, cr, ir, state_global->box,
-                  TRUE, cgs_gl, state_global->x, &ddbox);
+                  TRUE, cgs_gl, as_rvec_array(state_global->x.data()), &ddbox);
  
          get_cg_distribution(fplog, dd, cgs_gl,
-                            state_global->box, &ddbox, state_global->x);
+                            state_global->box, &ddbox, as_rvec_array(state_global->x.data()));
  
          dd_distribute_state(dd, cgs_gl,
                              state_global, state_local, f);
@@ -9387,7 +9372,7 @@ void dd_partition_system(FILE                *fplog,
          if (fr->cutoff_scheme == ecutsGROUP)
          {
              calc_cgcm(fplog, 0, dd->ncg_home,
-                      &top_local->cgs, state_local->x, fr->cg_cm);
+                      &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
          }
  
          inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
@@ -9418,7 +9403,7 @@ void dd_partition_system(FILE                *fplog,
          {
              /* Redetermine the cg COMs */
              calc_cgcm(fplog, 0, dd->ncg_home,
-                      &top_local->cgs, state_local->x, fr->cg_cm);
+                      &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
          }
  
          inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
@@ -9426,9 +9411,9 @@ void dd_partition_system(FILE                *fplog,
          dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
  
          set_ddbox(dd, bMasterState, cr, ir, state_local->box,
-                  TRUE, &top_local->cgs, state_local->x, &ddbox);
+                  TRUE, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
  
-        bRedist = dlbIsOn(comm);
+        bRedist = isDlbOn(comm);
      }
      else
      {
@@ -9445,7 +9430,7 @@ void dd_partition_system(FILE                *fplog,
              copy_rvec(comm->box_size, ddbox.box_size);
          }
          set_ddbox(dd, bMasterState, cr, ir, state_local->box,
-                  bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
+                  bNStGlobalComm, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
  
          bBoxChanged = TRUE;
          bRedist     = TRUE;
@@ -9534,8 +9519,8 @@ void dd_partition_system(FILE                *fplog,
                                    0, dd->ncg_home,
                                    comm->zones.dens_zone0,
                                    fr->cginfo,
-                                  state_local->x,
-                                  ncg_moved, bRedist ? comm->moved : NULL,
+                                  as_rvec_array(state_local->x.data()),
+                                  ncg_moved, bRedist ? comm->moved : nullptr,
                                    fr->nbv->grp[eintLocal].kernel_type,
                                    fr->nbv->grp[eintLocal].nbat);
  
@@ -9570,6 +9555,10 @@ void dd_partition_system(FILE                *fplog,
          }
          dd_sort_state(dd, fr->cg_cm, fr, state_local,
                        bResortAll ? -1 : ncg_home_old);
+
+        /* After sorting and compacting we set the correct size */
+        dd_resize_state(state_local, f, dd->nat_home);
+
          /* Rebuild all the indices */
          ga2la_clear(dd->ga2la);
          ncgindex_set = 0;
@@ -9598,7 +9587,7 @@ void dd_partition_system(FILE                *fplog,
  
      /*
         write_dd_pdb("dd_home",step,"dump",top_global,cr,
-                 -1,state_local->x,state_local->box);
+                 -1,as_rvec_array(state_local->x.data()),state_local->box);
       */
  
      wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
@@ -9611,7 +9600,7 @@ void dd_partition_system(FILE                *fplog,
      dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
                        comm->cellsize_min, np,
                        fr,
-                      fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
+                      fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
                        vsite, top_global, top_local);
  
      wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
@@ -9653,10 +9642,8 @@ void dd_partition_system(FILE                *fplog,
       * or constraint communication.
       */
      state_local->natoms = comm->nat[ddnatNR-1];
-    if (state_local->natoms > state_local->nalloc)
-    {
-        dd_realloc_state(state_local, f, state_local->natoms);
-    }
+
+    dd_resize_state(state_local, f, state_local->natoms);
  
      if (fr->bF_NoVirSum)
      {
@@ -9690,31 +9677,15 @@ void dd_partition_system(FILE                *fplog,
      forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
                          dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
  
-    /* We make the all mdatoms up to nat_tot_con.
-     * We could save some work by only setting invmass
-     * between nat_tot and nat_tot_con.
-     */
-    /* This call also sets the new number of home particles to dd->nat_home */
-    atoms2md(top_global, ir,
-             comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
-
-    /* Now we have the charges we can sort the FE interactions */
-    dd_sort_local_top(dd, mdatoms, top_local);
-
-    if (vsite != NULL)
-    {
-        /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
-        split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
-                                  mdatoms, FALSE, vsite);
-    }
+    /* Update atom data for mdatoms and several algorithms */
+    mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
+                              nullptr, mdatoms, vsite, nullptr);
  
      if (ir->implicit_solvent)
      {
          make_local_gb(cr, fr->born, ir->gb_algorithm);
      }
  
-    setup_bonded_threading(fr, &top_local->idef);
-
      if (!(cr->duty & DUTY_PME))
      {
          /* Send the charges and/or c6/sigmas to our PME only node */
@@ -9762,15 +9733,15 @@ void dd_partition_system(FILE                *fplog,
       * the last vsite construction, we need to communicate the constructing
       * atom coordinates again (for spreading the forces this MD step).
       */
-    dd_move_x_vsites(dd, state_local->box, state_local->x);
+    dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
  
      wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
  
      if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
      {
-        dd_move_x(dd, state_local->box, state_local->x);
+        dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()));
          write_dd_pdb("dd_dump", step, "dump", top_global, cr,
-                     -1, state_local->x, state_local->box);
+                     -1, as_rvec_array(state_local->x.data()), state_local->box);
      }
  
      /* Store the partitioning step */