-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
- *
+/*
+ * This file is part of the GROMACS molecular simulation package.
*
- * This file is part of Gromacs Copyright (c) 1991-2008
- * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
+ * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * And Hey:
- * Gnomes, ROck Monsters And Chili Sauce
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include <stdio.h>
-#include <time.h>
+#include "gromacs/legacyheaders/domdec.h"
+
+#include "config.h"
+
+#include <assert.h>
#include <math.h>
-#include <string.h>
+#include <stdio.h>
#include <stdlib.h>
-#include "typedefs.h"
-#include "smalloc.h"
-#include "gmx_fatal.h"
-#include "gmx_fatal_collective.h"
-#include "vec.h"
-#include "domdec.h"
-#include "domdec_network.h"
-#include "nrnb.h"
-#include "pbc.h"
-#include "chargegroup.h"
-#include "constr.h"
-#include "mdatoms.h"
-#include "names.h"
-#include "force.h"
-#include "pme.h"
-#include "pull.h"
-#include "pull_rotation.h"
-#include "mdrun.h"
-#include "nsgrid.h"
-#include "shellfc.h"
-#include "mtop_util.h"
-#include "gmx_ga2la.h"
-#include "gmx_sort.h"
-#include "macros.h"
-#include "nbnxn_search.h"
-#include "bondf.h"
-#include "gmx_omp_nthreads.h"
-#include "gpu_utils.h"
-
-#include "gromacs/fileio/futil.h"
+#include <string.h>
+#include <time.h>
+
+#include "gromacs/bonded/bonded.h"
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/fileio/pdbio.h"
+#include "gromacs/imd/imd.h"
+#include "gromacs/legacyheaders/bonded-threading.h"
+#include "gromacs/legacyheaders/chargegroup.h"
+#include "gromacs/legacyheaders/constr.h"
+#include "gromacs/legacyheaders/domdec_network.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/gmx_ga2la.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/gpu_utils.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/mdatoms.h"
+#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/nsgrid.h"
+#include "gromacs/legacyheaders/pme.h"
+#include "gromacs/legacyheaders/shellfc.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/pulling/pull.h"
+#include "gromacs/pulling/pull_rotation.h"
+#include "gromacs/swap/swapcoords.h"
#include "gromacs/timing/wallcycle.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/basenetwork.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/qsort_threadsafe.h"
+#include "gromacs/utility/smalloc.h"
#define DDRANK(dd, rank) (rank)
#define DDMASTERRANK(dd) (dd->masterrank)
double load_pme;
/* The last partition step */
- gmx_large_int_t partition_step;
+ gmx_int64_t partition_step;
/* Debugging */
int nstDDDump;
/* Factor to account for pressure scaling during nstlist steps */
#define DD_PRES_SCALE_MARGIN 1.02
-/* Allowed performance loss before we DLB or warn */
-#define DD_PERF_LOSS 0.05
+/* Turn on DLB when the load imbalance causes this amount of total loss.
+ * There is a bit of overhead with DLB and it's difficult to achieve
+ * a load imbalance of less than 2% with DLB.
+ */
+#define DD_PERF_LOSS_DLB_ON 0.02
+
+/* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
+#define DD_PERF_LOSS_WARN 0.05
#define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
{
gmx_domdec_master_t *ma = NULL;
int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
- t_block *cgs_gl;
if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
{
if (state_local->ddp_count == dd->ddp_count)
{
+ /* The local state and DD are in sync, use the DD indices */
ncg_home = dd->ncg_home;
cg = dd->index_gl;
nat_home = dd->nat_home;
}
else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
{
+ /* The DD is out of sync with the local state, but we have stored
+ * the cg indices with the local state, so we can use those.
+ */
+ t_block *cgs_gl;
+
cgs_gl = &dd->comm->cgs_gl;
ncg_home = state_local->ncg_gl;
gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
}
- buf2[0] = dd->ncg_home;
- buf2[1] = dd->nat_home;
+ buf2[0] = ncg_home;
+ buf2[1] = nat_home;
if (DDMASTER(dd))
{
ma = dd->ma;
/* Collect the charge group indices on the master */
dd_gatherv(dd,
- dd->ncg_home*sizeof(int), dd->index_gl,
+ ncg_home*sizeof(int), cg,
DDMASTER(dd) ? ma->ibuf : NULL,
DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
DDMASTER(dd) ? ma->cg : NULL);
case estCGP:
dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
break;
- case estLD_RNG:
- if (state->nrngi == 1)
- {
- if (DDMASTER(dd))
- {
- for (i = 0; i < state_local->nrng; i++)
- {
- state->ld_rng[i] = state_local->ld_rng[i];
- }
- }
- }
- else
- {
- dd_gather(dd, state_local->nrng*sizeof(state->ld_rng[0]),
- state_local->ld_rng, state->ld_rng);
- }
- break;
- case estLD_RNGI:
- if (state->nrngi == 1)
- {
- if (DDMASTER(dd))
- {
- state->ld_rngi[0] = state_local->ld_rngi[0];
- }
- }
- else
- {
- dd_gather(dd, sizeof(state->ld_rngi[0]),
- state_local->ld_rngi, state->ld_rngi);
- }
- break;
case estDISRE_INITF:
case estDISRE_RM3TAV:
case estORIRE_INITF:
case estCGP:
srenew(state->cg_p, state->nalloc);
break;
- case estLD_RNG:
- case estLD_RNGI:
case estDISRE_INITF:
case estDISRE_RM3TAV:
case estORIRE_INITF:
case estCGP:
dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
break;
- case estLD_RNG:
- if (state->nrngi == 1)
- {
- dd_bcastc(dd,
- state_local->nrng*sizeof(state_local->ld_rng[0]),
- state->ld_rng, state_local->ld_rng);
- }
- else
- {
- dd_scatter(dd,
- state_local->nrng*sizeof(state_local->ld_rng[0]),
- state->ld_rng, state_local->ld_rng);
- }
- break;
- case estLD_RNGI:
- if (state->nrngi == 1)
- {
- dd_bcastc(dd, sizeof(state_local->ld_rngi[0]),
- state->ld_rngi, state_local->ld_rngi);
- }
- else
- {
- dd_scatter(dd, sizeof(state_local->ld_rngi[0]),
- state->ld_rngi, state_local->ld_rngi);
- }
- break;
case estDISRE_INITF:
case estDISRE_RM3TAV:
case estORIRE_INITF:
return c;
}
-static void write_dd_grid_pdb(const char *fn, gmx_large_int_t step,
+static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
{
rvec grid_s[2], *grid_r = NULL, cx, r;
- char fname[STRLEN], format[STRLEN], buf[22];
+ char fname[STRLEN], buf[22];
FILE *out;
int a, i, d, z, y, x;
matrix tric;
snew(grid_r, 2*dd->nnodes);
}
- dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
+ dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
if (DDMASTER(dd))
{
}
}
sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
- sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
out = gmx_fio_fopen(fname, "w");
gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
a = 1;
cx[YY] = grid_r[i*2+y][YY];
cx[ZZ] = grid_r[i*2+z][ZZ];
mvmul(tric, cx, r);
- fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
- ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
+ gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
+ 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
}
}
}
}
}
-void write_dd_pdb(const char *fn, gmx_large_int_t step, const char *title,
+void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
gmx_mtop_t *mtop, t_commrec *cr,
int natoms, rvec x[], matrix box)
{
- char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
+ char fname[STRLEN], buf[22];
FILE *out;
int i, ii, resnr, c;
char *atomname, *resname;
sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
- sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
- sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
-
out = gmx_fio_fopen(fname, "w");
fprintf(out, "TITLE %s\n", title);
{
b = dd->comm->zones.n + 1;
}
- fprintf(out, strlen(atomname) < 4 ? format : format4,
- "ATOM", (ii+1)%100000,
- atomname, resname, ' ', resnr%10000, ' ',
- 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
+ gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
+ 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
}
fprintf(out, "TER\n");
if (debug)
{
- fprintf(debug, "Receive coordinates from PP nodes:");
+ fprintf(debug, "Receive coordinates from PP ranks:");
for (x = 0; x < *nmy_ddnodes; x++)
{
fprintf(debug, " %d", (*my_ddnodes)[x]);
if (!bLocalCG[dd->index_gl[i]])
{
fprintf(stderr,
- "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
+ "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
nerr++;
}
}
}
if (ngl != dd->ncg_tot)
{
- fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
+ fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
nerr++;
}
{
if (have[dd->gatindex[a]] > 0)
{
- fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
+ fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
}
else
{
{
if (a >= dd->nat_tot)
{
- fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
+ fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
nerr++;
}
else
have[a] = 1;
if (dd->gatindex[a] != i)
{
- fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
+ fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
nerr++;
}
}
if (ngl != dd->nat_tot)
{
fprintf(stderr,
- "DD node %d, %s: %d global atom indices, %d local atoms\n",
+ "DD rank %d, %s: %d global atom indices, %d local atoms\n",
dd->rank, where, ngl, dd->nat_tot);
}
for (a = 0; a < dd->nat_tot; a++)
if (have[a] == 0)
{
fprintf(stderr,
- "DD node %d, %s: local atom %d, global %d has no global index\n",
+ "DD rank %d, %s: local atom %d, global %d has no global index\n",
dd->rank, where, a+1, dd->gatindex[a]+1);
}
}
if (nerr > 0)
{
- gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
+ gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
dd->rank, where, nerr);
}
}
return grid_jump_limit;
}
-static gmx_bool check_grid_jump(gmx_large_int_t step,
+static gmx_bool check_grid_jump(gmx_int64_t step,
gmx_domdec_t *dd,
real cutoff,
gmx_ddbox_t *ddbox,
/* This error should never be triggered under normal
* circumstances, but you never know ...
*/
- gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
+ gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
gmx_step_str(step, buf),
dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
}
}
+enum {
+ setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
+};
+
+/* Set the domain boundaries. Use for static (or no) load balancing,
+ * and also for the starting state for dynamic load balancing.
+ * setmode determines if and where the boundaries are stored, use enum above.
+ * Returns the number of communication pulses in npulse.
+ */
static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
- gmx_bool bMaster, ivec npulse)
+ int setmode, ivec npulse)
{
gmx_domdec_comm_t *comm;
int d, j;
{
/* Uniform grid */
cell_dx = ddbox->box_size[d]/dd->nc[d];
- if (bMaster)
+ switch (setmode)
{
- for (j = 0; j < dd->nc[d]+1; j++)
- {
- dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
- }
- }
- else
- {
- comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
- comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
+ case setcellsizeslbMASTER:
+ for (j = 0; j < dd->nc[d]+1; j++)
+ {
+ dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
+ }
+ break;
+ case setcellsizeslbLOCAL:
+ comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
+ comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
+ break;
+ default:
+ break;
}
cellsize = cell_dx*ddbox->skew_fac[d];
- while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
+ while (cellsize*npulse[d] < comm->cutoff)
{
npulse[d]++;
}
* all cell borders in a loop to obtain identical values
* to the master distribution case and to determine npulse.
*/
- if (bMaster)
+ if (setmode == setcellsizeslbMASTER)
{
cell_x = dd->ma->cell_x[d];
}
}
cellsize_min[d] = min(cellsize_min[d], cellsize);
}
- if (!bMaster)
+ if (setmode == setcellsizeslbLOCAL)
{
comm->cell_x0[d] = cell_x[dd->ci[d]];
comm->cell_x1[d] = cell_x[dd->ci[d]+1];
+ }
+ if (setmode != setcellsizeslbMASTER)
+ {
sfree(cell_x);
}
}
if (d < ddbox->npbcdim &&
dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
{
- gmx_fatal_collective(FARGS, NULL, dd,
- "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
- dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
- comm->cutoff,
- dd->nc[d], dd->nc[d],
- dd->nnodes > dd->nc[d] ? "cells" : "processors");
+ char error_string[STRLEN];
+
+ sprintf(error_string,
+ "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
+ dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
+ comm->cutoff,
+ dd->nc[d], dd->nc[d],
+ dd->nnodes > dd->nc[d] ? "cells" : "ranks");
+
+ if (setmode == setcellsizeslbLOCAL)
+ {
+            gmx_fatal_collective(FARGS, NULL, dd, "%s", error_string);
+ }
+ else
+ {
+            gmx_fatal(FARGS, "%s", error_string);
+ }
}
}
static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
int d, int dim, gmx_domdec_root_t *root,
gmx_ddbox_t *ddbox,
- gmx_bool bUniform, gmx_large_int_t step, real cellsize_limit_f, int range[])
+ gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
{
gmx_domdec_comm_t *comm;
int ncd, i, j, nmin, nmin_old;
static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
int d, int dim, gmx_domdec_root_t *root,
gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_large_int_t step)
+ gmx_bool bUniform, gmx_int64_t step)
{
gmx_domdec_comm_t *comm;
int ncd, d1, i, j, pos;
static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_large_int_t step)
+ gmx_bool bUniform, gmx_int64_t step)
{
gmx_domdec_comm_t *comm;
int d, dim, d1;
static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
+ gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
gmx_wallcycle_t wcycle)
{
gmx_domdec_comm_t *comm;
static void set_dd_cell_sizes(gmx_domdec_t *dd,
gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
+ gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
gmx_wallcycle_t wcycle)
{
gmx_domdec_comm_t *comm;
}
else
{
- set_dd_cell_sizes_slb(dd, ddbox, FALSE, npulse);
+ set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
realloc_comm_ind(dd, npulse);
}
static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
gmx_ddbox_t *ddbox,
rvec cell_ns_x0, rvec cell_ns_x1,
- gmx_large_int_t step)
+ gmx_int64_t step)
{
gmx_domdec_comm_t *comm;
int dim_ind, dim;
}
}
-static void distribute_cg(FILE *fplog, gmx_large_int_t step,
+static void distribute_cg(FILE *fplog, gmx_int64_t step,
matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
gmx_domdec_t *dd)
{
}
}
-static void get_cg_distribution(FILE *fplog, gmx_large_int_t step, gmx_domdec_t *dd,
+static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
rvec pos[])
{
int i, cg_gl;
int *ibuf, buf2[2] = { 0, 0 };
gmx_bool bMaster = DDMASTER(dd);
+
if (bMaster)
{
ma = dd->ma;
check_screw_box(box);
}
- set_dd_cell_sizes_slb(dd, ddbox, TRUE, npulse);
+ set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
for (i = 0; i < dd->nnodes; i++)
static void print_cg_move(FILE *fplog,
gmx_domdec_t *dd,
- gmx_large_int_t step, int cg, int dim, int dir,
+ gmx_int64_t step, int cg, int dim, int dir,
gmx_bool bHaveLimitdAndCMOld, real limitd,
rvec cm_old, rvec cm_new, real pos_d)
{
static void cg_move_error(FILE *fplog,
gmx_domdec_t *dd,
- gmx_large_int_t step, int cg, int dim, int dir,
+ gmx_int64_t step, int cg, int dim, int dir,
gmx_bool bHaveLimitdAndCMOld, real limitd,
rvec cm_old, rvec cm_new, real pos_d)
{
return comm->moved;
}
-static void calc_cg_move(FILE *fplog, gmx_large_int_t step,
+static void calc_cg_move(FILE *fplog, gmx_int64_t step,
gmx_domdec_t *dd,
t_state *state,
ivec tric_dir, matrix tcm,
}
}
-static void dd_redistribute_cg(FILE *fplog, gmx_large_int_t step,
+static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
gmx_domdec_t *dd, ivec tric_dir,
t_state *state, rvec **f,
t_forcerec *fr,
fprintf(fplog, "\n");
fprintf(stderr, "\n");
- if (lossf >= DD_PERF_LOSS)
+ if (lossf >= DD_PERF_LOSS_WARN)
{
sprintf(buf,
"NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
fprintf(fplog, "%s\n", buf);
fprintf(stderr, "%s\n", buf);
}
- if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
+ if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
{
sprintf(buf,
- "NOTE: %.1f %% performance was lost because the PME nodes\n"
- " had %s work to do than the PP nodes.\n"
- " You might want to %s the number of PME nodes\n"
+ "NOTE: %.1f %% performance was lost because the PME ranks\n"
+ " had %s work to do than the PP ranks.\n"
+ " You might want to %s the number of PME ranks\n"
" or %s the cut-off and the grid spacing.\n",
fabs(lossp*100),
(lossp < 0) ? "less" : "more",
}
}
-static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_large_int_t step)
+static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
{
int flags, d;
char buf[22];
physicalnode_id_hash = gmx_physicalnode_id_hash();
- gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->nodeid);
+ gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
dd = cr->dd;
if (fplog)
{
fprintf(fplog,
- "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
+ "Domain decomposition rank %d, coordinates %d %d %d\n\n",
dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
if (debug)
{
fprintf(debug,
- "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
+ "Domain decomposition rank %d, coordinates %d %d %d\n\n",
dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
}
}
else if (fplog)
{
- fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
+ fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
fprintf(fplog,
"Will not use a Cartesian communicator for PP <-> PME\n\n");
}
if (fplog)
{
- fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
+ fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
case ddnoPP_PME:
if (fplog)
{
- fprintf(fplog, "Order of the nodes: PP first, PME last\n");
+ fprintf(fplog, "Order of the ranks: PP first, PME last\n");
}
break;
case ddnoINTERLEAVE:
*/
if (fplog)
{
- fprintf(fplog, "Interleaving PP and PME nodes\n");
+ fprintf(fplog, "Interleaving PP and PME ranks\n");
}
comm->pmenodes = dd_pmenodes(cr);
break;
if (fplog)
{
- fprintf(fplog, "This is a %s only node\n\n",
+ fprintf(fplog, "This rank does only %s work.\n\n",
(cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
}
}
return n;
}
-static int dd_nst_env(FILE *fplog, const char *env_var, int def)
+static int dd_getenv(FILE *fplog, const char *env_var, int def)
{
char *val;
int nst;
if (ir->ns_type == ensSIMPLE)
{
- gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
+ gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
}
if (ir->nstlist == 0)
if (fplog)
{
fprintf(fplog,
- "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
+ "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
}
snew(dd, 1);
dd->npbcdim = ePBC2npbcdim(ir->ePBC);
dd->bScrewPBC = (ir->ePBC == epbcSCREW);
- dd->bSendRecv2 = dd_nst_env(fplog, "GMX_DD_SENDRECV2", 0);
- comm->dlb_scale_lim = dd_nst_env(fplog, "GMX_DLB_MAX", 10);
- comm->eFlop = dd_nst_env(fplog, "GMX_DLB_FLOP", 0);
- recload = dd_nst_env(fplog, "GMX_DD_LOAD", 1);
- comm->nstSortCG = dd_nst_env(fplog, "GMX_DD_SORT", 1);
- comm->nstDDDump = dd_nst_env(fplog, "GMX_DD_DUMP", 0);
- comm->nstDDDumpGrid = dd_nst_env(fplog, "GMX_DD_DUMP_GRID", 0);
- comm->DD_debug = dd_nst_env(fplog, "GMX_DD_DEBUG", 0);
+ dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
+ comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
+ comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
+ recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
+ comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
+ comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
+ comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
+ comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
dd->pme_recv_f_alloc = 0;
dd->pme_recv_f_buf = NULL;
if (dd->nc[XX] == 0)
{
bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
- sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
+ sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
!bC ? "-rdd" : "-rcon",
comm->eDLB != edlbNO ? " or -dds" : "",
bC ? " or your LINCS settings" : "");
gmx_fatal_collective(FARGS, cr, NULL,
- "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
+ "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
"%s\n"
"Look in the log file for details on the domain decomposition",
cr->nnodes-cr->npmenodes, limit, buf);
if (fplog)
{
fprintf(fplog,
- "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
+ "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
}
if (cr->nnodes - dd->nnodes != cr->npmenodes)
{
gmx_fatal_collective(FARGS, cr, NULL,
- "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
+ "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
}
if (cr->npmenodes > dd->nnodes)
{
gmx_fatal_collective(FARGS, cr, NULL,
- "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
+ "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
}
if (cr->npmenodes > 0)
{
comm->npmenodes = dd->nnodes;
}
- if (EEL_PME(ir->coulombtype))
+ if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
{
/* The following choices should match those
* in comm_cost_est in domdec_setup.c.
}
-static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_large_int_t step)
+static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
{
gmx_domdec_t *dd;
gmx_domdec_comm_t *comm;
}
else
{
- set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
+ set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
fprintf(fplog, "The initial number of communication pulses is:");
for (d = 0; d < dd->ndim; d++)
{
}
/* This env var can override npulse */
- d = dd_nst_env(debug, "GMX_DD_NPULSE", 0);
+ d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
if (d > 0)
{
npulse = d;
snew(comm->dth, comm->nth);
}
- if (EEL_PME(ir->coulombtype))
+ if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
{
init_ddpme(dd, &comm->ddpme[0], 0);
if (comm->npmedecompdim >= 2)
if (dd->pme_nodeid >= 0)
{
gmx_fatal_collective(FARGS, NULL, dd,
- "Can not have separate PME nodes without PME electrostatics");
+ "Can not have separate PME ranks without PME electrostatics");
}
}
corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
}
/* Apply the triclinic couplings */
+ assert(ddbox->npbcdim <= DIM);
for (i = YY; i < ddbox->npbcdim; i++)
{
for (j = XX; j < i; j++)
int i1, i2, i_new;
/* The new indices are not very ordered, so we qsort them */
- qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
+ gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
/* sort2 is already ordered, so now we can merge the two arrays */
i1 = 0;
fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
}
/* Determine the order of the charge groups using qsort */
- qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
+ gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
}
return ncg_new;
}
void dd_partition_system(FILE *fplog,
- gmx_large_int_t step,
+ gmx_int64_t step,
t_commrec *cr,
gmx_bool bMasterState,
int nstglobalcomm,
gmx_domdec_comm_t *comm;
gmx_ddbox_t ddbox = {0};
t_block *cgs_gl;
- gmx_large_int_t step_pcoupl;
+ gmx_int64_t step_pcoupl;
rvec cell_ns_x0, cell_ns_x1;
int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
if (DDMASTER(dd))
{
bTurnOnDLB =
- (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
+ (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
if (debug)
{
fprintf(debug, "step %s, imb loss %f\n",
*/
/* This call also sets the new number of home particles to dd->nat_home */
atoms2md(top_global, ir,
- comm->nat[ddnatCON], dd->gatindex, 0, dd->nat_home, mdatoms);
+ comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
/* Now we have the charges we can sort the FE interactions */
dd_sort_local_top(dd, mdatoms, top_local);
if (vsite != NULL)
{
/* Now we have updated mdatoms, we can do the last vsite bookkeeping */
- split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
+ split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
+ mdatoms, FALSE, vsite);
}
if (shellfc)
if (!(cr->duty & DUTY_PME))
{
- /* Send the charges to our PME only node */
- gmx_pme_send_q(cr, mdatoms->nChargePerturbed,
- mdatoms->chargeA, mdatoms->chargeB,
- dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
+        /* Send the charges and/or c6/sigmas to our PME-only rank */
+ gmx_pme_send_parameters(cr,
+ fr->ic,
+ mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
+ mdatoms->chargeA, mdatoms->chargeB,
+ mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
+ mdatoms->sigmaA, mdatoms->sigmaB,
+ dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
}
if (constr)
dd_make_local_rotation_groups(dd, ir->rot);
}
+ if (ir->eSwapCoords != eswapNO)
+ {
+ /* Update the local groups needed for ion swapping */
+ dd_make_local_swap_groups(dd, ir->swap);
+ }
+
+ /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
+ dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
add_dd_statistics(dd);