return dlbState;
}
-/* Sets the order of the DD dimensions, returns the number of DD dimensions */
-static int set_dd_dim(const ivec numDDCells,
- const DDSettings &ddSettings,
- ivec dims)
-{
- int ndim = 0;
- if (ddSettings.useDDOrderZYX)
- {
- /* Decomposition order z,y,x */
- for (int dim = DIM - 1; dim >= 0; dim--)
- {
- if (numDDCells[dim] > 1)
- {
- dims[ndim++] = dim;
- }
- }
- }
- else
- {
- /* Decomposition order x,y,z */
- for (int dim = 0; dim < DIM; dim++)
- {
- if (numDDCells[dim] > 1)
- {
- dims[ndim++] = dim;
- }
- }
- }
-
- if (ndim == 0)
- {
- /* Set dim[0] to avoid extra checks on ndim in several places */
- dims[0] = XX;
- }
-
- return ndim;
-}
-
static gmx_domdec_comm_t *init_dd_comm()
{
gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
/*! \brief Generate the simulation system information */
static DDSystemInfo
getSystemInfo(const gmx::MDLogger &mdlog,
- t_commrec *cr,
+ const t_commrec *cr,
const DomdecOptions &options,
const gmx_mtop_t *mtop,
const t_inputrec *ir,
return systemInfo;
}
-/*! \brief Set the cell size and interaction limits, as well as the DD grid
- *
- * Also computes the initial ddbox.
- */
-static DDGridSetup
-getDDGridSetup(const gmx::MDLogger &mdlog,
- t_commrec *cr,
- const DomdecOptions &options,
- const DDSettings &ddSettings,
- const DDSystemInfo &systemInfo,
- const gmx_mtop_t *mtop,
- const t_inputrec *ir,
- const matrix box,
- gmx::ArrayRef<const gmx::RVec> xGlobal,
- gmx_ddbox_t *ddbox)
+/*! \brief Exit with a fatal error if the DDGridSetup cannot be
+ * implemented. */
+static void
+checkDDGridSetup(const DDGridSetup &ddGridSetup,
+ const t_commrec *cr,
+ const DomdecOptions &options,
+ const DDSettings &ddSettings,
+ const DDSystemInfo &systemInfo,
+ const real cellsizeLimit,
+ const gmx_ddbox_t &ddbox)
{
- DDGridSetup ddGridSetup;
-
- if (options.numCells[XX] > 0)
+ if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
{
- copy_ivec(options.numCells, ddGridSetup.numDomains);
- set_ddbox_cr(*cr, &ddGridSetup.numDomains, *ir, box, xGlobal, ddbox);
+ char buf[STRLEN];
+ gmx_bool bC = (systemInfo.haveSplitConstraints &&
+ systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
+ sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
+ !bC ? "-rdd" : "-rcon",
+ ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
+ bC ? " or your LINCS settings" : "");
- if (options.numPmeRanks >= 0)
- {
- ddGridSetup.numPmeOnlyRanks = options.numPmeRanks;
- }
- else
- {
- /* When the DD grid is set explicitly and -npme is set to auto,
- * don't use PME ranks. We check later if the DD grid is
- * compatible with the total number of ranks.
- */
- ddGridSetup.numPmeOnlyRanks = 0;
- }
- }
- else
- {
- set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
-
- /* We need to choose the optimal DD grid and possibly PME nodes */
- ddGridSetup =
- dd_choose_grid(mdlog, cr, ir, mtop, box, ddbox,
- options.numPmeRanks,
- ddSettings.request1DAnd1Pulse,
- !isDlbDisabled(ddSettings.initialDlbState),
- options.dlbScaling,
- systemInfo);
-
- if (ddGridSetup.numDomains[XX] == 0)
- {
- char buf[STRLEN];
- gmx_bool bC = (systemInfo.haveSplitConstraints &&
- systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
- sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
- !bC ? "-rdd" : "-rcon",
- ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
- bC ? " or your LINCS settings" : "");
-
- gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
- "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
- "%s\n"
- "Look in the log file for details on the domain decomposition",
- cr->nnodes - ddGridSetup.numPmeOnlyRanks,
- ddGridSetup.cellsizeLimit,
- buf);
- }
+ gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
+ "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
+ "%s\n"
+ "Look in the log file for details on the domain decomposition",
+ cr->nnodes - ddGridSetup.numPmeOnlyRanks,
+ cellsizeLimit,
+ buf);
}
- const real acs = average_cellsize_min(*ddbox, ddGridSetup.numDomains);
- if (acs < systemInfo.cellsizeLimit)
+ const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
+ if (acs < cellsizeLimit)
{
if (options.numCells[XX] <= 0)
{
{
gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
"The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
- acs, systemInfo.cellsizeLimit);
+ acs, cellsizeLimit);
}
}
if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
{
gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
- "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
+ "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
}
if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
"The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
}
-
- ddGridSetup.numDDDimensions = set_dd_dim(ddGridSetup.numDomains, ddSettings,
- ddGridSetup.ddDimensions);
-
- return ddGridSetup;
}
/*! \brief Set the cell size and interaction limits, as well as the DD grid */
const DDSettings &ddSettings,
const DDSystemInfo &systemInfo,
const DDGridSetup &ddGridSetup,
+ const int numPPRanks,
const gmx_mtop_t *mtop,
const t_inputrec *ir,
const gmx_ddbox_t &ddbox)
* but that is not yet available here. But this anyhow only
* affect performance up to the second dd_partition_system call.
*/
- const int homeAtomCountEstimate = mtop->natoms/cr->nnodes;
+ const int homeAtomCountEstimate = mtop->natoms/numPPRanks;
comm->updateGroupsCog =
std::make_unique<gmx::UpdateGroupsCog>(*mtop,
systemInfo.updateGroupingPerMoleculetype,
else
{
vol_frac =
- (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, ddbox))/static_cast<double>(dd->nnodes);
+ (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
}
if (debug)
{
DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal);
+ int numRanksRequested = cr->nnodes;
+ checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir->coulombtype), options.numPmeRanks);
+
+ // DD grid setup uses a more conservative cell size limit for
+ // automated setup than the one in systemInfo. The latter is used
+ // later during DLB, for example.
+ const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog, ddSettings.request1DAnd1Pulse,
+ !isDlbDisabled(ddSettings.initialDlbState),
+ options.dlbScaling, *ir,
+ systemInfo.cellsizeLimit);
gmx_ddbox_t ddbox = {0};
- DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, options, ddSettings, systemInfo,
- mtop, ir, box, xGlobal, &ddbox);
+ DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, numRanksRequested, options, ddSettings, systemInfo,
+ gridSetupCellsizeLimit,
+ *mtop, *ir, box, xGlobal, &ddbox);
+ checkDDGridSetup(ddGridSetup, cr, options, ddSettings, systemInfo, gridSetupCellsizeLimit, ddbox);
cr->npmenodes = ddGridSetup.numPmeOnlyRanks;
dd->comm->cartesianRankSetup = cartSetup;
set_dd_limits(mdlog, cr, dd, options,
- ddSettings, systemInfo, ddGridSetup,
+ ddSettings, systemInfo,
+ ddGridSetup,
+ ddRankSetup.numPPRanks,
mtop, ir,
ddbox);
#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
+#include "gromacs/domdec/options.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/utilities.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/stringutil.h"
+#include "box.h"
#include "domdec_internal.h"
+#include "utility.h"
+
+// TODO remove this when moving domdec into gmx namespace
+using gmx::DomdecOptions;
/*! \brief Margin for setting up the DD grid */
#define DD_GRID_MARGIN_PRES_SCALE 1.05
/*! \brief Make a guess for the number of PME ranks to use. */
static int guess_npme(const gmx::MDLogger &mdlog,
- const gmx_mtop_t *mtop, const t_inputrec *ir,
- const matrix box,
- int nrank_tot)
+ const gmx_mtop_t &mtop,
+ const t_inputrec &ir,
+ const matrix box,
+ int nrank_tot)
{
float ratio;
int npme;
{
gmx_fatal(FARGS, "Could not find an appropriate number of separate PME ranks. i.e. >= %5f*#ranks (%d) and <= #ranks/2 (%d) and reasonable performance wise (grid_x=%d, grid_y=%d).\n"
"Use the -npme option of mdrun or change the number of ranks or the PME grid dimensions, see the manual for details.",
- ratio, gmx::roundToInt(0.95*ratio*nrank_tot), nrank_tot/2, ir->nkx, ir->nky);
+ ratio, gmx::roundToInt(0.95*ratio*nrank_tot), nrank_tot/2, ir.nkx, ir.nky);
}
else
{
return (n + f - 1)/f;
}
-real comm_box_frac(const ivec dd_nc, real cutoff, const gmx_ddbox_t *ddbox)
+real comm_box_frac(const gmx::IVec &dd_nc, real cutoff, const gmx_ddbox_t &ddbox)
{
int i, j, k;
rvec nw;
for (i = 0; i < DIM; i++)
{
- real bt = ddbox->box_size[i]*ddbox->skew_fac[i];
+ real bt = ddbox.box_size[i]*ddbox.skew_fac[i];
nw[i] = dd_nc[i]*cutoff/bt;
}
}
-/*! \brief Return whether the DD inhomogeneous in the z direction */
+/*! \brief Return whether the DD is inhomogeneous in the z direction */
-static gmx_bool inhomogeneous_z(const t_inputrec *ir)
+static gmx_bool inhomogeneous_z(const t_inputrec &ir)
 {
-    return ((EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) &&
-            ir->ePBC == epbcXYZ && ir->ewald_geometry == eewg3DC);
+    return ((EEL_PME(ir.coulombtype) || ir.coulombtype == eelEWALD) &&
+            ir.ePBC == epbcXYZ && ir.ewald_geometry == eewg3DC);
 }
/*! \brief Estimate cost of PME FFT communication
/*! \brief Estimate cost of communication for a possible domain decomposition. */
static float comm_cost_est(real limit, real cutoff,
- const matrix box, const gmx_ddbox_t *ddbox,
- int natoms, const t_inputrec *ir,
+ const matrix box, const gmx_ddbox_t &ddbox,
+ int natoms, const t_inputrec &ir,
float pbcdxr,
- int npme_tot, ivec nc)
+ int npme_tot, const gmx::IVec &nc)
{
- ivec npme = {1, 1, 1};
- int i, j, nk, overlap;
- rvec bt;
- float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx;
+ gmx::IVec npme = {1, 1, 1};
+ int i, j, nk, overlap;
+ rvec bt;
+ float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx;
/* This is the cost of a pbc_dx call relative to the cost
* of communicating the coordinate and force of an atom.
* This will be machine dependent.
float temp;
/* Check the DD algorithm restrictions */
- if ((ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1) ||
- (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1)))
+ if ((ir.ePBC == epbcXY && ir.nwall < 2 && nc[ZZ] > 1) ||
+ (ir.ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1)))
{
return -1;
}
return -1;
}
- assert(ddbox->npbcdim <= DIM);
+ assert(ddbox.npbcdim <= DIM);
/* Check if the triclinic requirements are met */
for (i = 0; i < DIM; i++)
{
- for (j = i+1; j < ddbox->npbcdim; j++)
+ for (j = i+1; j < ddbox.npbcdim; j++)
{
- if (box[j][i] != 0 || ir->deform[j][i] != 0 ||
- (ir->epc != epcNO && ir->compress[j][i] != 0))
+ if (box[j][i] != 0 || ir.deform[j][i] != 0 ||
+ (ir.epc != epcNO && ir.compress[j][i] != 0))
{
if (nc[j] > 1 && nc[i] == 1)
{
for (i = 0; i < DIM; i++)
{
- bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i];
+ bt[i] = ddbox.box_size[i]*ddbox.skew_fac[i];
/* Without PBC and with 2 cells, there are no lower limits on the cell size */
- if (!(i >= ddbox->npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit)
+ if (!(i >= ddbox.npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit)
{
return -1;
}
/* With PBC, check if the cut-off fits in nc[i]-1 cells */
- if (i < ddbox->npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff)
+ if (i < ddbox.npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff)
{
return -1;
}
}
}
- if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
+ if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
{
/* Check the PME grid restrictions.
* Currently these can only be invalid here with too few grid lines
*/
bool useThreads = true;
bool errorsAreFatal = false;
- if (!gmx_pme_check_restrictions(ir->pme_order, ir->nkx, ir->nky, ir->nkz,
+ if (!gmx_pme_check_restrictions(ir.pme_order, ir.nkx, ir.nky, ir.nkz,
npme_x, useThreads, errorsAreFatal))
{
return -1;
/* Grid overlap communication */
if (npme[i] > 1)
{
- nk = (i == 0 ? ir->nkx : ir->nky);
- overlap = (nk % npme[i] == 0 ? ir->pme_order-1 : ir->pme_order);
+ nk = (i == 0 ? ir.nkx : ir.nky);
+ overlap = (nk % npme[i] == 0 ? ir.pme_order-1 : ir.pme_order);
temp = npme[i];
temp *= overlap;
- temp *= ir->nkx;
- temp *= ir->nky;
- temp *= ir->nkz;
+ temp *= ir.nkx;
+ temp *= ir.nky;
+ temp *= ir.nkz;
temp /= nk;
comm_pme += temp;
-/* Old line comm_pme += npme[i]*overlap*ir->nkx*ir->nky*ir->nkz/nk; */
+/* Old line comm_pme += npme[i]*overlap*ir.nkx*ir.nky*ir.nkz/nk; */
}
}
- comm_pme += comm_pme_cost_vol(npme[YY], ir->nky, ir->nkz, ir->nkx);
- comm_pme += comm_pme_cost_vol(npme[XX], ir->nkx, ir->nky, ir->nkz);
+ comm_pme += comm_pme_cost_vol(npme[YY], ir.nky, ir.nkz, ir.nkx);
+ comm_pme += comm_pme_cost_vol(npme[XX], ir.nkx, ir.nky, ir.nkz);
/* Add cost of pbc_dx for bondeds */
cost_pbcdx = 0;
- if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir->ePBC != epbcXY))
+ if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir.ePBC != epbcXY))
{
- if ((ddbox->tric_dir[XX] && nc[XX] == 1) ||
- (ddbox->tric_dir[YY] && nc[YY] == 1))
+ if ((ddbox.tric_dir[XX] && nc[XX] == 1) ||
+ (ddbox.tric_dir[YY] && nc[YY] == 1))
{
cost_pbcdx = pbcdxr*pbcdx_tric_fac;
}
return 3*natoms*(comm_vol + cost_pbcdx) + comm_pme;
}
-/*! \brief Assign penalty factors to possible domain decompositions, based on the estimated communication costs. */
+/*! \brief Assign penalty factors to possible domain decompositions,
+ * based on the estimated communication costs. */
static void assign_factors(const real limit, const bool request1D,
const real cutoff,
- const matrix box, const gmx_ddbox_t *ddbox,
- int natoms, const t_inputrec *ir,
+ const matrix box, const gmx_ddbox_t &ddbox,
+ int natoms, const t_inputrec &ir,
float pbcdxr, int npme,
int ndiv, const int *div, const int *mdiv,
- ivec ir_try, ivec opt)
+ gmx::IVec *irTryPtr,
+ gmx::IVec *opt)
{
- int x, y, i;
- float ce;
+ int x, y, i;
+ float ce;
+ gmx::IVec &ir_try = *irTryPtr;
if (ndiv == 0)
{
ce = comm_cost_est(limit, cutoff, box, ddbox,
natoms, ir, pbcdxr, npme, ir_try);
- if (ce >= 0 && (opt[XX] == 0 ||
+ if (ce >= 0 && ((*opt)[XX] == 0 ||
ce < comm_cost_est(limit, cutoff, box, ddbox,
natoms, ir, pbcdxr,
- npme, opt)))
+ npme, *opt)))
{
- copy_ivec(ir_try, opt);
+ *opt = ir_try;
}
return;
/* recurse */
assign_factors(limit, request1D,
cutoff, box, ddbox, natoms, ir, pbcdxr, npme,
- ndiv-1, div+1, mdiv+1, ir_try, opt);
+ ndiv-1, div+1, mdiv+1, irTryPtr, opt);
for (i = 0; i < mdiv[0]-x-y; i++)
{
}
}
-/*! \brief Determine the optimal distribution of DD cells for the simulation system and number of MPI ranks */
-static real optimize_ncells(const gmx::MDLogger &mdlog,
- const int nnodes_tot, const int npme_only,
- const bool request1DAnd1Pulse,
- const bool bDynLoadBal, const real dlb_scale,
- const gmx_mtop_t *mtop,
- const matrix box, const gmx_ddbox_t *ddbox,
- const t_inputrec *ir,
- const DDSystemInfo &systemInfo,
- ivec nc)
+/*! \brief Determine the optimal distribution of DD cells for the
+ * simulation system and number of MPI ranks
+ *
+ * \returns The optimal grid cell choice. The latter will contain all
+ * zeros if no valid cell choice exists. */
+static gmx::IVec
+optimizeDDCells(const gmx::MDLogger &mdlog,
+ const int numRanksRequested,
+ const int numPmeOnlyRanks,
+ const real cellSizeLimit,
+ const bool request1DAnd1Pulse,
+ const gmx_mtop_t &mtop,
+ const matrix box,
+ const gmx_ddbox_t &ddbox,
+ const t_inputrec &ir,
+ const DDSystemInfo &systemInfo)
{
- int npp, npme, d, nmax;
- double pbcdxr;
- real limit;
- ivec itry;
+ double pbcdxr;
- limit = systemInfo.cellsizeLimit;
- if (request1DAnd1Pulse)
- {
- limit = std::max(limit, ir->rlist);
- }
+ const int numPPRanks = numRanksRequested - numPmeOnlyRanks;
- npp = nnodes_tot - npme_only;
- if (EEL_PME(ir->coulombtype))
- {
- npme = (npme_only > 0 ? npme_only : npp);
- }
- else
+ GMX_LOG(mdlog.info).appendTextFormatted(
+ "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm",
+ numPPRanks, cellSizeLimit);
+ if (inhomogeneous_z(ir))
{
- npme = 0;
+ GMX_LOG(mdlog.info).appendTextFormatted(
+ "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.",
+ eewg_names[ir.ewald_geometry]);
}
+
+ // For cost estimates, we need the number of ranks doing PME work,
+ // which is the number of PP ranks when not using separate
+ // PME-only ranks.
+ const int numRanksDoingPmeWork = (EEL_PME(ir.coulombtype) ?
+ ((numPmeOnlyRanks > 0) ? numPmeOnlyRanks : numPPRanks) :
+ 0);
+
if (systemInfo.haveInterDomainBondeds)
{
/* If we can skip PBC for distance calculations in plain-C bondeds,
* Here we ignore SIMD bondeds as they always do (fast) PBC.
*/
count_bonded_distances(mtop, ir, &pbcdxr, nullptr);
- pbcdxr /= static_cast<double>(mtop->natoms);
+ pbcdxr /= static_cast<double>(mtop.natoms);
}
else
{
/* Every molecule is a single charge group: no pbc required */
pbcdxr = 0;
}
- /* Add a margin for DLB and/or pressure scaling */
- if (bDynLoadBal)
- {
- if (dlb_scale >= 1.0)
- {
- gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
- }
- GMX_LOG(mdlog.info).appendTextFormatted(
- "Scaling the initial minimum size with 1/%g (option -dds) = %g",
- dlb_scale, 1/dlb_scale);
- limit /= dlb_scale;
- }
- else if (ir->epc != epcNO)
- {
- GMX_LOG(mdlog.info).appendTextFormatted(
- "To account for pressure scaling, scaling the initial minimum size with %g",
- DD_GRID_MARGIN_PRES_SCALE);
- limit *= DD_GRID_MARGIN_PRES_SCALE;
- }
-
- GMX_LOG(mdlog.info).appendTextFormatted(
- "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm",
- npp, limit);
-
- if (inhomogeneous_z(ir))
- {
- GMX_LOG(mdlog.info).appendTextFormatted(
- "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.",
- eewg_names[ir->ewald_geometry]);
- }
- if (limit > 0)
+ if (cellSizeLimit > 0)
{
std::string maximumCells = "The maximum allowed number of cells is:";
- for (d = 0; d < DIM; d++)
+ for (int d = 0; d < DIM; d++)
{
- nmax = static_cast<int>(ddbox->box_size[d]*ddbox->skew_fac[d]/limit);
- if (d >= ddbox->npbcdim && nmax < 2)
+ int nmax = static_cast<int>(ddbox.box_size[d]*ddbox.skew_fac[d]/cellSizeLimit);
+ if (d >= ddbox.npbcdim && nmax < 2)
{
nmax = 2;
}
fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr);
}
- /* Decompose npp in factors */
+ /* Decompose numPPRanks in factors */
std::vector<int> div;
std::vector<int> mdiv;
- factorize(npp, &div, &mdiv);
+ factorize(numPPRanks, &div, &mdiv);
- itry[XX] = 1;
- itry[YY] = 1;
- itry[ZZ] = 1;
- clear_ivec(nc);
- assign_factors(limit, request1DAnd1Pulse,
- systemInfo.cutoff, box, ddbox, mtop->natoms, ir, pbcdxr,
- npme, div.size(), div.data(), mdiv.data(), itry, nc);
+ gmx::IVec itry = { 1, 1, 1 };
+ gmx::IVec numDomains = { 0, 0, 0 };
+ assign_factors(cellSizeLimit, request1DAnd1Pulse,
+ systemInfo.cutoff, box, ddbox, mtop.natoms, ir, pbcdxr,
+ numRanksDoingPmeWork, div.size(), div.data(), mdiv.data(), &itry, &numDomains);
- return limit;
+ return numDomains;
}
-DDGridSetup
-dd_choose_grid(const gmx::MDLogger &mdlog,
- const t_commrec *cr,
- const t_inputrec *ir,
- const gmx_mtop_t *mtop,
- const matrix box, const gmx_ddbox_t *ddbox,
- const int numPmeRanksRequested,
- const bool request1DAnd1Pulse,
- const bool bDynLoadBal, const real dlb_scale,
- const DDSystemInfo &systemInfo)
+/* Returns the cell size limit to use when choosing the DD grid.
+ *
+ * Starts from the limit derived from the system info and enlarges it
+ * when a single communication pulse is requested, when dynamic load
+ * balancing is active, or when pressure coupling is used.
+ */
+real
+getDDGridSetupCellSizeLimit(const gmx::MDLogger &mdlog,
+                            const bool           request1DAnd1Pulse,
+                            const bool           bDynLoadBal,
+                            const real           dlb_scale,
+                            const t_inputrec    &ir,
+                            const real           systemInfoCellSizeLimit)
 {
-    DDGridSetup ddGridSetup;
+    real cellSizeLimit = systemInfoCellSizeLimit;
+    /* A single communication pulse requires the limit to be at least
+     * the pair-list cut-off */
+    if (request1DAnd1Pulse)
+    {
+        cellSizeLimit = std::max(cellSizeLimit, ir.rlist);
+    }
-    if (MASTER(cr))
+    /* Add a margin for DLB and/or pressure scaling */
+    if (bDynLoadBal)
     {
-        int64_t nnodes_div = cr->nnodes;
-        if (EEL_PME(ir->coulombtype))
+        if (dlb_scale >= 1.0)
         {
-            if (numPmeRanksRequested > 0)
-            {
-                if (numPmeRanksRequested >= cr->nnodes)
-                {
-                    gmx_fatal(FARGS,
-                              "Cannot have %d separate PME ranks with just %d total ranks",
-                              numPmeRanksRequested, cr->nnodes);
-                }
-
-                /* If the user purposely selected the number of PME nodes,
-                 * only check for large primes in the PP node count.
-                 */
-                nnodes_div -= numPmeRanksRequested;
-            }
+            gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
         }
-        else
+        GMX_LOG(mdlog.info).appendTextFormatted(
+                "Scaling the initial minimum size with 1/%g (option -dds) = %g",
+                dlb_scale, 1/dlb_scale);
+        cellSizeLimit /= dlb_scale;
+    }
+    else if (ir.epc != epcNO)
+    {
+        GMX_LOG(mdlog.info).appendTextFormatted(
+                "To account for pressure scaling, scaling the initial minimum size with %g",
+                DD_GRID_MARGIN_PRES_SCALE);
+        cellSizeLimit *= DD_GRID_MARGIN_PRES_SCALE;
+    }
+
+    return cellSizeLimit;
+}
+/* Sanity-check the rank counts requested on the command line.
+ *
+ * Exits with a fatal error when more separate PME ranks are requested
+ * than PP ranks would remain, or when the PP rank count has a large
+ * prime factor that would lead to an inefficient DD grid.
+ */
+void
+checkForValidRankCountRequests(const int  numRanksRequested,
+                               const bool usingPme,
+                               const int  numPmeRanksRequested)
+{
+    int numPPRanksRequested = numRanksRequested;
+    /* A negative PME rank count requests an automated choice and zero
+     * requests no separate PME ranks, so only subtract the request
+     * from the PP count when the user asked for one or more separate
+     * PME ranks; subtracting a negative value here would inflate the
+     * PP count and could trigger a spurious large-prime fatal error. */
+    if (usingPme && numPmeRanksRequested > 0)
+    {
+        numPPRanksRequested -= numPmeRanksRequested;
+        if (numPmeRanksRequested > numPPRanksRequested)
         {
-            ddGridSetup.numPmeOnlyRanks = 0;
+            gmx_fatal(FARGS,
+                      "Cannot have %d separate PME ranks with only %d PP ranks, choose fewer or no separate PME ranks",
+                      numPmeRanksRequested, numPPRanksRequested);
         }
+    }
-        if (nnodes_div > 12)
+    // Once the rank count is large enough, it becomes worth
+    // suggesting improvements to the user.
+    const int minPPRankCountToCheckForLargePrimeFactors = 13;
+    if (numPPRanksRequested >= minPPRankCountToCheckForLargePrimeFactors)
+    {
+        const int largestDivisor = largest_divisor(numPPRanksRequested);
+        /* Check if the largest divisor is more than numPPRanks ^ (2/3) */
+        if (largestDivisor*largestDivisor*largestDivisor >
+            numPPRanksRequested*numPPRanksRequested)
         {
-            const int64_t ldiv = largest_divisor(nnodes_div);
-            /* Check if the largest divisor is more than nnodes^2/3 */
-            if (ldiv*ldiv*ldiv > nnodes_div*nnodes_div)
-            {
-                gmx_fatal(FARGS, "The number of ranks you selected (%" PRId64 ") contains a large prime factor %" PRId64 ". In most cases this will lead to bad performance. Choose a number with smaller prime factors or set the decomposition (option -dd) manually.",
-                          nnodes_div, ldiv);
-            }
+            gmx_fatal(FARGS, "The number of ranks selected for particle-particle work (%d) "
+                      "contains a large prime factor %d. In most cases this will lead to "
+                      "bad performance. Choose a number with smaller prime factors or "
+                      "set the decomposition (option -dd) manually.",
+                      numPPRanksRequested, largestDivisor);
         }
+    }
+}
+
+/*! \brief Return the number of PME-only ranks used by the simulation
+ *
+ * If the user did not choose a number, then decide for them.
+ *
+ * With an explicit DD grid (-dd), a negative -npme request means no
+ * separate PME ranks. Without PME electrostatics no PME-only ranks
+ * are used. Otherwise a negative request triggers an automated
+ * choice: none for small runs, else a guess from guess_npme(). */
+static int
+getNumPmeOnlyRanksToUse(const gmx::MDLogger &mdlog,
+                        const DomdecOptions &options,
+                        const gmx_mtop_t    &mtop,
+                        const t_inputrec    &ir,
+                        const matrix         box,
+                        const int            numRanksRequested)
+{
+    int         numPmeOnlyRanks;
+    const char *extraMessage = "";

-    if (EEL_PME(ir->coulombtype))
+    if (options.numCells[XX] > 0)
+    {
+        if (options.numPmeRanks >= 0)
+        {
+            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
+            numPmeOnlyRanks = options.numPmeRanks;
+        }
+        else
+        {
+            // When the DD grid is set explicitly and -npme is set to auto,
+            // don't use PME ranks. We check later if the DD grid is
+            // compatible with the total number of ranks.
+            numPmeOnlyRanks = 0;
+        }
+    }
+    else
+    {
+        if (!EEL_PME(ir.coulombtype))
+        {
+            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
+            numPmeOnlyRanks = 0;
+        }
+        else
         {
-            if (numPmeRanksRequested < 0)
+            if (options.numPmeRanks >= 0)
+            {
+                numPmeOnlyRanks = options.numPmeRanks;
+            }
+            else
             {
-                /* Use PME nodes when the number of nodes is more than 16 */
-                if (cr->nnodes <= 18)
+                // Controls the automated choice of when to use separate PME-only ranks.
+                const int minRankCountToDefaultToSeparatePmeRanks = 19;
+
+                if (numRanksRequested < minRankCountToDefaultToSeparatePmeRanks)
                 {
-                    ddGridSetup.numPmeOnlyRanks = 0;
-                    GMX_LOG(mdlog.info).appendTextFormatted(
-                            "Using %d separate PME ranks, as there are too few total\n"
-                            " ranks for efficient splitting",
-                            ddGridSetup.numPmeOnlyRanks);
+                    numPmeOnlyRanks = 0;
+                    extraMessage = ", as there are too few total\n"
+                        " ranks for efficient splitting";
                 }
                 else
                 {
-                    ddGridSetup.numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, cr->nnodes);
-                    GMX_LOG(mdlog.info).appendTextFormatted(
-                            "Using %d separate PME ranks, as guessed by mdrun",
-                            ddGridSetup.numPmeOnlyRanks);
+                    numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, numRanksRequested);
+                    extraMessage = ", as guessed by mdrun";
                 }
             }
-            else
+        }
+
+    }
+    GMX_RELEASE_ASSERT(numPmeOnlyRanks <= numRanksRequested,
+                       "Cannot have more PME ranks than total ranks");
+    if (EEL_PME(ir.coulombtype))
+    {
+        GMX_LOG(mdlog.info).appendTextFormatted
+            ("Using %d separate PME ranks%s", numPmeOnlyRanks, extraMessage);
+    }
+
+    return numPmeOnlyRanks;
+}
+
+/*! \brief Sets the order of the DD dimensions, returns the number of DD dimensions
+ *
+ * Only dimensions with more than one domain are entered into \p dims,
+ * in x,y,z order, or in z,y,x order when useDDOrderZYX is set. When
+ * there is no decomposition, dims[0] is still set (to XX). */
+static int set_dd_dim(const gmx::IVec &numDDCells,
+                      const DDSettings &ddSettings,
+                      ivec *dims)
+{
+    int ndim = 0;
+    if (ddSettings.useDDOrderZYX)
+    {
+        /* Decomposition order z,y,x */
+        for (int dim = DIM - 1; dim >= 0; dim--)
+        {
+            if (numDDCells[dim] > 1)
+            {
+                (*dims)[ndim++] = dim;
+            }
+        }
+    }
+    else
+    {
+        /* Decomposition order x,y,z */
+        for (int dim = 0; dim < DIM; dim++)
+        {
+            if (numDDCells[dim] > 1)
             {
-                /* We checked above that nPmeRanks is a valid number */
-                ddGridSetup.numPmeOnlyRanks = numPmeRanksRequested;
-                GMX_LOG(mdlog.info).appendTextFormatted(
-                        "Using %d separate PME ranks", ddGridSetup.numPmeOnlyRanks);
-                // TODO: there was a ", per user request" note here, but it's not correct anymore,
-                // as with GPUs decision about nPmeRanks can be made in runner() as well.
-                // Consider a single spot for setting nPmeRanks.
+                (*dims)[ndim++] = dim;
             }
         }
+    }
-    ddGridSetup.cellsizeLimit =
-            optimize_ncells(mdlog, cr->nnodes, ddGridSetup.numPmeOnlyRanks,
-                            request1DAnd1Pulse,
-                            bDynLoadBal, dlb_scale,
-                            mtop, box, ddbox, ir,
-                            systemInfo,
-                            ddGridSetup.numDomains);
+    if (ndim == 0)
+    {
+        /* Set dim[0] to avoid extra checks on ndim in several places */
+        (*dims)[0] = XX;
     }
-    /* Communicate the information set by the master to all nodes */
-    gmx_bcast(sizeof(ddGridSetup.numDomains), ddGridSetup.numDomains, cr);
-    if (EEL_PME(ir->coulombtype))
+    return ndim;
+}
+
+/* Determines the DD grid setup: the number of PME-only ranks and the
+ * domain count per dimension (taken from -dd when set, otherwise
+ * chosen on the master rank and broadcast), and initializes *ddbox. */
+DDGridSetup
+getDDGridSetup(const gmx::MDLogger            &mdlog,
+               const t_commrec                *cr,
+               const int                       numRanksRequested,
+               const DomdecOptions            &options,
+               const DDSettings               &ddSettings,
+               const DDSystemInfo             &systemInfo,
+               const real                      cellSizeLimit,
+               const gmx_mtop_t               &mtop,
+               const t_inputrec               &ir,
+               const matrix                    box,
+               gmx::ArrayRef<const gmx::RVec>  xGlobal,
+               gmx_ddbox_t                    *ddbox)
+{
+    int numPmeOnlyRanks = getNumPmeOnlyRanksToUse(mdlog, options, mtop, ir, box, numRanksRequested);
+
+    gmx::IVec numDomains;
+    if (options.numCells[XX] > 0)
     {
-        gmx_bcast(sizeof(ddGridSetup.numPmeOnlyRanks), &ddGridSetup.numPmeOnlyRanks, cr);
+        numDomains = gmx::IVec(options.numCells);
+        /* set_ddbox_cr takes a legacy C-array ivec pointer */
+        const ivec numDomainsLegacyIvec = { numDomains[XX], numDomains[YY], numDomains[ZZ] };
+        set_ddbox_cr(*cr, &numDomainsLegacyIvec, ir, box, xGlobal, ddbox);
     }
     else
     {
-        ddGridSetup.numPmeOnlyRanks = 0;
+        set_ddbox_cr(*cr, nullptr, ir, box, xGlobal, ddbox);
+
+        /* Only the master rank runs the optimization; the result is
+         * broadcast below */
+        if (MASTER(cr))
+        {
+            numDomains =
+                optimizeDDCells(mdlog, numRanksRequested, numPmeOnlyRanks,
+                                cellSizeLimit,
+                                ddSettings.request1DAnd1Pulse,
+                                mtop, box, *ddbox, ir,
+                                systemInfo);
+        }
+    }
+
+    /* Communicate the information set by the master to all ranks */
+    // NOTE(review): passing the gmx::IVec object itself to gmx_bcast
+    // relies on an implicit conversion to a raw int array — confirm
+    // BasicVector provides this conversion.
+    gmx_bcast(sizeof(numDomains), numDomains, cr);
+    if (EEL_PME(ir.coulombtype))
+    {
+        gmx_bcast(sizeof(numPmeOnlyRanks), &numPmeOnlyRanks, cr);
     }
+    DDGridSetup ddGridSetup;
+    ddGridSetup.numPmeOnlyRanks = numPmeOnlyRanks;
+    ddGridSetup.numDomains[XX]  = numDomains[XX];
+    ddGridSetup.numDomains[YY]  = numDomains[YY];
+    ddGridSetup.numDomains[ZZ]  = numDomains[ZZ];
+    ddGridSetup.numDDDimensions = set_dd_dim(numDomains, ddSettings, &ddGridSetup.ddDimensions);
+
     return ddGridSetup;
 }
return simd_cycle_no_simd/speedup;
}
-void count_bonded_distances(const gmx_mtop_t *mtop, const t_inputrec *ir,
+void count_bonded_distances(const gmx_mtop_t &mtop, const t_inputrec &ir,
double *ndistance_c, double *ndistance_simd)
{
gmx_bool bExcl;
gmx_bool bSimdBondeds = FALSE;
#endif
- bExcl = (ir->cutoff_scheme == ecutsGROUP && inputrecExclForces(ir)
- && !EEL_FULL(ir->coulombtype));
+ bExcl = (ir.cutoff_scheme == ecutsGROUP && inputrecExclForces(&ir)
+ && !EEL_FULL(ir.coulombtype));
if (bSimdBondeds)
{
/* We only have SIMD versions of these bondeds without energy and
* without shift-forces, we take that into account here.
*/
- if (ir->nstcalcenergy > 0)
+ if (ir.nstcalcenergy > 0)
{
- nonsimd_step_frac = 1.0/ir->nstcalcenergy;
+ nonsimd_step_frac = 1.0/ir.nstcalcenergy;
}
else
{
nonsimd_step_frac = 0;
}
- if (ir->epc != epcNO && 1.0/ir->nstpcouple > nonsimd_step_frac)
+ if (ir.epc != epcNO && 1.0/ir.nstpcouple > nonsimd_step_frac)
{
- nonsimd_step_frac = 1.0/ir->nstpcouple;
+ nonsimd_step_frac = 1.0/ir.nstpcouple;
}
}
else
*/
ndtot_c = 0;
ndtot_simd = 0;
- for (const gmx_molblock_t &molb : mtop->molblock)
+ for (const gmx_molblock_t &molb : mtop.molblock)
{
- const gmx_moltype_t *molt = &mtop->moltype[molb.type];
+ const gmx_moltype_t *molt = &mtop.moltype[molb.type];
for (ftype = 0; ftype < F_NRE; ftype++)
{
int nbonds;
}
}
-static void pp_verlet_load(const gmx_mtop_t *mtop, const t_inputrec *ir,
+static void pp_verlet_load(const gmx_mtop_t &mtop, const t_inputrec &ir,
const matrix box,
int *nq_tot, int *nlj_tot,
double *cost_pp,
const real nbnxn_refkernel_fac = 8.0;
#endif
- bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT);
+ bQRF = (EEL_RF(ir.coulombtype) || ir.coulombtype == eelCUT);
- gmx::ArrayRef<const t_iparams> iparams = mtop->ffparams.iparams;
- atnr = mtop->ffparams.atnr;
+ gmx::ArrayRef<const t_iparams> iparams = mtop.ffparams.iparams;
+ atnr = mtop.ffparams.atnr;
nqlj = 0;
nq = 0;
*bChargePerturbed = FALSE;
*bTypePerturbed = FALSE;
- for (const gmx_molblock_t &molb : mtop->molblock)
+ for (const gmx_molblock_t &molb : mtop.molblock)
{
- const gmx_moltype_t *molt = &mtop->moltype[molb.type];
+ const gmx_moltype_t *molt = &mtop.moltype[molb.type];
const t_atom *atom = molt->atoms.atom;
for (a = 0; a < molt->atoms.nr; a++)
{
}
}
- nlj = mtop->natoms - nqlj - nq;
+ nlj = mtop.natoms - nqlj - nq;
*nq_tot = nqlj + nq;
*nlj_tot = nqlj + nlj;
#else
j_cluster_size = 4;
#endif
- r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(j_cluster_size, mtop->natoms/det(box));
+ r_eff = ir.rlist + nbnxn_get_rlist_effective_inc(j_cluster_size, mtop.natoms/det(box));
/* The average number of pairs per atom */
- nppa = 0.5*4/3*M_PI*r_eff*r_eff*r_eff*mtop->natoms/det(box);
+ nppa = 0.5*4/3*M_PI*r_eff*r_eff*r_eff*mtop.natoms/det(box);
if (debug)
{
fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f pairs per atom %.1f\n",
- nqlj, nq, nlj, ir->rlist, r_eff, nppa);
+ nqlj, nq, nlj, ir.rlist, r_eff, nppa);
}
/* Determine the cost per pair interaction */
c_qlj = (bQRF ? c_nbnxn_qrf_lj : c_nbnxn_qexp_lj);
c_q = (bQRF ? c_nbnxn_qrf : c_nbnxn_qexp);
c_lj = c_nbnxn_lj;
- if (ir->vdw_modifier == eintmodPOTSWITCH || EVDW_PME(ir->vdwtype))
+ if (ir.vdw_modifier == eintmodPOTSWITCH || EVDW_PME(ir.vdwtype))
{
c_qlj += c_nbnxn_ljexp_add;
c_lj += c_nbnxn_ljexp_add;
}
- if (EVDW_PME(ir->vdwtype) && ir->ljpme_combination_rule == eljpmeLB)
+ if (EVDW_PME(ir.vdwtype) && ir.ljpme_combination_rule == eljpmeLB)
{
/* We don't have LJ-PME LB comb. rule kernels, we use slow kernels */
c_qlj *= nbnxn_refkernel_fac;
*cost_pp *= simd_cycle_factor(bHaveSIMD);
}
-float pme_load_estimate(const gmx_mtop_t *mtop, const t_inputrec *ir,
+float pme_load_estimate(const gmx_mtop_t &mtop, const t_inputrec &ir,
const matrix box)
{
int nq_tot, nlj_tot;
cost_solve = 0;
int gridNkzFactor = int{
- (ir->nkz + 1)/2
+ (ir.nkz + 1)/2
};
- if (EEL_PME(ir->coulombtype))
+ if (EEL_PME(ir.coulombtype))
{
- double grid = ir->nkx*ir->nky*gridNkzFactor;
+ double grid = ir.nkx*ir.nky*gridNkzFactor;
- int f = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1);
+ int f = ((ir.efep != efepNO && bChargePerturbed) ? 2 : 1);
cost_redist += c_pme_redist*nq_tot;
- cost_spread += f*c_pme_spread*nq_tot*gmx::power3(ir->pme_order);
+ cost_spread += f*c_pme_spread*nq_tot*gmx::power3(ir.pme_order);
cost_fft += f*c_pme_fft*grid*std::log(grid)/std::log(2.0);
cost_solve += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
}
- if (EVDW_PME(ir->vdwtype))
+ if (EVDW_PME(ir.vdwtype))
{
- double grid = ir->nkx*ir->nky*gridNkzFactor;
+ double grid = ir.nkx*ir.nky*gridNkzFactor;
- int f = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1);
- if (ir->ljpme_combination_rule == eljpmeLB)
+ int f = ((ir.efep != efepNO && bTypePerturbed) ? 2 : 1);
+ if (ir.ljpme_combination_rule == eljpmeLB)
{
/* LB combination rule: we have 7 mesh terms */
f *= 7;
}
cost_redist += c_pme_redist*nlj_tot;
- cost_spread += f*c_pme_spread*nlj_tot*gmx::power3(ir->pme_order);
+ cost_spread += f*c_pme_spread*nlj_tot*gmx::power3(ir.pme_order);
cost_fft += f*c_pme_fft*2*grid*std::log(grid)/std::log(2.0);
cost_solve += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
}