return dlbState;
}
-/* Sets the order of the DD dimensions, returns the number of DD dimensions */
-static int set_dd_dim(const ivec numDDCells,
- const DDSettings &ddSettings,
- ivec dims)
-{
- int ndim = 0;
- if (ddSettings.useDDOrderZYX)
- {
- /* Decomposition order z,y,x */
- for (int dim = DIM - 1; dim >= 0; dim--)
- {
- if (numDDCells[dim] > 1)
- {
- dims[ndim++] = dim;
- }
- }
- }
- else
- {
- /* Decomposition order x,y,z */
- for (int dim = 0; dim < DIM; dim++)
- {
- if (numDDCells[dim] > 1)
- {
- dims[ndim++] = dim;
- }
- }
- }
-
- if (ndim == 0)
- {
- /* Set dim[0] to avoid extra checks on ndim in several places */
- dims[0] = XX;
- }
-
- return ndim;
-}
-
static gmx_domdec_comm_t *init_dd_comm()
{
gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
/*! \brief Generate the simulation system information */
static DDSystemInfo
getSystemInfo(const gmx::MDLogger &mdlog,
- t_commrec *cr,
+ const t_commrec *cr,
const DomdecOptions &options,
const gmx_mtop_t *mtop,
const t_inputrec *ir,
return systemInfo;
}
-/*! \brief Set the cell size and interaction limits, as well as the DD grid
- *
- * Also computes the initial ddbox.
- */
-static DDGridSetup
-getDDGridSetup(const gmx::MDLogger &mdlog,
- t_commrec *cr,
- const DomdecOptions &options,
- const DDSettings &ddSettings,
- const DDSystemInfo &systemInfo,
- const gmx_mtop_t *mtop,
- const t_inputrec *ir,
- const matrix box,
- gmx::ArrayRef<const gmx::RVec> xGlobal,
- gmx_ddbox_t *ddbox)
+/*! \brief Exit with a fatal error if the DDGridSetup cannot be
+ * implemented. */
+static void
+checkDDGridSetup(const DDGridSetup &ddGridSetup,
+ const t_commrec *cr,
+ const DomdecOptions &options,
+ const DDSettings &ddSettings,
+ const DDSystemInfo &systemInfo,
+ const real cellsizeLimit,
+ const gmx_ddbox_t &ddbox)
{
- DDGridSetup ddGridSetup;
-
- if (options.numCells[XX] > 0)
+ if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
{
- copy_ivec(options.numCells, ddGridSetup.numDomains);
- set_ddbox_cr(*cr, &ddGridSetup.numDomains, *ir, box, xGlobal, ddbox);
+ char buf[STRLEN];
+ gmx_bool bC = (systemInfo.haveSplitConstraints &&
+ systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
+ sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
+ !bC ? "-rdd" : "-rcon",
+ ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
+ bC ? " or your LINCS settings" : "");
- if (options.numPmeRanks >= 0)
- {
- ddGridSetup.numPmeOnlyRanks = options.numPmeRanks;
- }
- else
- {
- /* When the DD grid is set explicitly and -npme is set to auto,
- * don't use PME ranks. We check later if the DD grid is
- * compatible with the total number of ranks.
- */
- ddGridSetup.numPmeOnlyRanks = 0;
- }
- }
- else
- {
- set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
-
- /* We need to choose the optimal DD grid and possibly PME nodes */
- ddGridSetup =
- dd_choose_grid(mdlog, cr, ir, mtop, box, ddbox,
- options.numPmeRanks,
- ddSettings.request1DAnd1Pulse,
- !isDlbDisabled(ddSettings.initialDlbState),
- options.dlbScaling,
- systemInfo);
-
- if (ddGridSetup.numDomains[XX] == 0)
- {
- char buf[STRLEN];
- gmx_bool bC = (systemInfo.haveSplitConstraints &&
- systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
- sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
- !bC ? "-rdd" : "-rcon",
- ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
- bC ? " or your LINCS settings" : "");
-
- gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
- "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
- "%s\n"
- "Look in the log file for details on the domain decomposition",
- cr->nnodes - ddGridSetup.numPmeOnlyRanks,
- ddGridSetup.cellsizeLimit,
- buf);
- }
+ gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
+ "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
+ "%s\n"
+ "Look in the log file for details on the domain decomposition",
+ cr->nnodes - ddGridSetup.numPmeOnlyRanks,
+ cellsizeLimit,
+ buf);
}
- const real acs = average_cellsize_min(*ddbox, ddGridSetup.numDomains);
- if (acs < systemInfo.cellsizeLimit)
+ const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
+ if (acs < cellsizeLimit)
{
if (options.numCells[XX] <= 0)
{
{
gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
"The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
- acs, systemInfo.cellsizeLimit);
+ acs, cellsizeLimit);
}
}
if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
{
gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
- "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
+ "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
}
if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
"The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
}
-
- ddGridSetup.numDDDimensions = set_dd_dim(ddGridSetup.numDomains, ddSettings,
- ddGridSetup.ddDimensions);
-
- return ddGridSetup;
}
/*! \brief Set the cell size and interaction limits, as well as the DD grid */
const DDSettings &ddSettings,
const DDSystemInfo &systemInfo,
const DDGridSetup &ddGridSetup,
+ const int numPPRanks,
const gmx_mtop_t *mtop,
const t_inputrec *ir,
const gmx_ddbox_t &ddbox)
* but that is not yet available here. But this anyhow only
* affect performance up to the second dd_partition_system call.
*/
- const int homeAtomCountEstimate = mtop->natoms/cr->nnodes;
+ const int homeAtomCountEstimate = mtop->natoms/numPPRanks;
comm->updateGroupsCog =
std::make_unique<gmx::UpdateGroupsCog>(*mtop,
systemInfo.updateGroupingPerMoleculetype,
else
{
vol_frac =
- (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, ddbox))/static_cast<double>(dd->nnodes);
+ (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
}
if (debug)
{
DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal);
+ int numRanksRequested = cr->nnodes;
+ checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir->coulombtype), options.numPmeRanks);
+
+ // DD grid setup uses a more conservative cell size limit for
+ // automated setup than the one in systemInfo. The latter is used
+ // later during DLB, for example.
+ const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog, ddSettings.request1DAnd1Pulse,
+ !isDlbDisabled(ddSettings.initialDlbState),
+ options.dlbScaling, *ir,
+ systemInfo.cellsizeLimit);
gmx_ddbox_t ddbox = {0};
- DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, options, ddSettings, systemInfo,
- mtop, ir, box, xGlobal, &ddbox);
+ DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, numRanksRequested, options, ddSettings, systemInfo,
+ gridSetupCellsizeLimit,
+ *mtop, *ir, box, xGlobal, &ddbox);
+ checkDDGridSetup(ddGridSetup, cr, options, ddSettings, systemInfo, gridSetupCellsizeLimit, ddbox);
cr->npmenodes = ddGridSetup.numPmeOnlyRanks;
dd->comm->cartesianRankSetup = cartSetup;
set_dd_limits(mdlog, cr, dd, options,
- ddSettings, systemInfo, ddGridSetup,
+ ddSettings, systemInfo,
+ ddGridSetup,
+ ddRankSetup.numPPRanks,
mtop, ir,
ddbox);
#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
+#include "gromacs/domdec/options.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/utilities.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/stringutil.h"
+#include "box.h"
#include "domdec_internal.h"
+#include "utility.h"
+
+// TODO remove this when moving domdec into gmx namespace
+using gmx::DomdecOptions;
/*! \brief Margin for setting up the DD grid */
#define DD_GRID_MARGIN_PRES_SCALE 1.05
/*! \brief Make a guess for the number of PME ranks to use. */
static int guess_npme(const gmx::MDLogger &mdlog,
- const gmx_mtop_t *mtop, const t_inputrec *ir,
- const matrix box,
- int nrank_tot)
+ const gmx_mtop_t &mtop,
+ const t_inputrec &ir,
+ const matrix box,
+ int nrank_tot)
{
float ratio;
int npme;
{
gmx_fatal(FARGS, "Could not find an appropriate number of separate PME ranks. i.e. >= %5f*#ranks (%d) and <= #ranks/2 (%d) and reasonable performance wise (grid_x=%d, grid_y=%d).\n"
"Use the -npme option of mdrun or change the number of ranks or the PME grid dimensions, see the manual for details.",
- ratio, gmx::roundToInt(0.95*ratio*nrank_tot), nrank_tot/2, ir->nkx, ir->nky);
+ ratio, gmx::roundToInt(0.95*ratio*nrank_tot), nrank_tot/2, ir.nkx, ir.nky);
}
else
{
return (n + f - 1)/f;
}
-real comm_box_frac(const ivec dd_nc, real cutoff, const gmx_ddbox_t *ddbox)
+real comm_box_frac(const gmx::IVec &dd_nc, real cutoff, const gmx_ddbox_t &ddbox)
{
int i, j, k;
rvec nw;
for (i = 0; i < DIM; i++)
{
- real bt = ddbox->box_size[i]*ddbox->skew_fac[i];
+ real bt = ddbox.box_size[i]*ddbox.skew_fac[i];
nw[i] = dd_nc[i]*cutoff/bt;
}
}
-/*! \brief Return whether the DD inhomogeneous in the z direction */
+/*! \brief Return whether the DD is inhomogeneous in the z direction */
-static gmx_bool inhomogeneous_z(const t_inputrec *ir)
+static gmx_bool inhomogeneous_z(const t_inputrec &ir)
 {
-    return ((EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) &&
-            ir->ePBC == epbcXYZ && ir->ewald_geometry == eewg3DC);
+    return ((EEL_PME(ir.coulombtype) || ir.coulombtype == eelEWALD) &&
+            ir.ePBC == epbcXYZ && ir.ewald_geometry == eewg3DC);
 }
/*! \brief Estimate cost of PME FFT communication
/*! \brief Estimate cost of communication for a possible domain decomposition. */
static float comm_cost_est(real limit, real cutoff,
- const matrix box, const gmx_ddbox_t *ddbox,
- int natoms, const t_inputrec *ir,
+ const matrix box, const gmx_ddbox_t &ddbox,
+ int natoms, const t_inputrec &ir,
float pbcdxr,
- int npme_tot, ivec nc)
+ int npme_tot, const gmx::IVec &nc)
{
- ivec npme = {1, 1, 1};
- int i, j, nk, overlap;
- rvec bt;
- float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx;
+ gmx::IVec npme = {1, 1, 1};
+ int i, j, nk, overlap;
+ rvec bt;
+ float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx;
/* This is the cost of a pbc_dx call relative to the cost
* of communicating the coordinate and force of an atom.
* This will be machine dependent.
float temp;
/* Check the DD algorithm restrictions */
- if ((ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1) ||
- (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1)))
+ if ((ir.ePBC == epbcXY && ir.nwall < 2 && nc[ZZ] > 1) ||
+ (ir.ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1)))
{
return -1;
}
return -1;
}
- assert(ddbox->npbcdim <= DIM);
+ assert(ddbox.npbcdim <= DIM);
/* Check if the triclinic requirements are met */
for (i = 0; i < DIM; i++)
{
- for (j = i+1; j < ddbox->npbcdim; j++)
+ for (j = i+1; j < ddbox.npbcdim; j++)
{
- if (box[j][i] != 0 || ir->deform[j][i] != 0 ||
- (ir->epc != epcNO && ir->compress[j][i] != 0))
+ if (box[j][i] != 0 || ir.deform[j][i] != 0 ||
+ (ir.epc != epcNO && ir.compress[j][i] != 0))
{
if (nc[j] > 1 && nc[i] == 1)
{
for (i = 0; i < DIM; i++)
{
- bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i];
+ bt[i] = ddbox.box_size[i]*ddbox.skew_fac[i];
/* Without PBC and with 2 cells, there are no lower limits on the cell size */
- if (!(i >= ddbox->npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit)
+ if (!(i >= ddbox.npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit)
{
return -1;
}
/* With PBC, check if the cut-off fits in nc[i]-1 cells */
- if (i < ddbox->npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff)
+ if (i < ddbox.npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff)
{
return -1;
}
}
}
- if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
+ if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
{
/* Check the PME grid restrictions.
* Currently these can only be invalid here with too few grid lines
*/
bool useThreads = true;
bool errorsAreFatal = false;
- if (!gmx_pme_check_restrictions(ir->pme_order, ir->nkx, ir->nky, ir->nkz,
+ if (!gmx_pme_check_restrictions(ir.pme_order, ir.nkx, ir.nky, ir.nkz,
npme_x, useThreads, errorsAreFatal))
{
return -1;
/* Grid overlap communication */
if (npme[i] > 1)
{
- nk = (i == 0 ? ir->nkx : ir->nky);
- overlap = (nk % npme[i] == 0 ? ir->pme_order-1 : ir->pme_order);
+ nk = (i == 0 ? ir.nkx : ir.nky);
+ overlap = (nk % npme[i] == 0 ? ir.pme_order-1 : ir.pme_order);
temp = npme[i];
temp *= overlap;
- temp *= ir->nkx;
- temp *= ir->nky;
- temp *= ir->nkz;
+ temp *= ir.nkx;
+ temp *= ir.nky;
+ temp *= ir.nkz;
temp /= nk;
comm_pme += temp;
-/* Old line comm_pme += npme[i]*overlap*ir->nkx*ir->nky*ir->nkz/nk; */
+/* Old line comm_pme += npme[i]*overlap*ir.nkx*ir.nky*ir.nkz/nk; */
}
}
- comm_pme += comm_pme_cost_vol(npme[YY], ir->nky, ir->nkz, ir->nkx);
- comm_pme += comm_pme_cost_vol(npme[XX], ir->nkx, ir->nky, ir->nkz);
+ comm_pme += comm_pme_cost_vol(npme[YY], ir.nky, ir.nkz, ir.nkx);
+ comm_pme += comm_pme_cost_vol(npme[XX], ir.nkx, ir.nky, ir.nkz);
/* Add cost of pbc_dx for bondeds */
cost_pbcdx = 0;
- if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir->ePBC != epbcXY))
+ if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir.ePBC != epbcXY))
{
- if ((ddbox->tric_dir[XX] && nc[XX] == 1) ||
- (ddbox->tric_dir[YY] && nc[YY] == 1))
+ if ((ddbox.tric_dir[XX] && nc[XX] == 1) ||
+ (ddbox.tric_dir[YY] && nc[YY] == 1))
{
cost_pbcdx = pbcdxr*pbcdx_tric_fac;
}
return 3*natoms*(comm_vol + cost_pbcdx) + comm_pme;
}
-/*! \brief Assign penalty factors to possible domain decompositions, based on the estimated communication costs. */
+/*! \brief Assign penalty factors to possible domain decompositions,
+ * based on the estimated communication costs. */
static void assign_factors(const real limit, const bool request1D,
const real cutoff,
- const matrix box, const gmx_ddbox_t *ddbox,
- int natoms, const t_inputrec *ir,
+ const matrix box, const gmx_ddbox_t &ddbox,
+ int natoms, const t_inputrec &ir,
float pbcdxr, int npme,
int ndiv, const int *div, const int *mdiv,
- ivec ir_try, ivec opt)
+ gmx::IVec *irTryPtr,
+ gmx::IVec *opt)
{
- int x, y, i;
- float ce;
+ int x, y, i;
+ float ce;
+ gmx::IVec &ir_try = *irTryPtr;
if (ndiv == 0)
{
ce = comm_cost_est(limit, cutoff, box, ddbox,
natoms, ir, pbcdxr, npme, ir_try);
- if (ce >= 0 && (opt[XX] == 0 ||
+ if (ce >= 0 && ((*opt)[XX] == 0 ||
ce < comm_cost_est(limit, cutoff, box, ddbox,
natoms, ir, pbcdxr,
- npme, opt)))
+ npme, *opt)))
{
- copy_ivec(ir_try, opt);
+ *opt = ir_try;
}
return;
/* recurse */
assign_factors(limit, request1D,
cutoff, box, ddbox, natoms, ir, pbcdxr, npme,
- ndiv-1, div+1, mdiv+1, ir_try, opt);
+ ndiv-1, div+1, mdiv+1, irTryPtr, opt);
for (i = 0; i < mdiv[0]-x-y; i++)
{
}
}
-/*! \brief Determine the optimal distribution of DD cells for the simulation system and number of MPI ranks */
-static real optimize_ncells(const gmx::MDLogger &mdlog,
- const int nnodes_tot, const int npme_only,
- const bool request1DAnd1Pulse,
- const bool bDynLoadBal, const real dlb_scale,
- const gmx_mtop_t *mtop,
- const matrix box, const gmx_ddbox_t *ddbox,
- const t_inputrec *ir,
- const DDSystemInfo &systemInfo,
- ivec nc)
+/*! \brief Determine the optimal distribution of DD cells for the
+ * simulation system and number of MPI ranks
+ *
+ * \returns The optimal grid cell choice. The latter will contain all
+ * zeros if no valid cell choice exists. */
+static gmx::IVec
+optimizeDDCells(const gmx::MDLogger &mdlog,
+ const int numRanksRequested,
+ const int numPmeOnlyRanks,
+ const real cellSizeLimit,
+ const bool request1DAnd1Pulse,
+ const gmx_mtop_t &mtop,
+ const matrix box,
+ const gmx_ddbox_t &ddbox,
+ const t_inputrec &ir,
+ const DDSystemInfo &systemInfo)
{
- int npp, npme, d, nmax;
- double pbcdxr;
- real limit;
- ivec itry;
+ double pbcdxr;
- limit = systemInfo.cellsizeLimit;
- if (request1DAnd1Pulse)
- {
- limit = std::max(limit, ir->rlist);
- }
+ const int numPPRanks = numRanksRequested - numPmeOnlyRanks;
- npp = nnodes_tot - npme_only;
- if (EEL_PME(ir->coulombtype))
- {
- npme = (npme_only > 0 ? npme_only : npp);
- }
- else
+ GMX_LOG(mdlog.info).appendTextFormatted(
+ "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm",
+ numPPRanks, cellSizeLimit);
+ if (inhomogeneous_z(ir))
{
- npme = 0;
+ GMX_LOG(mdlog.info).appendTextFormatted(
+ "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.",
+ eewg_names[ir.ewald_geometry]);
}
+
+ // For cost estimates, we need the number of ranks doing PME work,
+ // which is the number of PP ranks when not using separate
+ // PME-only ranks.
+ const int numRanksDoingPmeWork = (EEL_PME(ir.coulombtype) ?
+ ((numPmeOnlyRanks > 0) ? numPmeOnlyRanks : numPPRanks) :
+ 0);
+
if (systemInfo.haveInterDomainBondeds)
{
/* If we can skip PBC for distance calculations in plain-C bondeds,
* Here we ignore SIMD bondeds as they always do (fast) PBC.
*/
count_bonded_distances(mtop, ir, &pbcdxr, nullptr);
- pbcdxr /= static_cast<double>(mtop->natoms);
+ pbcdxr /= static_cast<double>(mtop.natoms);
}
else
{
/* Every molecule is a single charge group: no pbc required */
pbcdxr = 0;
}
- /* Add a margin for DLB and/or pressure scaling */
- if (bDynLoadBal)
- {
- if (dlb_scale >= 1.0)
- {
- gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
- }
- GMX_LOG(mdlog.info).appendTextFormatted(
- "Scaling the initial minimum size with 1/%g (option -dds) = %g",
- dlb_scale, 1/dlb_scale);
- limit /= dlb_scale;
- }
- else if (ir->epc != epcNO)
- {
- GMX_LOG(mdlog.info).appendTextFormatted(
- "To account for pressure scaling, scaling the initial minimum size with %g",
- DD_GRID_MARGIN_PRES_SCALE);
- limit *= DD_GRID_MARGIN_PRES_SCALE;
- }
-
- GMX_LOG(mdlog.info).appendTextFormatted(
- "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm",
- npp, limit);
-
- if (inhomogeneous_z(ir))
- {
- GMX_LOG(mdlog.info).appendTextFormatted(
- "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.",
- eewg_names[ir->ewald_geometry]);
- }
- if (limit > 0)
+ if (cellSizeLimit > 0)
{
std::string maximumCells = "The maximum allowed number of cells is:";
- for (d = 0; d < DIM; d++)
+ for (int d = 0; d < DIM; d++)
{
- nmax = static_cast<int>(ddbox->box_size[d]*ddbox->skew_fac[d]/limit);
- if (d >= ddbox->npbcdim && nmax < 2)
+ int nmax = static_cast<int>(ddbox.box_size[d]*ddbox.skew_fac[d]/cellSizeLimit);
+ if (d >= ddbox.npbcdim && nmax < 2)
{
nmax = 2;
}
fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr);
}
- /* Decompose npp in factors */
+ /* Decompose numPPRanks in factors */
std::vector<int> div;
std::vector<int> mdiv;
- factorize(npp, &div, &mdiv);
+ factorize(numPPRanks, &div, &mdiv);
- itry[XX] = 1;
- itry[YY] = 1;
- itry[ZZ] = 1;
- clear_ivec(nc);
- assign_factors(limit, request1DAnd1Pulse,
- systemInfo.cutoff, box, ddbox, mtop->natoms, ir, pbcdxr,
- npme, div.size(), div.data(), mdiv.data(), itry, nc);
+ gmx::IVec itry = { 1, 1, 1 };
+ gmx::IVec numDomains = { 0, 0, 0 };
+ assign_factors(cellSizeLimit, request1DAnd1Pulse,
+ systemInfo.cutoff, box, ddbox, mtop.natoms, ir, pbcdxr,
+ numRanksDoingPmeWork, div.size(), div.data(), mdiv.data(), &itry, &numDomains);
- return limit;
+ return numDomains;
}
-DDGridSetup
-dd_choose_grid(const gmx::MDLogger &mdlog,
- const t_commrec *cr,
- const t_inputrec *ir,
- const gmx_mtop_t *mtop,
- const matrix box, const gmx_ddbox_t *ddbox,
- const int numPmeRanksRequested,
- const bool request1DAnd1Pulse,
- const bool bDynLoadBal, const real dlb_scale,
- const DDSystemInfo &systemInfo)
+/* Returns the cell size limit to use when choosing the DD grid.
+ *
+ * Starts from the limit derived from the system info and enlarges it
+ * when a single communication pulse is requested, when dynamic load
+ * balancing is active, or when pressure coupling is used.
+ */
+real
+getDDGridSetupCellSizeLimit(const gmx::MDLogger &mdlog,
+                            const bool           request1DAnd1Pulse,
+                            const bool           bDynLoadBal,
+                            const real           dlb_scale,
+                            const t_inputrec    &ir,
+                            const real           systemInfoCellSizeLimit)
 {
-    DDGridSetup ddGridSetup;
+    real cellSizeLimit = systemInfoCellSizeLimit;
+    /* A single communication pulse requires the limit to be at least
+     * the pair-list cut-off */
+    if (request1DAnd1Pulse)
+    {
+        cellSizeLimit = std::max(cellSizeLimit, ir.rlist);
+    }
-    if (MASTER(cr))
+    /* Add a margin for DLB and/or pressure scaling */
+    if (bDynLoadBal)
     {
-        int64_t nnodes_div = cr->nnodes;
-        if (EEL_PME(ir->coulombtype))
+        if (dlb_scale >= 1.0)
         {
-            if (numPmeRanksRequested > 0)
-            {
-                if (numPmeRanksRequested >= cr->nnodes)
-                {
-                    gmx_fatal(FARGS,
-                              "Cannot have %d separate PME ranks with just %d total ranks",
-                              numPmeRanksRequested, cr->nnodes);
-                }
-
-                /* If the user purposely selected the number of PME nodes,
-                 * only check for large primes in the PP node count.
-                 */
-                nnodes_div -= numPmeRanksRequested;
-            }
+            gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
         }
-        else
+        GMX_LOG(mdlog.info).appendTextFormatted(
+                "Scaling the initial minimum size with 1/%g (option -dds) = %g",
+                dlb_scale, 1/dlb_scale);
+        cellSizeLimit /= dlb_scale;
+    }
+    else if (ir.epc != epcNO)
+    {
+        GMX_LOG(mdlog.info).appendTextFormatted(
+                "To account for pressure scaling, scaling the initial minimum size with %g",
+                DD_GRID_MARGIN_PRES_SCALE);
+        cellSizeLimit *= DD_GRID_MARGIN_PRES_SCALE;
+    }
+
+    return cellSizeLimit;
+}
+/* Sanity-check the rank counts requested on the command line.
+ *
+ * Exits with a fatal error when more separate PME ranks are requested
+ * than PP ranks would remain, or when the PP rank count has a large
+ * prime factor that would lead to an inefficient DD grid.
+ */
+void
+checkForValidRankCountRequests(const int  numRanksRequested,
+                               const bool usingPme,
+                               const int  numPmeRanksRequested)
+{
+    int numPPRanksRequested = numRanksRequested;
+    /* A negative PME rank count requests an automated choice and zero
+     * requests no separate PME ranks, so only subtract the request
+     * from the PP count when the user asked for one or more separate
+     * PME ranks; subtracting a negative value here would inflate the
+     * PP count and could trigger a spurious large-prime fatal error. */
+    if (usingPme && numPmeRanksRequested > 0)
+    {
+        numPPRanksRequested -= numPmeRanksRequested;
+        if (numPmeRanksRequested > numPPRanksRequested)
         {
-            ddGridSetup.numPmeOnlyRanks = 0;
+            gmx_fatal(FARGS,
+                      "Cannot have %d separate PME ranks with only %d PP ranks, choose fewer or no separate PME ranks",
+                      numPmeRanksRequested, numPPRanksRequested);
         }
+    }
-        if (nnodes_div > 12)
+    // Once the rank count is large enough, it becomes worth
+    // suggesting improvements to the user.
+    const int minPPRankCountToCheckForLargePrimeFactors = 13;
+    if (numPPRanksRequested >= minPPRankCountToCheckForLargePrimeFactors)
+    {
+        const int largestDivisor = largest_divisor(numPPRanksRequested);
+        /* Check if the largest divisor is more than numPPRanks ^ (2/3) */
+        if (largestDivisor*largestDivisor*largestDivisor >
+            numPPRanksRequested*numPPRanksRequested)
         {
-            const int64_t ldiv = largest_divisor(nnodes_div);
-            /* Check if the largest divisor is more than nnodes^2/3 */
-            if (ldiv*ldiv*ldiv > nnodes_div*nnodes_div)
-            {
-                gmx_fatal(FARGS, "The number of ranks you selected (%" PRId64 ") contains a large prime factor %" PRId64 ". In most cases this will lead to bad performance. Choose a number with smaller prime factors or set the decomposition (option -dd) manually.",
-                          nnodes_div, ldiv);
-            }
+            gmx_fatal(FARGS, "The number of ranks selected for particle-particle work (%d) "
+                      "contains a large prime factor %d. In most cases this will lead to "
+                      "bad performance. Choose a number with smaller prime factors or "
+                      "set the decomposition (option -dd) manually.",
+                      numPPRanksRequested, largestDivisor);
         }
+    }
+}
+
+/*! \brief Return the number of PME-only ranks used by the simulation
+ *
+ * If the user did not choose a number, then decide for them.
+ *
+ * With an explicit DD grid (-dd), a negative -npme request means no
+ * separate PME ranks. Without PME electrostatics no PME-only ranks
+ * are used. Otherwise a negative request triggers an automated
+ * choice: none for small runs, else a guess from guess_npme(). */
+static int
+getNumPmeOnlyRanksToUse(const gmx::MDLogger &mdlog,
+                        const DomdecOptions &options,
+                        const gmx_mtop_t    &mtop,
+                        const t_inputrec    &ir,
+                        const matrix         box,
+                        const int            numRanksRequested)
+{
+    int         numPmeOnlyRanks;
+    const char *extraMessage = "";

-    if (EEL_PME(ir->coulombtype))
+    if (options.numCells[XX] > 0)
+    {
+        if (options.numPmeRanks >= 0)
+        {
+            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
+            numPmeOnlyRanks = options.numPmeRanks;
+        }
+        else
+        {
+            // When the DD grid is set explicitly and -npme is set to auto,
+            // don't use PME ranks. We check later if the DD grid is
+            // compatible with the total number of ranks.
+            numPmeOnlyRanks = 0;
+        }
+    }
+    else
+    {
+        if (!EEL_PME(ir.coulombtype))
+        {
+            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
+            numPmeOnlyRanks = 0;
+        }
+        else
         {
-            if (numPmeRanksRequested < 0)
+            if (options.numPmeRanks >= 0)
+            {
+                numPmeOnlyRanks = options.numPmeRanks;
+            }
+            else
             {
-                /* Use PME nodes when the number of nodes is more than 16 */
-                if (cr->nnodes <= 18)
+                // Controls the automated choice of when to use separate PME-only ranks.
+                const int minRankCountToDefaultToSeparatePmeRanks = 19;
+
+                if (numRanksRequested < minRankCountToDefaultToSeparatePmeRanks)
                 {
-                    ddGridSetup.numPmeOnlyRanks = 0;
-                    GMX_LOG(mdlog.info).appendTextFormatted(
-                            "Using %d separate PME ranks, as there are too few total\n"
-                            " ranks for efficient splitting",
-                            ddGridSetup.numPmeOnlyRanks);
+                    numPmeOnlyRanks = 0;
+                    extraMessage = ", as there are too few total\n"
+                        " ranks for efficient splitting";
                 }
                 else
                 {
-                    ddGridSetup.numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, cr->nnodes);
-                    GMX_LOG(mdlog.info).appendTextFormatted(
-                            "Using %d separate PME ranks, as guessed by mdrun",
-                            ddGridSetup.numPmeOnlyRanks);
+                    numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, numRanksRequested);
+                    extraMessage = ", as guessed by mdrun";
                 }
             }
-            else
+        }
+
+    }
+    GMX_RELEASE_ASSERT(numPmeOnlyRanks <= numRanksRequested,
+                       "Cannot have more PME ranks than total ranks");
+    if (EEL_PME(ir.coulombtype))
+    {
+        GMX_LOG(mdlog.info).appendTextFormatted
+            ("Using %d separate PME ranks%s", numPmeOnlyRanks, extraMessage);
+    }
+
+    return numPmeOnlyRanks;
+}
+
+/*! \brief Sets the order of the DD dimensions, returns the number of DD dimensions
+ *
+ * Only dimensions with more than one domain are entered into \p dims,
+ * in x,y,z order, or in z,y,x order when useDDOrderZYX is set. When
+ * there is no decomposition, dims[0] is still set (to XX). */
+static int set_dd_dim(const gmx::IVec &numDDCells,
+                      const DDSettings &ddSettings,
+                      ivec *dims)
+{
+    int ndim = 0;
+    if (ddSettings.useDDOrderZYX)
+    {
+        /* Decomposition order z,y,x */
+        for (int dim = DIM - 1; dim >= 0; dim--)
+        {
+            if (numDDCells[dim] > 1)
+            {
+                (*dims)[ndim++] = dim;
+            }
+        }
+    }
+    else
+    {
+        /* Decomposition order x,y,z */
+        for (int dim = 0; dim < DIM; dim++)
+        {
+            if (numDDCells[dim] > 1)
             {
-                /* We checked above that nPmeRanks is a valid number */
-                ddGridSetup.numPmeOnlyRanks = numPmeRanksRequested;
-                GMX_LOG(mdlog.info).appendTextFormatted(
-                        "Using %d separate PME ranks", ddGridSetup.numPmeOnlyRanks);
-                // TODO: there was a ", per user request" note here, but it's not correct anymore,
-                // as with GPUs decision about nPmeRanks can be made in runner() as well.
-                // Consider a single spot for setting nPmeRanks.
+                (*dims)[ndim++] = dim;
             }
         }
+    }
-    ddGridSetup.cellsizeLimit =
-            optimize_ncells(mdlog, cr->nnodes, ddGridSetup.numPmeOnlyRanks,
-                            request1DAnd1Pulse,
-                            bDynLoadBal, dlb_scale,
-                            mtop, box, ddbox, ir,
-                            systemInfo,
-                            ddGridSetup.numDomains);
+    if (ndim == 0)
+    {
+        /* Set dim[0] to avoid extra checks on ndim in several places */
+        (*dims)[0] = XX;
     }
-    /* Communicate the information set by the master to all nodes */
-    gmx_bcast(sizeof(ddGridSetup.numDomains), ddGridSetup.numDomains, cr);
-    if (EEL_PME(ir->coulombtype))
+    return ndim;
+}
+
+/* Determines the DD grid setup: the number of PME-only ranks and the
+ * domain count per dimension (taken from -dd when set, otherwise
+ * chosen on the master rank and broadcast), and initializes *ddbox. */
+DDGridSetup
+getDDGridSetup(const gmx::MDLogger            &mdlog,
+               const t_commrec                *cr,
+               const int                       numRanksRequested,
+               const DomdecOptions            &options,
+               const DDSettings               &ddSettings,
+               const DDSystemInfo             &systemInfo,
+               const real                      cellSizeLimit,
+               const gmx_mtop_t               &mtop,
+               const t_inputrec               &ir,
+               const matrix                    box,
+               gmx::ArrayRef<const gmx::RVec>  xGlobal,
+               gmx_ddbox_t                    *ddbox)
+{
+    int numPmeOnlyRanks = getNumPmeOnlyRanksToUse(mdlog, options, mtop, ir, box, numRanksRequested);
+
+    gmx::IVec numDomains;
+    if (options.numCells[XX] > 0)
     {
-        gmx_bcast(sizeof(ddGridSetup.numPmeOnlyRanks), &ddGridSetup.numPmeOnlyRanks, cr);
+        numDomains = gmx::IVec(options.numCells);
+        /* set_ddbox_cr takes a legacy C-array ivec pointer */
+        const ivec numDomainsLegacyIvec = { numDomains[XX], numDomains[YY], numDomains[ZZ] };
+        set_ddbox_cr(*cr, &numDomainsLegacyIvec, ir, box, xGlobal, ddbox);
     }
     else
     {
-        ddGridSetup.numPmeOnlyRanks = 0;
+        set_ddbox_cr(*cr, nullptr, ir, box, xGlobal, ddbox);
+
+        /* Only the master rank runs the optimization; the result is
+         * broadcast below */
+        if (MASTER(cr))
+        {
+            numDomains =
+                optimizeDDCells(mdlog, numRanksRequested, numPmeOnlyRanks,
+                                cellSizeLimit,
+                                ddSettings.request1DAnd1Pulse,
+                                mtop, box, *ddbox, ir,
+                                systemInfo);
+        }
+    }
+
+    /* Communicate the information set by the master to all ranks */
+    // NOTE(review): passing the gmx::IVec object itself to gmx_bcast
+    // relies on an implicit conversion to a raw int array — confirm
+    // BasicVector provides this conversion.
+    gmx_bcast(sizeof(numDomains), numDomains, cr);
+    if (EEL_PME(ir.coulombtype))
+    {
+        gmx_bcast(sizeof(numPmeOnlyRanks), &numPmeOnlyRanks, cr);
     }
+    DDGridSetup ddGridSetup;
+    ddGridSetup.numPmeOnlyRanks = numPmeOnlyRanks;
+    ddGridSetup.numDomains[XX]  = numDomains[XX];
+    ddGridSetup.numDomains[YY]  = numDomains[YY];
+    ddGridSetup.numDomains[ZZ]  = numDomains[ZZ];
+    ddGridSetup.numDDDimensions = set_dd_dim(numDomains, ddSettings, &ddGridSetup.ddDimensions);
+
     return ddGridSetup;
 }
return simd_cycle_no_simd/speedup;
}
-void count_bonded_distances(const gmx_mtop_t *mtop, const t_inputrec *ir,
+void count_bonded_distances(const gmx_mtop_t &mtop, const t_inputrec &ir,
double *ndistance_c, double *ndistance_simd)
{
gmx_bool bExcl;
gmx_bool bSimdBondeds = FALSE;
#endif
- bExcl = (ir->cutoff_scheme == ecutsGROUP && inputrecExclForces(ir)
- && !EEL_FULL(ir->coulombtype));
+ bExcl = (ir.cutoff_scheme == ecutsGROUP && inputrecExclForces(&ir)
+ && !EEL_FULL(ir.coulombtype));
if (bSimdBondeds)
{
/* We only have SIMD versions of these bondeds without energy and
* without shift-forces, we take that into account here.
*/
- if (ir->nstcalcenergy > 0)
+ if (ir.nstcalcenergy > 0)
{
- nonsimd_step_frac = 1.0/ir->nstcalcenergy;
+ nonsimd_step_frac = 1.0/ir.nstcalcenergy;
}
else
{
nonsimd_step_frac = 0;
}
- if (ir->epc != epcNO && 1.0/ir->nstpcouple > nonsimd_step_frac)
+ if (ir.epc != epcNO && 1.0/ir.nstpcouple > nonsimd_step_frac)
{
- nonsimd_step_frac = 1.0/ir->nstpcouple;
+ nonsimd_step_frac = 1.0/ir.nstpcouple;
}
}
else
*/
ndtot_c = 0;
ndtot_simd = 0;
- for (const gmx_molblock_t &molb : mtop->molblock)
+ for (const gmx_molblock_t &molb : mtop.molblock)
{
- const gmx_moltype_t *molt = &mtop->moltype[molb.type];
+ const gmx_moltype_t *molt = &mtop.moltype[molb.type];
for (ftype = 0; ftype < F_NRE; ftype++)
{
int nbonds;
}
}
-static void pp_verlet_load(const gmx_mtop_t *mtop, const t_inputrec *ir,
+static void pp_verlet_load(const gmx_mtop_t &mtop, const t_inputrec &ir,
const matrix box,
int *nq_tot, int *nlj_tot,
double *cost_pp,
const real nbnxn_refkernel_fac = 8.0;
#endif
- bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT);
+ bQRF = (EEL_RF(ir.coulombtype) || ir.coulombtype == eelCUT);
- gmx::ArrayRef<const t_iparams> iparams = mtop->ffparams.iparams;
- atnr = mtop->ffparams.atnr;
+ gmx::ArrayRef<const t_iparams> iparams = mtop.ffparams.iparams;
+ atnr = mtop.ffparams.atnr;
nqlj = 0;
nq = 0;
*bChargePerturbed = FALSE;
*bTypePerturbed = FALSE;
- for (const gmx_molblock_t &molb : mtop->molblock)
+ for (const gmx_molblock_t &molb : mtop.molblock)
{
- const gmx_moltype_t *molt = &mtop->moltype[molb.type];
+ const gmx_moltype_t *molt = &mtop.moltype[molb.type];
const t_atom *atom = molt->atoms.atom;
for (a = 0; a < molt->atoms.nr; a++)
{
}
}
- nlj = mtop->natoms - nqlj - nq;
+ nlj = mtop.natoms - nqlj - nq;
*nq_tot = nqlj + nq;
*nlj_tot = nqlj + nlj;
#else
j_cluster_size = 4;
#endif
- r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(j_cluster_size, mtop->natoms/det(box));
+ r_eff = ir.rlist + nbnxn_get_rlist_effective_inc(j_cluster_size, mtop.natoms/det(box));
/* The average number of pairs per atom */
- nppa = 0.5*4/3*M_PI*r_eff*r_eff*r_eff*mtop->natoms/det(box);
+ nppa = 0.5*4/3*M_PI*r_eff*r_eff*r_eff*mtop.natoms/det(box);
if (debug)
{
fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f pairs per atom %.1f\n",
- nqlj, nq, nlj, ir->rlist, r_eff, nppa);
+ nqlj, nq, nlj, ir.rlist, r_eff, nppa);
}
/* Determine the cost per pair interaction */
c_qlj = (bQRF ? c_nbnxn_qrf_lj : c_nbnxn_qexp_lj);
c_q = (bQRF ? c_nbnxn_qrf : c_nbnxn_qexp);
c_lj = c_nbnxn_lj;
- if (ir->vdw_modifier == eintmodPOTSWITCH || EVDW_PME(ir->vdwtype))
+ if (ir.vdw_modifier == eintmodPOTSWITCH || EVDW_PME(ir.vdwtype))
{
c_qlj += c_nbnxn_ljexp_add;
c_lj += c_nbnxn_ljexp_add;
}
- if (EVDW_PME(ir->vdwtype) && ir->ljpme_combination_rule == eljpmeLB)
+ if (EVDW_PME(ir.vdwtype) && ir.ljpme_combination_rule == eljpmeLB)
{
/* We don't have LJ-PME LB comb. rule kernels, we use slow kernels */
c_qlj *= nbnxn_refkernel_fac;
*cost_pp *= simd_cycle_factor(bHaveSIMD);
}
-float pme_load_estimate(const gmx_mtop_t *mtop, const t_inputrec *ir,
+float pme_load_estimate(const gmx_mtop_t &mtop, const t_inputrec &ir,
const matrix box)
{
int nq_tot, nlj_tot;
cost_solve = 0;
int gridNkzFactor = int{
- (ir->nkz + 1)/2
+ (ir.nkz + 1)/2
};
- if (EEL_PME(ir->coulombtype))
+ if (EEL_PME(ir.coulombtype))
{
- double grid = ir->nkx*ir->nky*gridNkzFactor;
+ double grid = ir.nkx*ir.nky*gridNkzFactor;
- int f = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1);
+ int f = ((ir.efep != efepNO && bChargePerturbed) ? 2 : 1);
cost_redist += c_pme_redist*nq_tot;
- cost_spread += f*c_pme_spread*nq_tot*gmx::power3(ir->pme_order);
+ cost_spread += f*c_pme_spread*nq_tot*gmx::power3(ir.pme_order);
cost_fft += f*c_pme_fft*grid*std::log(grid)/std::log(2.0);
cost_solve += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
}
- if (EVDW_PME(ir->vdwtype))
+ if (EVDW_PME(ir.vdwtype))
{
- double grid = ir->nkx*ir->nky*gridNkzFactor;
+ double grid = ir.nkx*ir.nky*gridNkzFactor;
- int f = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1);
- if (ir->ljpme_combination_rule == eljpmeLB)
+ int f = ((ir.efep != efepNO && bTypePerturbed) ? 2 : 1);
+ if (ir.ljpme_combination_rule == eljpmeLB)
{
/* LB combination rule: we have 7 mesh terms */
f *= 7;
}
cost_redist += c_pme_redist*nlj_tot;
- cost_spread += f*c_pme_spread*nlj_tot*gmx::power3(ir->pme_order);
+ cost_spread += f*c_pme_spread*nlj_tot*gmx::power3(ir.pme_order);
cost_fft += f*c_pme_fft*2*grid*std::log(grid)/std::log(2.0);
cost_solve += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
}