From e5a4840711eff8e05496224f1bcd546f8ad7cd6e Mon Sep 17 00:00:00 2001 From: Mark Abraham Date: Tue, 17 Sep 2019 12:38:05 +0200 Subject: [PATCH] Further improve getDDGridSetup Merged and simplified two similar setup functions that both handled setting up the number of PME-only ranks and the DD grid used on the PP ranks. No functionality changes here, although some user messages are slightly improved. This helps create the seam that GPU halo exchange setup will need in order to trigger that communication path reliably: there is now a clear point at which the number of PP ranks is known, so the search for a DD cell decomposition can be made in a way that might suit GPU halo exchange. It also permits more of the DD setup routines to be called without producing a fatal error or output that confuses the user, which will also be needed when introducing support for GPU halo exchange. Functions called in init_domain_decomposition no longer look up the total number of ranks from cr, but instead accept it as an integer parameter. This will let thread-MPI checking of various rank-count possibilities work smoothly, if we ever implement that. Used better variable naming, IVec, and more const correctness to improve readability, and improved comments. Noted TODOs for further work at some future time. Improved havePPDomainDecomposition (and some of its users) so that we are less likely to use it erroneously before DD has been initialized. Change-Id: I65e285cc10d946c7c5bf298d7ff08c81ba05f3d8 --- src/gromacs/domdec/dlbtiming.h | 2 +- src/gromacs/domdec/domdec.cpp | 167 +++------ src/gromacs/domdec/domdec_internal.h | 5 +- src/gromacs/domdec/domdec_setup.cpp | 498 +++++++++++++++++---------- src/gromacs/domdec/domdec_setup.h | 54 ++- src/gromacs/gmxpreprocess/grompp.cpp | 2 +- src/gromacs/mdlib/perf_est.cpp | 64 ++-- src/gromacs/mdlib/perf_est.h | 6 +- src/gromacs/mdtypes/commrec.h | 5 +- src/gromacs/tools/tune_pme.cpp | 2 +- 10 files changed, 443 insertions(+), 362 deletions(-) diff --git a/src/gromacs/domdec/dlbtiming.h b/src/gromacs/domdec/dlbtiming.h index 8834e3d3b0..ebd84e6eac 100644 --- a/src/gromacs/domdec/dlbtiming.h +++ b/src/gromacs/domdec/dlbtiming.h @@ -86,7 +86,7 @@ class DDBalanceRegionHandler public: //! Constructor, pass a pointer to t_commrec or nullptr when not using domain decomposition DDBalanceRegionHandler(const t_commrec *cr) : - useBalancingRegion_(havePPDomainDecomposition(cr)), + useBalancingRegion_(cr != nullptr ? havePPDomainDecomposition(cr) : false), dd_(cr != nullptr ?
cr->dd : nullptr) { } diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp index 4d7770645e..1e3c931858 100644 --- a/src/gromacs/domdec/domdec.cpp +++ b/src/gromacs/domdec/domdec.cpp @@ -1988,44 +1988,6 @@ static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog, return dlbState; } -/* Sets the order of the DD dimensions, returns the number of DD dimensions */ -static int set_dd_dim(const ivec numDDCells, - const DDSettings &ddSettings, - ivec dims) -{ - int ndim = 0; - if (ddSettings.useDDOrderZYX) - { - /* Decomposition order z,y,x */ - for (int dim = DIM - 1; dim >= 0; dim--) - { - if (numDDCells[dim] > 1) - { - dims[ndim++] = dim; - } - } - } - else - { - /* Decomposition order x,y,z */ - for (int dim = 0; dim < DIM; dim++) - { - if (numDDCells[dim] > 1) - { - dims[ndim++] = dim; - } - } - } - - if (ndim == 0) - { - /* Set dim[0] to avoid extra checks on ndim in several places */ - dims[0] = XX; - } - - return ndim; -} - static gmx_domdec_comm_t *init_dd_comm() { gmx_domdec_comm_t *comm = new gmx_domdec_comm_t; @@ -2167,7 +2129,7 @@ moleculesAreAlwaysWhole(const gmx_mtop_t &mtop, /*! \brief Generate the simulation system information */ static DDSystemInfo getSystemInfo(const gmx::MDLogger &mdlog, - t_commrec *cr, + const t_commrec *cr, const DomdecOptions &options, const gmx_mtop_t *mtop, const t_inputrec *ir, @@ -2356,77 +2318,38 @@ getSystemInfo(const gmx::MDLogger &mdlog, return systemInfo; } -/*! \brief Set the cell size and interaction limits, as well as the DD grid - * - * Also computes the initial ddbox. - */ -static DDGridSetup -getDDGridSetup(const gmx::MDLogger &mdlog, - t_commrec *cr, - const DomdecOptions &options, - const DDSettings &ddSettings, - const DDSystemInfo &systemInfo, - const gmx_mtop_t *mtop, - const t_inputrec *ir, - const matrix box, - gmx::ArrayRef xGlobal, - gmx_ddbox_t *ddbox) +/*! \brief Exit with a fatal error if the DDGridSetup cannot be + * implemented. */ +static void +checkDDGridSetup(const DDGridSetup &ddGridSetup, + const t_commrec *cr, + const DomdecOptions &options, + const DDSettings &ddSettings, + const DDSystemInfo &systemInfo, + const real cellsizeLimit, + const gmx_ddbox_t &ddbox) { - DDGridSetup ddGridSetup; - - if (options.numCells[XX] > 0) + if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0)) { - copy_ivec(options.numCells, ddGridSetup.numDomains); - set_ddbox_cr(*cr, &ddGridSetup.numDomains, *ir, box, xGlobal, ddbox); + char buf[STRLEN]; + gmx_bool bC = (systemInfo.haveSplitConstraints && + systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody); + sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", + !bC ? "-rdd" : "-rcon", + ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "", + bC ? " or your LINCS settings" : ""); - if (options.numPmeRanks >= 0) - { - ddGridSetup.numPmeOnlyRanks = options.numPmeRanks; - } - else - { - /* When the DD grid is set explicitly and -npme is set to auto, - * don't use PME ranks. We check later if the DD grid is - * compatible with the total number of ranks. 
- */ - ddGridSetup.numPmeOnlyRanks = 0; - } - } - else - { - set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox); - - /* We need to choose the optimal DD grid and possibly PME nodes */ - ddGridSetup = - dd_choose_grid(mdlog, cr, ir, mtop, box, ddbox, - options.numPmeRanks, - ddSettings.request1DAnd1Pulse, - !isDlbDisabled(ddSettings.initialDlbState), - options.dlbScaling, - systemInfo); - - if (ddGridSetup.numDomains[XX] == 0) - { - char buf[STRLEN]; - gmx_bool bC = (systemInfo.haveSplitConstraints && - systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody); - sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", - !bC ? "-rdd" : "-rcon", - ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "", - bC ? " or your LINCS settings" : ""); - - gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr), - "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n" - "%s\n" - "Look in the log file for details on the domain decomposition", - cr->nnodes - ddGridSetup.numPmeOnlyRanks, - ddGridSetup.cellsizeLimit, - buf); - } + gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr), + "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n" + "%s\n" + "Look in the log file for details on the domain decomposition", + cr->nnodes - ddGridSetup.numPmeOnlyRanks, + cellsizeLimit, + buf); } - const real acs = average_cellsize_min(*ddbox, ddGridSetup.numDomains); - if (acs < systemInfo.cellsizeLimit) + const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains); + if (acs < cellsizeLimit) { if (options.numCells[XX] <= 0) { @@ -2436,7 +2359,7 @@ getDDGridSetup(const gmx::MDLogger &mdlog, { gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr), "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details", - acs, systemInfo.cellsizeLimit); + acs, cellsizeLimit); } } @@ -2444,7 +2367,7 @@ getDDGridSetup(const gmx::MDLogger &mdlog, if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks) { gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr), - "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d", + "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d", numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes); } if (ddGridSetup.numPmeOnlyRanks > numPPRanks) @@ -2452,11 +2375,6 @@ getDDGridSetup(const gmx::MDLogger &mdlog, gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr), "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks); } - - ddGridSetup.numDDDimensions = set_dd_dim(ddGridSetup.numDomains, ddSettings, - ddGridSetup.ddDimensions); - - return ddGridSetup; } /*! \brief Set the cell size and interaction limits, as well as the DD grid */ @@ -2545,6 +2463,7 @@ static void set_dd_limits(const gmx::MDLogger &mdlog, const DDSettings &ddSettings, const DDSystemInfo &systemInfo, const DDGridSetup &ddGridSetup, + const int numPPRanks, const gmx_mtop_t *mtop, const t_inputrec *ir, const gmx_ddbox_t &ddbox) @@ -2575,7 +2494,7 @@ static void set_dd_limits(const gmx::MDLogger &mdlog, * but that is not yet available here. But this anyhow only * affect performance up to the second dd_partition_system call. 
*/ - const int homeAtomCountEstimate = mtop->natoms/cr->nnodes; + const int homeAtomCountEstimate = mtop->natoms/numPPRanks; comm->updateGroupsCog = std::make_unique(*mtop, systemInfo.updateGroupingPerMoleculetype, @@ -3029,7 +2948,7 @@ static void set_ddgrid_parameters(const gmx::MDLogger &mdlog, else { vol_frac = - (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, ddbox))/static_cast(dd->nnodes); + (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast(dd->nnodes); } if (debug) { @@ -3117,9 +3036,21 @@ gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog, DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal); + int numRanksRequested = cr->nnodes; + checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir->coulombtype), options.numPmeRanks); + + // DD grid setup uses a more conservative cell size limit for + // automated setup than the one in systemInfo. The latter is used + // later during DLB, for example. + const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog, ddSettings.request1DAnd1Pulse, + !isDlbDisabled(ddSettings.initialDlbState), + options.dlbScaling, *ir, + systemInfo.cellsizeLimit); gmx_ddbox_t ddbox = {0}; - DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, options, ddSettings, systemInfo, - mtop, ir, box, xGlobal, &ddbox); + DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, numRanksRequested, options, ddSettings, systemInfo, + gridSetupCellsizeLimit, + *mtop, *ir, box, xGlobal, &ddbox); + checkDDGridSetup(ddGridSetup, cr, options, ddSettings, systemInfo, gridSetupCellsizeLimit, ddbox); cr->npmenodes = ddGridSetup.numPmeOnlyRanks; @@ -3143,7 +3074,9 @@ gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog, dd->comm->cartesianRankSetup = cartSetup; set_dd_limits(mdlog, cr, dd, options, - ddSettings, systemInfo, ddGridSetup, + ddSettings, systemInfo, + ddGridSetup, + ddRankSetup.numPPRanks, mtop, ir, ddbox); diff --git a/src/gromacs/domdec/domdec_internal.h b/src/gromacs/domdec/domdec_internal.h index 8b707f3977..457c65fb62 100644 --- a/src/gromacs/domdec/domdec_internal.h +++ b/src/gromacs/domdec/domdec_internal.h @@ -656,7 +656,10 @@ struct gmx_domdec_comm_t // NOLINT (clang-analyzer-optin.performance.Padding) /** The coordinate/force communication setup and indices */ gmx_domdec_comm_dim_t cd[DIM]; - /** The maximum number of cells to communicate with in one dimension */ + /** Restricts the maximum number of cells to communicate with in one dimension + * + * Dynamic load balancing is not permitted to change sizes if it + * would violate this restriction. */ int maxpulse = 0; /** Which cg distribution is stored on the master node, diff --git a/src/gromacs/domdec/domdec_setup.cpp b/src/gromacs/domdec/domdec_setup.cpp index 71904ac379..e685336bd7 100644 --- a/src/gromacs/domdec/domdec_setup.cpp +++ b/src/gromacs/domdec/domdec_setup.cpp @@ -53,6 +53,7 @@ #include "gromacs/domdec/domdec.h" #include "gromacs/domdec/domdec_struct.h" +#include "gromacs/domdec/options.h" #include "gromacs/ewald/pme.h" #include "gromacs/gmxlib/network.h" #include "gromacs/math/utilities.h" @@ -67,7 +68,12 @@ #include "gromacs/utility/logger.h" #include "gromacs/utility/stringutil.h" +#include "box.h" #include "domdec_internal.h" +#include "utility.h" + +// TODO remove this when moving domdec into gmx namespace +using gmx::DomdecOptions; /*! 
\brief Margin for setting up the DD grid */ #define DD_GRID_MARGIN_PRES_SCALE 1.05 @@ -179,9 +185,10 @@ static gmx_bool fits_pp_pme_perf(int ntot, int npme, float ratio) /*! \brief Make a guess for the number of PME ranks to use. */ static int guess_npme(const gmx::MDLogger &mdlog, - const gmx_mtop_t *mtop, const t_inputrec *ir, - const matrix box, - int nrank_tot) + const gmx_mtop_t &mtop, + const t_inputrec &ir, + const matrix box, + int nrank_tot) { float ratio; int npme; @@ -243,7 +250,7 @@ static int guess_npme(const gmx::MDLogger &mdlog, { gmx_fatal(FARGS, "Could not find an appropriate number of separate PME ranks. i.e. >= %5f*#ranks (%d) and <= #ranks/2 (%d) and reasonable performance wise (grid_x=%d, grid_y=%d).\n" "Use the -npme option of mdrun or change the number of ranks or the PME grid dimensions, see the manual for details.", - ratio, gmx::roundToInt(0.95*ratio*nrank_tot), nrank_tot/2, ir->nkx, ir->nky); + ratio, gmx::roundToInt(0.95*ratio*nrank_tot), nrank_tot/2, ir.nkx, ir.nky); } else { @@ -262,7 +269,7 @@ static int div_up(int n, int f) return (n + f - 1)/f; } -real comm_box_frac(const ivec dd_nc, real cutoff, const gmx_ddbox_t *ddbox) +real comm_box_frac(const gmx::IVec &dd_nc, real cutoff, const gmx_ddbox_t &ddbox) { int i, j, k; rvec nw; @@ -270,7 +277,7 @@ real comm_box_frac(const ivec dd_nc, real cutoff, const gmx_ddbox_t *ddbox) for (i = 0; i < DIM; i++) { - real bt = ddbox->box_size[i]*ddbox->skew_fac[i]; + real bt = ddbox.box_size[i]*ddbox.skew_fac[i]; nw[i] = dd_nc[i]*cutoff/bt; } @@ -301,10 +308,10 @@ real comm_box_frac(const ivec dd_nc, real cutoff, const gmx_ddbox_t *ddbox) } /*! \brief Return whether the DD inhomogeneous in the z direction */ -static gmx_bool inhomogeneous_z(const t_inputrec *ir) +static gmx_bool inhomogeneous_z(const t_inputrec &ir) { - return ((EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) && - ir->ePBC == epbcXYZ && ir->ewald_geometry == eewg3DC); + return ((EEL_PME(ir.coulombtype) || ir.coulombtype == eelEWALD) && + ir.ePBC == epbcXYZ && ir.ewald_geometry == eewg3DC); } /*! \brief Estimate cost of PME FFT communication @@ -330,15 +337,15 @@ static float comm_pme_cost_vol(int npme, int a, int b, int c) /*! \brief Estimate cost of communication for a possible domain decomposition. */ static float comm_cost_est(real limit, real cutoff, - const matrix box, const gmx_ddbox_t *ddbox, - int natoms, const t_inputrec *ir, + const matrix box, const gmx_ddbox_t &ddbox, + int natoms, const t_inputrec &ir, float pbcdxr, - int npme_tot, ivec nc) + int npme_tot, const gmx::IVec &nc) { - ivec npme = {1, 1, 1}; - int i, j, nk, overlap; - rvec bt; - float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx; + gmx::IVec npme = {1, 1, 1}; + int i, j, nk, overlap; + rvec bt; + float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx; /* This is the cost of a pbc_dx call relative to the cost * of communicating the coordinate and force of an atom. * This will be machine dependent. 
@@ -349,8 +356,8 @@ static float comm_cost_est(real limit, real cutoff, float temp; /* Check the DD algorithm restrictions */ - if ((ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1) || - (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1))) + if ((ir.ePBC == epbcXY && ir.nwall < 2 && nc[ZZ] > 1) || + (ir.ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1))) { return -1; } @@ -360,15 +367,15 @@ static float comm_cost_est(real limit, real cutoff, return -1; } - assert(ddbox->npbcdim <= DIM); + assert(ddbox.npbcdim <= DIM); /* Check if the triclinic requirements are met */ for (i = 0; i < DIM; i++) { - for (j = i+1; j < ddbox->npbcdim; j++) + for (j = i+1; j < ddbox.npbcdim; j++) { - if (box[j][i] != 0 || ir->deform[j][i] != 0 || - (ir->epc != epcNO && ir->compress[j][i] != 0)) + if (box[j][i] != 0 || ir.deform[j][i] != 0 || + (ir.epc != epcNO && ir.compress[j][i] != 0)) { if (nc[j] > 1 && nc[i] == 1) { @@ -380,15 +387,15 @@ static float comm_cost_est(real limit, real cutoff, for (i = 0; i < DIM; i++) { - bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i]; + bt[i] = ddbox.box_size[i]*ddbox.skew_fac[i]; /* Without PBC and with 2 cells, there are no lower limits on the cell size */ - if (!(i >= ddbox->npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit) + if (!(i >= ddbox.npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit) { return -1; } /* With PBC, check if the cut-off fits in nc[i]-1 cells */ - if (i < ddbox->npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff) + if (i < ddbox.npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff) { return -1; } @@ -417,7 +424,7 @@ static float comm_cost_est(real limit, real cutoff, } } - if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype)) + if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype)) { /* Check the PME grid restrictions. * Currently these can only be invalid here with too few grid lines @@ -432,7 +439,7 @@ static float comm_cost_est(real limit, real cutoff, */ bool useThreads = true; bool errorsAreFatal = false; - if (!gmx_pme_check_restrictions(ir->pme_order, ir->nkx, ir->nky, ir->nkz, + if (!gmx_pme_check_restrictions(ir.pme_order, ir.nkx, ir.nky, ir.nkz, npme_x, useThreads, errorsAreFatal)) { return -1; @@ -497,28 +504,28 @@ static float comm_cost_est(real limit, real cutoff, /* Grid overlap communication */ if (npme[i] > 1) { - nk = (i == 0 ? ir->nkx : ir->nky); - overlap = (nk % npme[i] == 0 ? ir->pme_order-1 : ir->pme_order); + nk = (i == 0 ? ir.nkx : ir.nky); + overlap = (nk % npme[i] == 0 ? 
ir.pme_order-1 : ir.pme_order); temp = npme[i]; temp *= overlap; - temp *= ir->nkx; - temp *= ir->nky; - temp *= ir->nkz; + temp *= ir.nkx; + temp *= ir.nky; + temp *= ir.nkz; temp /= nk; comm_pme += temp; -/* Old line comm_pme += npme[i]*overlap*ir->nkx*ir->nky*ir->nkz/nk; */ +/* Old line comm_pme += npme[i]*overlap*ir.nkx*ir.nky*ir.nkz/nk; */ } } - comm_pme += comm_pme_cost_vol(npme[YY], ir->nky, ir->nkz, ir->nkx); - comm_pme += comm_pme_cost_vol(npme[XX], ir->nkx, ir->nky, ir->nkz); + comm_pme += comm_pme_cost_vol(npme[YY], ir.nky, ir.nkz, ir.nkx); + comm_pme += comm_pme_cost_vol(npme[XX], ir.nkx, ir.nky, ir.nkz); /* Add cost of pbc_dx for bondeds */ cost_pbcdx = 0; - if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir->ePBC != epbcXY)) + if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir.ePBC != epbcXY)) { - if ((ddbox->tric_dir[XX] && nc[XX] == 1) || - (ddbox->tric_dir[YY] && nc[YY] == 1)) + if ((ddbox.tric_dir[XX] && nc[XX] == 1) || + (ddbox.tric_dir[YY] && nc[YY] == 1)) { cost_pbcdx = pbcdxr*pbcdx_tric_fac; } @@ -540,17 +547,20 @@ static float comm_cost_est(real limit, real cutoff, return 3*natoms*(comm_vol + cost_pbcdx) + comm_pme; } -/*! \brief Assign penalty factors to possible domain decompositions, based on the estimated communication costs. */ +/*! \brief Assign penalty factors to possible domain decompositions, + * based on the estimated communication costs. */ static void assign_factors(const real limit, const bool request1D, const real cutoff, - const matrix box, const gmx_ddbox_t *ddbox, - int natoms, const t_inputrec *ir, + const matrix box, const gmx_ddbox_t &ddbox, + int natoms, const t_inputrec &ir, float pbcdxr, int npme, int ndiv, const int *div, const int *mdiv, - ivec ir_try, ivec opt) + gmx::IVec *irTryPtr, + gmx::IVec *opt) { - int x, y, i; - float ce; + int x, y, i; + float ce; + gmx::IVec &ir_try = *irTryPtr; if (ndiv == 0) { @@ -565,12 +575,12 @@ static void assign_factors(const real limit, const bool request1D, ce = comm_cost_est(limit, cutoff, box, ddbox, natoms, ir, pbcdxr, npme, ir_try); - if (ce >= 0 && (opt[XX] == 0 || + if (ce >= 0 && ((*opt)[XX] == 0 || ce < comm_cost_est(limit, cutoff, box, ddbox, natoms, ir, pbcdxr, - npme, opt))) + npme, *opt))) { - copy_ivec(ir_try, opt); + *opt = ir_try; } return; @@ -596,7 +606,7 @@ static void assign_factors(const real limit, const bool request1D, /* recurse */ assign_factors(limit, request1D, cutoff, box, ddbox, natoms, ir, pbcdxr, npme, - ndiv-1, div+1, mdiv+1, ir_try, opt); + ndiv-1, div+1, mdiv+1, irTryPtr, opt); for (i = 0; i < mdiv[0]-x-y; i++) { @@ -614,38 +624,45 @@ static void assign_factors(const real limit, const bool request1D, } } -/*! \brief Determine the optimal distribution of DD cells for the simulation system and number of MPI ranks */ -static real optimize_ncells(const gmx::MDLogger &mdlog, - const int nnodes_tot, const int npme_only, - const bool request1DAnd1Pulse, - const bool bDynLoadBal, const real dlb_scale, - const gmx_mtop_t *mtop, - const matrix box, const gmx_ddbox_t *ddbox, - const t_inputrec *ir, - const DDSystemInfo &systemInfo, - ivec nc) +/*! \brief Determine the optimal distribution of DD cells for the + * simulation system and number of MPI ranks + * + * \returns The optimal grid cell choice. The latter will contain all + * zeros if no valid cell choice exists. 
*/ +static gmx::IVec +optimizeDDCells(const gmx::MDLogger &mdlog, + const int numRanksRequested, + const int numPmeOnlyRanks, + const real cellSizeLimit, + const bool request1DAnd1Pulse, + const gmx_mtop_t &mtop, + const matrix box, + const gmx_ddbox_t &ddbox, + const t_inputrec &ir, + const DDSystemInfo &systemInfo) { - int npp, npme, d, nmax; - double pbcdxr; - real limit; - ivec itry; + double pbcdxr; - limit = systemInfo.cellsizeLimit; - if (request1DAnd1Pulse) - { - limit = std::max(limit, ir->rlist); - } + const int numPPRanks = numRanksRequested - numPmeOnlyRanks; - npp = nnodes_tot - npme_only; - if (EEL_PME(ir->coulombtype)) - { - npme = (npme_only > 0 ? npme_only : npp); - } - else + GMX_LOG(mdlog.info).appendTextFormatted( + "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm", + numPPRanks, cellSizeLimit); + if (inhomogeneous_z(ir)) { - npme = 0; + GMX_LOG(mdlog.info).appendTextFormatted( + "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.", + eewg_names[ir.ewald_geometry]); } + + // For cost estimates, we need the number of ranks doing PME work, + // which is the number of PP ranks when not using separate + // PME-only ranks. + const int numRanksDoingPmeWork = (EEL_PME(ir.coulombtype) ? + ((numPmeOnlyRanks > 0) ? numPmeOnlyRanks : numPPRanks) : + 0); + if (systemInfo.haveInterDomainBondeds) { /* If we can skip PBC for distance calculations in plain-C bondeds, @@ -653,51 +670,21 @@ static real optimize_ncells(const gmx::MDLogger &mdlog, * Here we ignore SIMD bondeds as they always do (fast) PBC. */ count_bonded_distances(mtop, ir, &pbcdxr, nullptr); - pbcdxr /= static_cast(mtop->natoms); + pbcdxr /= static_cast(mtop.natoms); } else { /* Every molecule is a single charge group: no pbc required */ pbcdxr = 0; } - /* Add a margin for DLB and/or pressure scaling */ - if (bDynLoadBal) - { - if (dlb_scale >= 1.0) - { - gmx_fatal(FARGS, "The value for option -dds should be smaller than 1"); - } - GMX_LOG(mdlog.info).appendTextFormatted( - "Scaling the initial minimum size with 1/%g (option -dds) = %g", - dlb_scale, 1/dlb_scale); - limit /= dlb_scale; - } - else if (ir->epc != epcNO) - { - GMX_LOG(mdlog.info).appendTextFormatted( - "To account for pressure scaling, scaling the initial minimum size with %g", - DD_GRID_MARGIN_PRES_SCALE); - limit *= DD_GRID_MARGIN_PRES_SCALE; - } - - GMX_LOG(mdlog.info).appendTextFormatted( - "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm", - npp, limit); - - if (inhomogeneous_z(ir)) - { - GMX_LOG(mdlog.info).appendTextFormatted( - "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.", - eewg_names[ir->ewald_geometry]); - } - if (limit > 0) + if (cellSizeLimit > 0) { std::string maximumCells = "The maximum allowed number of cells is:"; - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { - nmax = static_cast(ddbox->box_size[d]*ddbox->skew_fac[d]/limit); - if (d >= ddbox->npbcdim && nmax < 2) + int nmax = static_cast(ddbox.box_size[d]*ddbox.skew_fac[d]/cellSizeLimit); + if (d >= ddbox.npbcdim && nmax < 2) { nmax = 2; } @@ -715,123 +702,254 @@ static real optimize_ncells(const gmx::MDLogger &mdlog, fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr); } - /* Decompose npp in factors */ + /* Decompose numPPRanks in factors */ std::vector div; std::vector mdiv; - factorize(npp, &div, &mdiv); + factorize(numPPRanks, &div, &mdiv); - itry[XX] = 1; - itry[YY] = 1; - itry[ZZ] = 1; - clear_ivec(nc); 
- assign_factors(limit, request1DAnd1Pulse, - systemInfo.cutoff, box, ddbox, mtop->natoms, ir, pbcdxr, - npme, div.size(), div.data(), mdiv.data(), itry, nc); + gmx::IVec itry = { 1, 1, 1 }; + gmx::IVec numDomains = { 0, 0, 0 }; + assign_factors(cellSizeLimit, request1DAnd1Pulse, + systemInfo.cutoff, box, ddbox, mtop.natoms, ir, pbcdxr, + numRanksDoingPmeWork, div.size(), div.data(), mdiv.data(), &itry, &numDomains); - return limit; + return numDomains; } -DDGridSetup -dd_choose_grid(const gmx::MDLogger &mdlog, - const t_commrec *cr, - const t_inputrec *ir, - const gmx_mtop_t *mtop, - const matrix box, const gmx_ddbox_t *ddbox, - const int numPmeRanksRequested, - const bool request1DAnd1Pulse, - const bool bDynLoadBal, const real dlb_scale, - const DDSystemInfo &systemInfo) +real +getDDGridSetupCellSizeLimit(const gmx::MDLogger &mdlog, + const bool request1DAnd1Pulse, + const bool bDynLoadBal, + const real dlb_scale, + const t_inputrec &ir, + const real systemInfoCellSizeLimit) { - DDGridSetup ddGridSetup; + real cellSizeLimit = systemInfoCellSizeLimit; + if (request1DAnd1Pulse) + { + cellSizeLimit = std::max(cellSizeLimit, ir.rlist); + } - if (MASTER(cr)) + /* Add a margin for DLB and/or pressure scaling */ + if (bDynLoadBal) { - int64_t nnodes_div = cr->nnodes; - if (EEL_PME(ir->coulombtype)) + if (dlb_scale >= 1.0) { - if (numPmeRanksRequested > 0) - { - if (numPmeRanksRequested >= cr->nnodes) - { - gmx_fatal(FARGS, - "Cannot have %d separate PME ranks with just %d total ranks", - numPmeRanksRequested, cr->nnodes); - } - - /* If the user purposely selected the number of PME nodes, - * only check for large primes in the PP node count. - */ - nnodes_div -= numPmeRanksRequested; - } + gmx_fatal(FARGS, "The value for option -dds should be smaller than 1"); } - else + GMX_LOG(mdlog.info).appendTextFormatted( + "Scaling the initial minimum size with 1/%g (option -dds) = %g", + dlb_scale, 1/dlb_scale); + cellSizeLimit /= dlb_scale; + } + else if (ir.epc != epcNO) + { + GMX_LOG(mdlog.info).appendTextFormatted( + "To account for pressure scaling, scaling the initial minimum size with %g", + DD_GRID_MARGIN_PRES_SCALE); + cellSizeLimit *= DD_GRID_MARGIN_PRES_SCALE; + } + + return cellSizeLimit; +} +void +checkForValidRankCountRequests(const int numRanksRequested, + const bool usingPme, + const int numPmeRanksRequested) +{ + int numPPRanksRequested = numRanksRequested; + if (usingPme) + { + numPPRanksRequested -= numPmeRanksRequested; + if (numPmeRanksRequested > numPPRanksRequested) { - ddGridSetup.numPmeOnlyRanks = 0; + gmx_fatal(FARGS, + "Cannot have %d separate PME ranks with only %d PP ranks, choose fewer or no separate PME ranks", + numPmeRanksRequested, numPPRanksRequested); } + } - if (nnodes_div > 12) + // Once the rank count is large enough, it becomes worth + // suggesting improvements to the user. + const int minPPRankCountToCheckForLargePrimeFactors = 13; + if (numPPRanksRequested >= minPPRankCountToCheckForLargePrimeFactors) + { + const int largestDivisor = largest_divisor(numPPRanksRequested); + /* Check if the largest divisor is more than numPPRanks ^ (2/3) */ + if (largestDivisor*largestDivisor*largestDivisor > + numPPRanksRequested*numPPRanksRequested) { - const int64_t ldiv = largest_divisor(nnodes_div); - /* Check if the largest divisor is more than nnodes^2/3 */ - if (ldiv*ldiv*ldiv > nnodes_div*nnodes_div) - { - gmx_fatal(FARGS, "The number of ranks you selected (%" PRId64 ") contains a large prime factor %" PRId64 ". In most cases this will lead to bad performance. 
Choose a number with smaller prime factors or set the decomposition (option -dd) manually.", - nnodes_div, ldiv); - } + gmx_fatal(FARGS, "The number of ranks selected for particle-particle work (%d) " + "contains a large prime factor %d. In most cases this will lead to " + "bad performance. Choose a number with smaller prime factors or " + "set the decomposition (option -dd) manually.", + numPPRanksRequested, largestDivisor); } + } +} + +/*! \brief Return the number of PME-only ranks used by the simulation + * + * If the user did not choose a number, then decide for them. */ +static int +getNumPmeOnlyRanksToUse(const gmx::MDLogger &mdlog, + const DomdecOptions &options, + const gmx_mtop_t &mtop, + const t_inputrec &ir, + const matrix box, + const int numRanksRequested) +{ + int numPmeOnlyRanks; + const char *extraMessage = ""; - if (EEL_PME(ir->coulombtype)) + if (options.numCells[XX] > 0) + { + if (options.numPmeRanks >= 0) + { + // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0 + numPmeOnlyRanks = options.numPmeRanks; + } + else + { + // When the DD grid is set explicitly and -npme is set to auto, + // don't use PME ranks. We check later if the DD grid is + // compatible with the total number of ranks. + numPmeOnlyRanks = 0; + } + } + else + { + if (!EEL_PME(ir.coulombtype)) + { + // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0 + numPmeOnlyRanks = 0; + } + else { - if (numPmeRanksRequested < 0) + if (options.numPmeRanks >= 0) + { + numPmeOnlyRanks = options.numPmeRanks; + } + else { - /* Use PME nodes when the number of nodes is more than 16 */ - if (cr->nnodes <= 18) + // Controls the automated choice of when to use separate PME-only ranks. + const int minRankCountToDefaultToSeparatePmeRanks = 19; + + if (numRanksRequested < minRankCountToDefaultToSeparatePmeRanks) { - ddGridSetup.numPmeOnlyRanks = 0; - GMX_LOG(mdlog.info).appendTextFormatted( - "Using %d separate PME ranks, as there are too few total\n" - " ranks for efficient splitting", - ddGridSetup.numPmeOnlyRanks); + numPmeOnlyRanks = 0; + extraMessage = ", as there are too few total\n" + " ranks for efficient splitting"; } else { - ddGridSetup.numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, cr->nnodes); - GMX_LOG(mdlog.info).appendTextFormatted( - "Using %d separate PME ranks, as guessed by mdrun", - ddGridSetup.numPmeOnlyRanks); + numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, numRanksRequested); + extraMessage = ", as guessed by mdrun"; } } - else + } + + } + GMX_RELEASE_ASSERT(numPmeOnlyRanks <= numRanksRequested, + "Cannot have more PME ranks than total ranks"); + if (EEL_PME(ir.coulombtype)) + { + GMX_LOG(mdlog.info).appendTextFormatted + ("Using %d separate PME ranks%s", numPmeOnlyRanks, extraMessage); + } + + return numPmeOnlyRanks; +} + +/*! 
\brief Sets the order of the DD dimensions, returns the number of DD dimensions */ +static int set_dd_dim(const gmx::IVec &numDDCells, + const DDSettings &ddSettings, + ivec *dims) +{ + int ndim = 0; + if (ddSettings.useDDOrderZYX) + { + /* Decomposition order z,y,x */ + for (int dim = DIM - 1; dim >= 0; dim--) + { + if (numDDCells[dim] > 1) + { + (*dims)[ndim++] = dim; + } + } + } + else + { + /* Decomposition order x,y,z */ + for (int dim = 0; dim < DIM; dim++) + { + if (numDDCells[dim] > 1) { - /* We checked above that nPmeRanks is a valid number */ - ddGridSetup.numPmeOnlyRanks = numPmeRanksRequested; - GMX_LOG(mdlog.info).appendTextFormatted( - "Using %d separate PME ranks", ddGridSetup.numPmeOnlyRanks); - // TODO: there was a ", per user request" note here, but it's not correct anymore, - // as with GPUs decision about nPmeRanks can be made in runner() as well. - // Consider a single spot for setting nPmeRanks. + (*dims)[ndim++] = dim; } } + } - ddGridSetup.cellsizeLimit = - optimize_ncells(mdlog, cr->nnodes, ddGridSetup.numPmeOnlyRanks, - request1DAnd1Pulse, - bDynLoadBal, dlb_scale, - mtop, box, ddbox, ir, - systemInfo, - ddGridSetup.numDomains); + if (ndim == 0) + { + /* Set dim[0] to avoid extra checks on ndim in several places */ + (*dims)[0] = XX; } - /* Communicate the information set by the master to all nodes */ - gmx_bcast(sizeof(ddGridSetup.numDomains), ddGridSetup.numDomains, cr); - if (EEL_PME(ir->coulombtype)) + return ndim; +} + +DDGridSetup +getDDGridSetup(const gmx::MDLogger &mdlog, + const t_commrec *cr, + const int numRanksRequested, + const DomdecOptions &options, + const DDSettings &ddSettings, + const DDSystemInfo &systemInfo, + const real cellSizeLimit, + const gmx_mtop_t &mtop, + const t_inputrec &ir, + const matrix box, + gmx::ArrayRef xGlobal, + gmx_ddbox_t *ddbox) +{ + int numPmeOnlyRanks = getNumPmeOnlyRanksToUse(mdlog, options, mtop, ir, box, numRanksRequested); + + gmx::IVec numDomains; + if (options.numCells[XX] > 0) { - gmx_bcast(sizeof(ddGridSetup.numPmeOnlyRanks), &ddGridSetup.numPmeOnlyRanks, cr); + numDomains = gmx::IVec(options.numCells); + const ivec numDomainsLegacyIvec = { numDomains[XX], numDomains[YY], numDomains[ZZ] }; + set_ddbox_cr(*cr, &numDomainsLegacyIvec, ir, box, xGlobal, ddbox); } else { - ddGridSetup.numPmeOnlyRanks = 0; + set_ddbox_cr(*cr, nullptr, ir, box, xGlobal, ddbox); + + if (MASTER(cr)) + { + numDomains = + optimizeDDCells(mdlog, numRanksRequested, numPmeOnlyRanks, + cellSizeLimit, + ddSettings.request1DAnd1Pulse, + mtop, box, *ddbox, ir, + systemInfo); + } + } + + /* Communicate the information set by the master to all ranks */ + gmx_bcast(sizeof(numDomains), numDomains, cr); + if (EEL_PME(ir.coulombtype)) + { + gmx_bcast(sizeof(numPmeOnlyRanks), &numPmeOnlyRanks, cr); } + DDGridSetup ddGridSetup; + ddGridSetup.numPmeOnlyRanks = numPmeOnlyRanks; + ddGridSetup.numDomains[XX] = numDomains[XX]; + ddGridSetup.numDomains[YY] = numDomains[YY]; + ddGridSetup.numDomains[ZZ] = numDomains[ZZ]; + ddGridSetup.numDDDimensions = set_dd_dim(numDomains, ddSettings, &ddGridSetup.ddDimensions); + return ddGridSetup; } diff --git a/src/gromacs/domdec/domdec_setup.h b/src/gromacs/domdec/domdec_setup.h index f4f4dc7e5e..feb0a73f00 100644 --- a/src/gromacs/domdec/domdec_setup.h +++ b/src/gromacs/domdec/domdec_setup.h @@ -44,6 +44,7 @@ #include "gromacs/math/vec.h" +struct DDSettings; struct DDSystemInfo; struct gmx_ddbox_t; struct gmx_mtop_t; @@ -52,11 +53,13 @@ struct t_inputrec; namespace gmx { +struct DomdecOptions; class MDLogger; +template 
class ArrayRef; } // namespace /*! \brief Returns the volume fraction of the system that is communicated */ -real comm_box_frac(const ivec dd_nc, real cutoff, const gmx_ddbox_t *ddbox); +real comm_box_frac(const gmx::IVec &dd_nc, real cutoff, const gmx_ddbox_t &ddbox); /*! \internal * \brief Describes the DD grid setup @@ -70,26 +73,49 @@ struct DDGridSetup int numPmeOnlyRanks = 0; //! The number of domains along each dimension ivec numDomains = { 0, 0, 0 }; - //! The mininum required cell size in nm - real cellsizeLimit = 0; //! The number of dimensions which we decompose in domains int numDDDimensions = 0; //! The domain decomposition dimensions, the first numDDDimensions entries are used ivec ddDimensions = { -1, -1, -1 }; }; -/*! \brief Determines the optimal DD cell setup dd->nc and possibly npmenodes - * for the system. +/*! \brief Checks that requests for PP and PME ranks honor basic expectations + * + * Issues a fatal error if there are more PME ranks than PP, or if the + * count of PP ranks has a prime factor that is too large to be likely + * to have good performance. */ +void +checkForValidRankCountRequests(int numRanksRequested, + bool usingPme, + int numPmeRanksRequested); + +/*! \brief Return the minimum cell size (in nm) required for DD */ +real +getDDGridSetupCellSizeLimit(const gmx::MDLogger &mdlog, + bool request1DAnd1Pulse, + bool bDynLoadBal, + real dlb_scale, + const t_inputrec &ir, + real systemInfoCellSizeLimit); + +/*! \brief Determines the DD grid setup + * + * Either implements settings required by the user, or otherwise + * chooses estimated optimal number of separate PME ranks and DD grid + * cell setup, DD cell size limits, and the initial ddbox. */ DDGridSetup -dd_choose_grid(const gmx::MDLogger &mdlog, - const t_commrec *cr, - const t_inputrec *ir, - const gmx_mtop_t *mtop, - const matrix box, const gmx_ddbox_t *ddbox, - int numPmeRanksRequested, - bool request1DAnd1Pulse, - bool bDynLoadBal, real dlb_scale, - const DDSystemInfo &systemInfo); +getDDGridSetup(const gmx::MDLogger &mdlog, + const t_commrec *cr, + int numRanksRequested, + const gmx::DomdecOptions &options, + const DDSettings &ddSettings, + const DDSystemInfo &systemInfo, + real cellSizeLimit, + const gmx_mtop_t &mtop, + const t_inputrec &ir, + const matrix box, + gmx::ArrayRef xGlobal, + gmx_ddbox_t *ddbox); #endif diff --git a/src/gromacs/gmxpreprocess/grompp.cpp b/src/gromacs/gmxpreprocess/grompp.cpp index ffdc4da528..a3a2c3f05f 100644 --- a/src/gromacs/gmxpreprocess/grompp.cpp +++ b/src/gromacs/gmxpreprocess/grompp.cpp @@ -2377,7 +2377,7 @@ int gmx_grompp(int argc, char *argv[]) if (EEL_PME(ir->coulombtype)) { - float ratio = pme_load_estimate(&sys, ir, state.box); + float ratio = pme_load_estimate(sys, *ir, state.box); fprintf(stderr, "Estimate for the relative computational load of the PME mesh part: %.2f\n", ratio); /* With free energy we might need to do PME both for the A and B state * charges. 
This will double the cost, but the optimal performance will diff --git a/src/gromacs/mdlib/perf_est.cpp b/src/gromacs/mdlib/perf_est.cpp index a66b119536..5ba7a740d5 100644 --- a/src/gromacs/mdlib/perf_est.cpp +++ b/src/gromacs/mdlib/perf_est.cpp @@ -153,7 +153,7 @@ static double simd_cycle_factor(gmx_bool bUseSIMD) return simd_cycle_no_simd/speedup; } -void count_bonded_distances(const gmx_mtop_t *mtop, const t_inputrec *ir, +void count_bonded_distances(const gmx_mtop_t &mtop, const t_inputrec &ir, double *ndistance_c, double *ndistance_simd) { gmx_bool bExcl; @@ -166,25 +166,25 @@ void count_bonded_distances(const gmx_mtop_t *mtop, const t_inputrec *ir, gmx_bool bSimdBondeds = FALSE; #endif - bExcl = (ir->cutoff_scheme == ecutsGROUP && inputrecExclForces(ir) - && !EEL_FULL(ir->coulombtype)); + bExcl = (ir.cutoff_scheme == ecutsGROUP && inputrecExclForces(&ir) + && !EEL_FULL(ir.coulombtype)); if (bSimdBondeds) { /* We only have SIMD versions of these bondeds without energy and * without shift-forces, we take that into account here. */ - if (ir->nstcalcenergy > 0) + if (ir.nstcalcenergy > 0) { - nonsimd_step_frac = 1.0/ir->nstcalcenergy; + nonsimd_step_frac = 1.0/ir.nstcalcenergy; } else { nonsimd_step_frac = 0; } - if (ir->epc != epcNO && 1.0/ir->nstpcouple > nonsimd_step_frac) + if (ir.epc != epcNO && 1.0/ir.nstpcouple > nonsimd_step_frac) { - nonsimd_step_frac = 1.0/ir->nstpcouple; + nonsimd_step_frac = 1.0/ir.nstpcouple; } } else @@ -197,9 +197,9 @@ void count_bonded_distances(const gmx_mtop_t *mtop, const t_inputrec *ir, */ ndtot_c = 0; ndtot_simd = 0; - for (const gmx_molblock_t &molb : mtop->molblock) + for (const gmx_molblock_t &molb : mtop.molblock) { - const gmx_moltype_t *molt = &mtop->moltype[molb.type]; + const gmx_moltype_t *molt = &mtop.moltype[molb.type]; for (ftype = 0; ftype < F_NRE; ftype++) { int nbonds; @@ -259,7 +259,7 @@ void count_bonded_distances(const gmx_mtop_t *mtop, const t_inputrec *ir, } } -static void pp_verlet_load(const gmx_mtop_t *mtop, const t_inputrec *ir, +static void pp_verlet_load(const gmx_mtop_t &mtop, const t_inputrec &ir, const matrix box, int *nq_tot, int *nlj_tot, double *cost_pp, @@ -280,17 +280,17 @@ static void pp_verlet_load(const gmx_mtop_t *mtop, const t_inputrec *ir, const real nbnxn_refkernel_fac = 8.0; #endif - bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT); + bQRF = (EEL_RF(ir.coulombtype) || ir.coulombtype == eelCUT); - gmx::ArrayRef iparams = mtop->ffparams.iparams; - atnr = mtop->ffparams.atnr; + gmx::ArrayRef iparams = mtop.ffparams.iparams; + atnr = mtop.ffparams.atnr; nqlj = 0; nq = 0; *bChargePerturbed = FALSE; *bTypePerturbed = FALSE; - for (const gmx_molblock_t &molb : mtop->molblock) + for (const gmx_molblock_t &molb : mtop.molblock) { - const gmx_moltype_t *molt = &mtop->moltype[molb.type]; + const gmx_moltype_t *molt = &mtop.moltype[molb.type]; const t_atom *atom = molt->atoms.atom; for (a = 0; a < molt->atoms.nr; a++) { @@ -317,7 +317,7 @@ static void pp_verlet_load(const gmx_mtop_t *mtop, const t_inputrec *ir, } } - nlj = mtop->natoms - nqlj - nq; + nlj = mtop.natoms - nqlj - nq; *nq_tot = nqlj + nq; *nlj_tot = nqlj + nlj; @@ -331,27 +331,27 @@ static void pp_verlet_load(const gmx_mtop_t *mtop, const t_inputrec *ir, #else j_cluster_size = 4; #endif - r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(j_cluster_size, mtop->natoms/det(box)); + r_eff = ir.rlist + nbnxn_get_rlist_effective_inc(j_cluster_size, mtop.natoms/det(box)); /* The average number of pairs per atom */ - nppa = 
0.5*4/3*M_PI*r_eff*r_eff*r_eff*mtop->natoms/det(box); + nppa = 0.5*4/3*M_PI*r_eff*r_eff*r_eff*mtop.natoms/det(box); if (debug) { fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f pairs per atom %.1f\n", - nqlj, nq, nlj, ir->rlist, r_eff, nppa); + nqlj, nq, nlj, ir.rlist, r_eff, nppa); } /* Determine the cost per pair interaction */ c_qlj = (bQRF ? c_nbnxn_qrf_lj : c_nbnxn_qexp_lj); c_q = (bQRF ? c_nbnxn_qrf : c_nbnxn_qexp); c_lj = c_nbnxn_lj; - if (ir->vdw_modifier == eintmodPOTSWITCH || EVDW_PME(ir->vdwtype)) + if (ir.vdw_modifier == eintmodPOTSWITCH || EVDW_PME(ir.vdwtype)) { c_qlj += c_nbnxn_ljexp_add; c_lj += c_nbnxn_ljexp_add; } - if (EVDW_PME(ir->vdwtype) && ir->ljpme_combination_rule == eljpmeLB) + if (EVDW_PME(ir.vdwtype) && ir.ljpme_combination_rule == eljpmeLB) { /* We don't have LJ-PME LB comb. rule kernels, we use slow kernels */ c_qlj *= nbnxn_refkernel_fac; @@ -367,7 +367,7 @@ static void pp_verlet_load(const gmx_mtop_t *mtop, const t_inputrec *ir, *cost_pp *= simd_cycle_factor(bHaveSIMD); } -float pme_load_estimate(const gmx_mtop_t *mtop, const t_inputrec *ir, +float pme_load_estimate(const gmx_mtop_t &mtop, const t_inputrec &ir, const matrix box) { int nq_tot, nlj_tot; @@ -401,31 +401,31 @@ float pme_load_estimate(const gmx_mtop_t *mtop, const t_inputrec *ir, cost_solve = 0; int gridNkzFactor = int{ - (ir->nkz + 1)/2 + (ir.nkz + 1)/2 }; - if (EEL_PME(ir->coulombtype)) + if (EEL_PME(ir.coulombtype)) { - double grid = ir->nkx*ir->nky*gridNkzFactor; + double grid = ir.nkx*ir.nky*gridNkzFactor; - int f = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1); + int f = ((ir.efep != efepNO && bChargePerturbed) ? 2 : 1); cost_redist += c_pme_redist*nq_tot; - cost_spread += f*c_pme_spread*nq_tot*gmx::power3(ir->pme_order); + cost_spread += f*c_pme_spread*nq_tot*gmx::power3(ir.pme_order); cost_fft += f*c_pme_fft*grid*std::log(grid)/std::log(2.0); cost_solve += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD); } - if (EVDW_PME(ir->vdwtype)) + if (EVDW_PME(ir.vdwtype)) { - double grid = ir->nkx*ir->nky*gridNkzFactor; + double grid = ir.nkx*ir.nky*gridNkzFactor; - int f = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1); - if (ir->ljpme_combination_rule == eljpmeLB) + int f = ((ir.efep != efepNO && bTypePerturbed) ? 2 : 1); + if (ir.ljpme_combination_rule == eljpmeLB) { /* LB combination rule: we have 7 mesh terms */ f *= 7; } cost_redist += c_pme_redist*nlj_tot; - cost_spread += f*c_pme_spread*nlj_tot*gmx::power3(ir->pme_order); + cost_spread += f*c_pme_spread*nlj_tot*gmx::power3(ir.pme_order); cost_fft += f*c_pme_fft*2*grid*std::log(grid)/std::log(2.0); cost_solve += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD); } diff --git a/src/gromacs/mdlib/perf_est.h b/src/gromacs/mdlib/perf_est.h index de9f9abca1..16fad2a057 100644 --- a/src/gromacs/mdlib/perf_est.h +++ b/src/gromacs/mdlib/perf_est.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2008, The GROMACS development team. - * Copyright (c) 2010,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2010,2014,2015,2016,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -43,7 +43,7 @@ struct gmx_mtop_t; struct t_inputrec; -void count_bonded_distances(const gmx_mtop_t *mtop, const t_inputrec *ir, +void count_bonded_distances(const gmx_mtop_t &mtop, const t_inputrec &ir, double *ndistance_c, double *ndistance_simd); /* Count the number of distance calculations in bonded interactions, * separately for plain-C and SIMD bonded functions. @@ -51,7 +51,7 @@ void count_bonded_distances(const gmx_mtop_t *mtop, const t_inputrec *ir, * It is allowed to pass NULL for the last two arguments. */ -float pme_load_estimate(const gmx_mtop_t *mtop, const t_inputrec *ir, +float pme_load_estimate(const gmx_mtop_t &mtop, const t_inputrec &ir, const matrix box); /* Returns an estimate for the relative load of the PME mesh calculation * in the total force calculation. diff --git a/src/gromacs/mdtypes/commrec.h b/src/gromacs/mdtypes/commrec.h index 9eeb07d79f..b8c68de92d 100644 --- a/src/gromacs/mdtypes/commrec.h +++ b/src/gromacs/mdtypes/commrec.h @@ -150,8 +150,9 @@ static bool inline havePPDomainDecomposition(const t_commrec *cr) /* NOTE: It would be better to use cr->dd->nnodes, but we do not want * to pull in a dependency on domdec.h into this file. */ - return (cr != nullptr && - cr->dd != nullptr && + GMX_ASSERT(cr != nullptr, "Invalid call of havePPDomainDecomposition before commrec is made"); + GMX_ASSERT(cr->npmenodes >= 0, "Invalid call of havePPDomainDecomposition before MPMD automated decomposition was chosen."); + return (cr->dd != nullptr && cr->nnodes - cr->npmenodes > 1); } diff --git a/src/gromacs/tools/tune_pme.cpp b/src/gromacs/tools/tune_pme.cpp index 5771c25724..3d9ab118d7 100644 --- a/src/gromacs/tools/tune_pme.cpp +++ b/src/gromacs/tools/tune_pme.cpp @@ -2019,7 +2019,7 @@ static float inspect_tpr(int nfile, t_filenm fnm[], real *rcoulomb) *rcoulomb = ir->rcoulomb; /* Return the estimate for the number of PME nodes */ - float npme = pme_load_estimate(&mtop, ir, state.box); + float npme = pme_load_estimate(mtop, *ir, state.box); return npme; } -- 2.22.0
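Reading aid, not part of the applied patch: a condensed sketch of the setup sequence this change establishes in init_domain_decomposition(). Names follow the diff above; argument lists are copied from it but surrounding code is elided, so treat this as illustrative rather than a literal excerpt.

    // The requested rank count is captured once, up front, and passed
    // explicitly instead of each setup helper re-reading it from cr.
    const int numRanksRequested = cr->nnodes;
    checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir->coulombtype),
                                   options.numPmeRanks);

    // A more conservative cell-size limit for the automated grid search;
    // the limit in systemInfo is still used later, e.g. by DLB.
    const real gridSetupCellsizeLimit =
        getDDGridSetupCellSizeLimit(mdlog, ddSettings.request1DAnd1Pulse,
                                    !isDlbDisabled(ddSettings.initialDlbState),
                                    options.dlbScaling, *ir, systemInfo.cellsizeLimit);

    // Either honour -dd/-npme or choose the PME-only rank count and DD grid,
    // then exit with a fatal error if the chosen setup cannot be implemented.
    gmx_ddbox_t ddbox       = { 0 };
    DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, numRanksRequested, options,
                                             ddSettings, systemInfo, gridSetupCellsizeLimit,
                                             *mtop, *ir, box, xGlobal, &ddbox);
    checkDDGridSetup(ddGridSetup, cr, options, ddSettings, systemInfo,
                     gridSetupCellsizeLimit, ddbox);
    cr->npmenodes = ddGridSetup.numPmeOnlyRanks;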