From 27018d206525b2a6a74f6a8e64f1c1c86d46c6ce Mon Sep 17 00:00:00 2001
From: Dmitry Morozov
Date: Wed, 26 May 2021 15:01:42 +0000
Subject: [PATCH] Moved PME-only ranks check to domain decomposition setup

---
 src/gromacs/domdec/builder.h        |   6 +-
 src/gromacs/domdec/domdec.cpp       |  66 +++++++---
 src/gromacs/domdec/domdec_setup.cpp | 187 ++++++++++++++++++----------
 src/gromacs/domdec/domdec_setup.h   |  57 ++++++---
 src/gromacs/mdrun/runner.cpp        |  60 +--------
 5 files changed, 221 insertions(+), 155 deletions(-)

diff --git a/src/gromacs/domdec/builder.h b/src/gromacs/domdec/builder.h
index 1ca7408dd3..aadd54cc59 100644
--- a/src/gromacs/domdec/builder.h
+++ b/src/gromacs/domdec/builder.h
@@ -63,6 +63,7 @@ class LocalAtomSetManager;
 class RangePartitioning;
 struct DomdecOptions;
 struct MdrunOptions;
+struct MDModulesNotifiers;
 
 template<typename T>
 class ArrayRef;
@@ -83,11 +84,14 @@ public:
                                const MdrunOptions&       mdrunOptions,
                                const gmx_mtop_t&         mtop,
                                const t_inputrec&         ir,
+                               const MDModulesNotifiers& notifiers,
                                const matrix              box,
                                ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
                                bool                      useUpdateGroups,
                                real                      maxUpdateGroupRadius,
-                               ArrayRef<const RVec>      xGlobal);
+                               ArrayRef<const RVec>      xGlobal,
+                               bool                      useGpuForNonbonded,
+                               bool                      useGpuForPme);
     //! Destructor
     ~DomainDecompositionBuilder();
     //! Build the resulting DD manager
diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp
index 7ebbaaf796..86e75b748a 100644
--- a/src/gromacs/domdec/domdec.cpp
+++ b/src/gromacs/domdec/domdec.cpp
@@ -63,6 +63,7 @@
 #include "gromacs/domdec/localtopologychecker.h"
 #include "gromacs/domdec/options.h"
 #include "gromacs/domdec/partition.h"
+#include "gromacs/ewald/pme.h"
 #include "gromacs/domdec/reversetopology.h"
 #include "gromacs/gmxlib/network.h"
 #include "gromacs/gmxlib/nrnb.h"
@@ -77,6 +78,7 @@
 #include "gromacs/mdlib/updategroups.h"
 #include "gromacs/mdlib/vcm.h"
 #include "gromacs/mdlib/vsite.h"
+#include "gromacs/mdrun/mdmodules.h"
 #include "gromacs/mdtypes/commrec.h"
 #include "gromacs/mdtypes/forceoutput.h"
 #include "gromacs/mdtypes/inputrec.h"
@@ -100,6 +102,7 @@
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxmpi.h"
 #include "gromacs/utility/logger.h"
+#include "gromacs/utility/mdmodulesnotifiers.h"
 #include "gromacs/utility/real.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/strconvert.h"
@@ -2836,11 +2839,14 @@ public:
          const MdrunOptions&       mdrunOptions,
          const gmx_mtop_t&         mtop,
          const t_inputrec&         ir,
+         const MDModulesNotifiers& notifiers,
          const matrix              box,
          ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
          bool                      useUpdateGroups,
          real                      maxUpdateGroupRadius,
-         ArrayRef<const RVec>      xGlobal);
+         ArrayRef<const RVec>      xGlobal,
+         bool                      useGpuForNonbonded,
+         bool                      useGpuForPme);
 
     //! Build the resulting DD manager
     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
@@ -2857,6 +2863,8 @@ public:
     const gmx_mtop_t& mtop_;
     //! User input values from the tpr file
    const t_inputrec& ir_;
+    //! Notifiers from MdModules
+    const MDModulesNotifiers& notifiers_;
     //! \}
 
     //! Internal objects used in constructing DD
@@ -2886,12 +2894,15 @@ DomainDecompositionBuilder::Impl::Impl(const MDLogger& mdlog,
                                        const MdrunOptions& mdrunOptions,
                                        const gmx_mtop_t&   mtop,
                                        const t_inputrec&   ir,
+                                       const MDModulesNotifiers& notifiers,
                                        const matrix        box,
                                        ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
                                        const bool          useUpdateGroups,
                                        const real          maxUpdateGroupRadius,
-                                       ArrayRef<const RVec> xGlobal) :
-    mdlog_(mdlog), cr_(cr), options_(options), mtop_(mtop), ir_(ir)
+                                       ArrayRef<const RVec> xGlobal,
+                                       bool                useGpuForNonbonded,
+                                       bool                useGpuForPme) :
+    mdlog_(mdlog), cr_(cr), options_(options), mtop_(mtop), ir_(ir), notifiers_(notifiers)
 {
     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks",
                                              cr_->sizeOfDefaultCommunicator);
@@ -2917,8 +2928,18 @@ DomainDecompositionBuilder::Impl::Impl(const MDLogger& mdlog,
     const int  numRanksRequested         = cr_->sizeOfDefaultCommunicator;
     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
-    checkForValidRankCountRequests(
-            numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks, checkForLargePrimeFactors);
+
+    /* Checks for ability to use PME-only ranks */
+    auto separatePmeRanksPermitted = checkForSeparatePmeRanks(
+            notifiers_, options_, numRanksRequested, useGpuForNonbonded, useGpuForPme);
+
+    /* Checks for validity of the requested rank setup */
+    checkForValidRankCountRequests(numRanksRequested,
+                                   EEL_PME(ir_.coulombtype) || EVDW_PME(ir_.vdwtype),
+                                   options_.numPmeRanks,
+                                   separatePmeRanksPermitted,
+                                   checkForLargePrimeFactors);
 
     // DD grid setup uses a more different cell size limit for
     // automated setup than the one in systemInfo_. The latter is used
@@ -2939,6 +2960,7 @@ DomainDecompositionBuilder::Impl::Impl(const MDLogger& mdlog,
                                gridSetupCellsizeLimit,
                                mtop_,
                                ir_,
+                               separatePmeRanksPermitted,
                                box,
                                xGlobal,
                                &ddbox_);
@@ -3003,18 +3025,34 @@ gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomS
     return dd;
 }
 
-DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
-                                                       t_commrec*           cr,
-                                                       const DomdecOptions& options,
-                                                       const MdrunOptions&  mdrunOptions,
-                                                       const gmx_mtop_t&    mtop,
-                                                       const t_inputrec&    ir,
-                                                       const matrix         box,
+DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&           mdlog,
+                                                       t_commrec*                cr,
+                                                       const DomdecOptions&      options,
+                                                       const MdrunOptions&       mdrunOptions,
+                                                       const gmx_mtop_t&         mtop,
+                                                       const t_inputrec&         ir,
+                                                       const MDModulesNotifiers& notifiers,
+                                                       const matrix              box,
                                                        ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
                                                        const bool useUpdateGroups,
                                                        const real maxUpdateGroupRadius,
-                                                       ArrayRef<const RVec> xGlobal) :
-    impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, updateGroupingPerMoleculeType, useUpdateGroups, maxUpdateGroupRadius, xGlobal))
+                                                       ArrayRef<const RVec> xGlobal,
+                                                       const bool           useGpuForNonbonded,
+                                                       const bool           useGpuForPme) :
+    impl_(new Impl(mdlog,
+                   cr,
+                   options,
+                   mdrunOptions,
+                   mtop,
+                   ir,
+                   notifiers,
+                   box,
+                   updateGroupingPerMoleculeType,
+                   useUpdateGroups,
+                   maxUpdateGroupRadius,
+                   xGlobal,
+                   useGpuForNonbonded,
+                   useGpuForPme))
 {
 }
diff --git a/src/gromacs/domdec/domdec_setup.cpp b/src/gromacs/domdec/domdec_setup.cpp
index 38bfa5c591..f1e4442ba3 100644
--- a/src/gromacs/domdec/domdec_setup.cpp
+++ b/src/gromacs/domdec/domdec_setup.cpp
@@ -71,6 +71,7 @@
 #include "gromacs/topology/topology.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/logger.h"
+#include "gromacs/utility/mdmodulesnotifiers.h"
 #include "gromacs/utility/stringutil.h"
 
 #include "box.h"
@@ -738,10 +739,61 @@ real getDDGridSetupCellSizeLimit(const gmx::MDLogger& mdlog,
     return cellSizeLimit;
 }
 
-void checkForValidRankCountRequests(const int  numRanksRequested,
-                                    const bool usingPme,
-                                    const int  numPmeRanksRequested,
-                                    const bool checkForLargePrimeFactors)
+
+gmx::SeparatePmeRanksPermitted checkForSeparatePmeRanks(const gmx::MDModulesNotifiers& notifiers,
+                                                        const DomdecOptions&           options,
+                                                        int  numRanksRequested,
+                                                        bool useGpuForNonbonded,
+                                                        bool useGpuForPme)
+{
+    gmx::SeparatePmeRanksPermitted separatePmeRanksPermitted;
+
+    /* Permit MDModules to notify whether they want to use PME-only ranks */
+    notifiers.simulationSetupNotifier_.notify(&separatePmeRanksPermitted);
+
+    /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
+     * improve performance with many threads per GPU, since our OpenMP
+     * scaling is bad, but it's difficult to automate the setup.
+     */
+    if (useGpuForNonbonded && options.numPmeRanks < 0)
+    {
+        separatePmeRanksPermitted.disablePmeRanks(
+                "PME-only CPU ranks are not automatically used when "
+                "non-bonded interactions are computed on GPUs");
+    }
+
+    /* If GPU is used for PME then only 1 PME rank is permitted */
+    if (useGpuForPme && (options.numPmeRanks < 0 || options.numPmeRanks > 1))
+    {
+        separatePmeRanksPermitted.disablePmeRanks(
+                "PME GPU decomposition is not supported; only one separate PME-only GPU rank "
+                "can be used");
+    }
+
+    // With explicit DD grid setup we do not automatically use PME-only ranks
+    if (options.numCells[XX] > 0 && options.numPmeRanks < 0)
+    {
+        separatePmeRanksPermitted.disablePmeRanks("explicit DD grid requested");
+    }
+
+    // Controls the automated choice of when to use separate PME-only ranks.
+    const int minRankCountToDefaultToSeparatePmeRanks = 19;
+
+    // Check if we have enough total ranks to use PME-only ranks
+    if (numRanksRequested < minRankCountToDefaultToSeparatePmeRanks && options.numPmeRanks < 0)
+    {
+        separatePmeRanksPermitted.disablePmeRanks(
+                "there are too few total ranks for efficient splitting");
+    }
+
+    return separatePmeRanksPermitted;
+}
+
+void checkForValidRankCountRequests(const int                             numRanksRequested,
+                                    const bool                            usingPme,
+                                    const int                             numPmeRanksRequested,
+                                    const gmx::SeparatePmeRanksPermitted& separatePmeRanksPermitted,
+                                    const bool                            checkForLargePrimeFactors)
 {
     int numPPRanksRequested = numRanksRequested;
     if (usingPme && numPmeRanksRequested > 0)
@@ -757,6 +809,25 @@ void checkForValidRankCountRequests(const int numRanksRequested,
         }
     }
 
+    /* If the simulation does not use PME but -npme > 0 was given, inform the user and throw an error */
+    if (!usingPme && numPmeRanksRequested > 0)
+    {
+        gmx_fatal(FARGS,
+                  "The system does not use PME for electrostatics or LJ. The requested "
+                  "-npme %d option is not viable.",
+                  numPmeRanksRequested);
+    }
+
+    /* If some parts of the code could not use PME-only ranks while
+     * the user explicitly requested them with mdrun -npme > 0, throw an error */
+    if (!separatePmeRanksPermitted.permitSeparatePmeRanks() && numPmeRanksRequested > 0)
+    {
+        gmx_fatal(FARGS,
+                  "Cannot have %d separate PME ranks because: %s",
+                  numPmeRanksRequested,
+                  separatePmeRanksPermitted.reasonsWhyDisabled().c_str());
+    }
+
     // Once the rank count is large enough, it becomes worth
     // suggesting improvements to the user.
     const int minPPRankCountToCheckForLargePrimeFactors = 13;
@@ -780,69 +851,53 @@ void checkForValidRankCountRequests(const int numRanksRequested,
 /*! \brief Return the number of PME-only ranks used by the simulation
  *
  * If the user did not choose a number, then decide for them.
  */
-static int getNumPmeOnlyRanksToUse(const gmx::MDLogger& mdlog,
-                                   const DomdecOptions& options,
-                                   const gmx_mtop_t&    mtop,
-                                   const t_inputrec&    ir,
-                                   const matrix         box,
-                                   const int            numRanksRequested)
+static int getNumPmeOnlyRanksToUse(const gmx::MDLogger&                  mdlog,
+                                   const gmx::DomdecOptions&             options,
+                                   const gmx_mtop_t&                     mtop,
+                                   const t_inputrec&                     ir,
+                                   const gmx::SeparatePmeRanksPermitted& separatePmeRanksPermitted,
+                                   const matrix                          box,
+                                   const int                             numRanksRequested)
 {
-    int         numPmeOnlyRanks = 0;
-    const char* extraMessage    = "";
+    int numPmeOnlyRanks = 0;
 
-    if (options.numCells[XX] > 0)
+    if (!(EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype)))
     {
-        if (options.numPmeRanks >= 0)
-        {
-            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
-            numPmeOnlyRanks = options.numPmeRanks;
-        }
-        else
-        {
-            // When the DD grid is set explicitly and -npme is set to auto,
-            // don't use PME ranks. We check later if the DD grid is
-            // compatible with the total number of ranks.
-            numPmeOnlyRanks = 0;
-        }
+        // System does not use PME for electrostatics or LJ
+        numPmeOnlyRanks = 0;
+        GMX_LOG(mdlog.info)
+                .appendTextFormatted("The system does not use PME for electrostatics or LJ");
     }
     else
     {
-        if (!EEL_PME(ir.coulombtype))
+        std::string extraMessage;
+
+        if (options.numPmeRanks >= 0)
         {
-            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
-            numPmeOnlyRanks = 0;
+            numPmeOnlyRanks = options.numPmeRanks;
+            extraMessage += ", as requested with -npme option";
         }
         else
         {
-            if (options.numPmeRanks >= 0)
+            /* Disable PME-only ranks if some part of the code requested so and the choice is left to GROMACS */
+            if (!separatePmeRanksPermitted.permitSeparatePmeRanks())
             {
-                numPmeOnlyRanks = options.numPmeRanks;
+                numPmeOnlyRanks = 0;
+                extraMessage += " because: " + separatePmeRanksPermitted.reasonsWhyDisabled();
             }
             else
             {
-                // Controls the automated choice of when to use separate PME-only ranks.
-                const int minRankCountToDefaultToSeparatePmeRanks = 19;
-
-                if (numRanksRequested < minRankCountToDefaultToSeparatePmeRanks)
-                {
-                    numPmeOnlyRanks = 0;
-                    extraMessage =
-                            ", as there are too few total\n"
-                            " ranks for efficient splitting";
-                }
-                else
-                {
-                    numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, numRanksRequested);
-                    extraMessage    = ", as guessed by mdrun";
-                }
+                numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, numRanksRequested);
+                extraMessage += ", as guessed by mdrun";
             }
         }
-    }
-    GMX_RELEASE_ASSERT(numPmeOnlyRanks <= numRanksRequested,
-                       "Cannot have more PME ranks than total ranks");
-    if (EEL_PME(ir.coulombtype))
-    {
-        GMX_LOG(mdlog.info).appendTextFormatted("Using %d separate PME ranks%s", numPmeOnlyRanks, extraMessage);
+
+        GMX_RELEASE_ASSERT(numPmeOnlyRanks <= numRanksRequested,
+                           "Cannot have more PME ranks than total ranks");
+
+        GMX_LOG(mdlog.info)
+                .appendTextFormatted(
+                        "Using %d separate PME ranks%s", numPmeOnlyRanks, extraMessage.c_str());
     }
 
     return numPmeOnlyRanks;
 }
@@ -884,21 +939,23 @@ static int set_dd_dim(const gmx::IVec& numDDCells, const DDSettings& ddSettings,
     return ndim;
 }
 
-DDGridSetup getDDGridSetup(const gmx::MDLogger&           mdlog,
-                           DDRole                         ddRole,
-                           MPI_Comm                       communicator,
-                           const int                      numRanksRequested,
-                           const DomdecOptions&           options,
-                           const DDSettings&              ddSettings,
-                           const DDSystemInfo&            systemInfo,
-                           const real                     cellSizeLimit,
-                           const gmx_mtop_t&              mtop,
-                           const t_inputrec&              ir,
-                           const matrix                   box,
-                           gmx::ArrayRef<const gmx::RVec> xGlobal,
-                           gmx_ddbox_t*                   ddbox)
+DDGridSetup getDDGridSetup(const gmx::MDLogger&                  mdlog,
+                           DDRole                                ddRole,
+                           MPI_Comm                              communicator,
+                           const int                             numRanksRequested,
+                           const DomdecOptions&                  options,
+                           const DDSettings&                     ddSettings,
+                           const DDSystemInfo&                   systemInfo,
+                           const real                            cellSizeLimit,
+                           const gmx_mtop_t&                     mtop,
+                           const t_inputrec&                     ir,
+                           const gmx::SeparatePmeRanksPermitted& separatePmeRanksPermitted,
+                           const matrix                          box,
+                           gmx::ArrayRef<const gmx::RVec>        xGlobal,
+                           gmx_ddbox_t*                          ddbox)
 {
-    int numPmeOnlyRanks = getNumPmeOnlyRanksToUse(mdlog, options, mtop, ir, box, numRanksRequested);
+    int numPmeOnlyRanks = getNumPmeOnlyRanksToUse(
+            mdlog, options, mtop, ir, separatePmeRanksPermitted, box, numRanksRequested);
 
     gmx::IVec numDomains;
     if (options.numCells[XX] > 0)
diff --git a/src/gromacs/domdec/domdec_setup.h b/src/gromacs/domdec/domdec_setup.h
index d982ed328a..dc4202c4c8 100644
--- a/src/gromacs/domdec/domdec_setup.h
+++ b/src/gromacs/domdec/domdec_setup.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
@@ -56,7 +56,9 @@ struct t_inputrec;
 namespace gmx
 {
 struct DomdecOptions;
+struct MDModulesNotifiers;
 class MDLogger;
+class SeparatePmeRanksPermitted;
 template<typename T>
 class ArrayRef;
 } // namespace gmx
@@ -82,14 +84,30 @@ struct DDGridSetup
     ivec ddDimensions = { -1, -1, -1 };
 };
 
+/*! \brief Checks for the ability to use separate PME-only ranks
+ *
+ * Disables the automatic use of separate PME-only ranks if:
+ * some MDModule could not use separate PME ranks,
+ * the GPU setup is not compatible with separate PME ranks,
+ * the user provided an explicit DD grid,
+ * or the total number of ranks is not large enough to use PME ranks.
+ */
+gmx::SeparatePmeRanksPermitted checkForSeparatePmeRanks(const gmx::MDModulesNotifiers& notifiers,
+                                                        const gmx::DomdecOptions&      options,
+                                                        int  numRanksRequested,
+                                                        bool useGpuForNonbonded,
+                                                        bool useGpuForPme);
+
 /*! \brief Checks that requests for PP and PME ranks honor basic expectations
  *
- * Issues a fatal error if there are more PME ranks than PP, or if the
+ * Issues a fatal error if there are more PME ranks than PP, if the
  * count of PP ranks has a prime factor that is too large to be likely
- * to have good performance. */
-void checkForValidRankCountRequests(int  numRanksRequested,
-                                    bool usingPme,
-                                    int  numPmeRanksRequested,
+ * to have good performance, or if PME-only ranks were requested
+ * with -npme > 0 but cannot be used. */
+void checkForValidRankCountRequests(int                                   numRanksRequested,
+                                    bool                                  usingPme,
+                                    int                                   numPmeRanksRequested,
+                                    const gmx::SeparatePmeRanksPermitted& separatePmeRanksPermitted,
                                     bool checkForLargePrimeFactors);
 
 /*! \brief Return the minimum cell size (in nm) required for DD */
@@ -105,18 +123,19 @@ real getDDGridSetupCellSizeLimit(const gmx::MDLogger& mdlog,
  * chooses estimated optimal number of separate PME ranks and DD grid
  * cell setup, DD cell size limits, and the initial ddbox. */
-DDGridSetup getDDGridSetup(const gmx::MDLogger&           mdlog,
-                           DDRole                         ddRole,
-                           MPI_Comm                       communicator,
-                           int                            numRanksRequested,
-                           const gmx::DomdecOptions&      options,
-                           const DDSettings&              ddSettings,
-                           const DDSystemInfo&            systemInfo,
-                           real                           cellSizeLimit,
-                           const gmx_mtop_t&              mtop,
-                           const t_inputrec&              ir,
-                           const matrix                   box,
-                           gmx::ArrayRef<const gmx::RVec> xGlobal,
-                           gmx_ddbox_t*                   ddbox);
+DDGridSetup getDDGridSetup(const gmx::MDLogger&                  mdlog,
+                           DDRole                                ddRole,
+                           MPI_Comm                              communicator,
+                           int                                   numRanksRequested,
+                           const gmx::DomdecOptions&             options,
+                           const DDSettings&                     ddSettings,
+                           const DDSystemInfo&                   systemInfo,
+                           real                                  cellSizeLimit,
+                           const gmx_mtop_t&                     mtop,
+                           const t_inputrec&                     ir,
+                           const gmx::SeparatePmeRanksPermitted& separatePmeRanksPermitted,
+                           const matrix                          box,
+                           gmx::ArrayRef<const gmx::RVec>        xGlobal,
+                           gmx_ddbox_t*                          ddbox);
 
 #endif
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp
index a2f44ea30e..ec9c382e64 100644
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -1130,61 +1130,6 @@ int Mdrunner::mdrunner()
                     "these are not compatible with mdrun -rerun");
     }
 
-    /* Object for collecting reasons for not using PME-only ranks */
-    SeparatePmeRanksPermitted separatePmeRanksPermitted;
-
-    /* Permit MDModules to notify whether they want to use PME-only ranks */
-    setupNotifier.notify(&separatePmeRanksPermitted);
-
-    /* If simulation is not using PME then disable PME-only ranks */
-    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
-    {
-        separatePmeRanksPermitted.disablePmeRanks(
-                "PME-only ranks are requested, but the system does not use PME "
-                "for electrostatics or LJ");
-    }
-
-    /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
-     * improve performance with many threads per GPU, since our OpenMP
-     * scaling is bad, but it's difficult to automate the setup.
-     */
-    if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
-    {
-        separatePmeRanksPermitted.disablePmeRanks(
-                "PME-only CPU ranks are not automatically used when "
-                "non-bonded interactions are computed on GPUs");
-    }
-
-    /* If GPU is used for PME then only 1 PME rank is permitted */
-    if (useGpuForPme && (domdecOptions.numPmeRanks < 0 || domdecOptions.numPmeRanks > 1))
-    {
-        separatePmeRanksPermitted.disablePmeRanks(
-                "PME GPU decomposition is not supported. Only one separate PME-only GPU rank "
-                "can be used.");
-    }
-
-    /* Disable PME-only ranks if some parts of the code requested so and it's up to GROMACS to decide */
-    if (!separatePmeRanksPermitted.permitSeparatePmeRanks() && domdecOptions.numPmeRanks < 0)
-    {
-        domdecOptions.numPmeRanks = 0;
-        GMX_LOG(mdlog.info)
-                .asParagraph()
-                .appendText("Simulation will not use PME-only ranks because: "
-                            + separatePmeRanksPermitted.reasonsWhyDisabled());
-    }
-
-    /* If some parts of the code could not use PME-only ranks and
-     * user explicitly used mdrun -npme option then throw an error */
-    if (!separatePmeRanksPermitted.permitSeparatePmeRanks() && domdecOptions.numPmeRanks > 0)
-    {
-        gmx_fatal_collective(FARGS,
-                             cr->mpiDefaultCommunicator,
-                             MASTER(cr),
-                             "Requested -npme %d option is not viable because: %s",
-                             domdecOptions.numPmeRanks,
-                             separatePmeRanksPermitted.reasonsWhyDisabled().c_str());
-    }
-
     /* NMR restraints must be initialized before load_checkpoint,
      * since with time averaging the history is added to t_state.
      * For proper consistency check we therefore need to extend
@@ -1362,11 +1307,14 @@ int Mdrunner::mdrunner()
                                            mdrunOptions,
                                            mtop,
                                            *inputrec,
+                                           mdModules_->notifiers(),
                                            box,
                                            updateGroups.updateGroupingPerMoleculeType(),
                                            updateGroups.useUpdateGroups(),
                                            updateGroups.maxUpdateGroupRadius(),
-                                           positionsFromStatePointer(globalState.get()));
+                                           positionsFromStatePointer(globalState.get()),
+                                           useGpuForNonbonded,
+                                           useGpuForPme);
     }
     else
     {
-- 
2.22.0
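
The refactoring above hinges on the SeparatePmeRanksPermitted accumulator: every subsystem that cannot work with separate PME-only ranks calls disablePmeRanks() with a human-readable reason, and the decision points later query permitSeparatePmeRanks() and report reasonsWhyDisabled(). Below is a minimal self-contained sketch of that pattern; only the three method names are taken from the patch, while the internals (in particular how multiple reasons are joined) and the toy main() driver are illustrative assumptions, not the GROMACS implementation.

#include <cstdio>
#include <string>

// Stand-in for gmx::SeparatePmeRanksPermitted: accumulates every reason why
// separate PME-only ranks cannot be used. Method names follow the patch;
// the member layout and the "; " joining of reasons are assumptions.
class SeparatePmeRanksPermitted
{
public:
    // Record one more reason for not using separate PME-only ranks
    void disablePmeRanks(const std::string& reason)
    {
        permitSeparatePmeRanks_ = false;
        if (!reasonsWhyDisabled_.empty())
        {
            reasonsWhyDisabled_ += "; ";
        }
        reasonsWhyDisabled_ += reason;
    }
    // True while no subsystem has objected
    bool permitSeparatePmeRanks() const { return permitSeparatePmeRanks_; }
    // All accumulated reasons, ready for log or error messages
    const std::string& reasonsWhyDisabled() const { return reasonsWhyDisabled_; }

private:
    bool        permitSeparatePmeRanks_ = true;
    std::string reasonsWhyDisabled_;
};

// Toy driver mirroring two of the checks in checkForSeparatePmeRanks();
// the hard-coded values are hypothetical stand-ins for DomdecOptions fields.
int main()
{
    SeparatePmeRanksPermitted permitted;

    const bool useGpuForNonbonded = true;
    const int  numPmeRanks        = -1; // -1 means "let mdrun decide"
    const int  numRanksRequested  = 8;

    if (useGpuForNonbonded && numPmeRanks < 0)
    {
        permitted.disablePmeRanks(
                "PME-only CPU ranks are not automatically used when "
                "non-bonded interactions are computed on GPUs");
    }
    if (numRanksRequested < 19 && numPmeRanks < 0) // 19 = minRankCountToDefaultToSeparatePmeRanks
    {
        permitted.disablePmeRanks("there are too few total ranks for efficient splitting");
    }

    if (!permitted.permitSeparatePmeRanks())
    {
        std::printf("Simulation will not use PME-only ranks because: %s\n",
                    permitted.reasonsWhyDisabled().c_str());
    }
    return 0;
}

Centralizing the checks in domdec_setup.cpp means the same accumulated reasons string feeds both the mdlog message in getNumPmeOnlyRanksToUse() and the gmx_fatal() in checkForValidRankCountRequests(), instead of being assembled ad hoc in runner.cpp as before.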