From 1a7301b810f1257e0f9a3bb33fdc5a538c2ce2bb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Fri, 24 Jan 2020 14:12:28 +0100 Subject: [PATCH] Improve GPU update tasks assignment consistency GPU update task assignment was not consistent with the assumptions and supported features of the 2020 release and did not implement the correct checks and fallback in cases where GPU update was decided to not be supported. Specifically, this change makes sure that when separate PME ranks are used, without direct GPU communication for PP-PME, GPU update falls back to the CPU. Fixes #3354 Change-Id: I7c9dd67cd8cf61f0201b626b8b7674917e3365a5 --- src/gromacs/mdrun/runner.cpp | 24 +---- src/gromacs/taskassignment/decidegpuusage.cpp | 97 ++++++++++--------- src/gromacs/taskassignment/decidegpuusage.h | 51 +++++++--- 3 files changed, 91 insertions(+), 81 deletions(-) diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index f47caae090..93c934a996 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -167,22 +167,6 @@ namespace gmx { -/*! \brief Structure that holds boolean flags corresponding to the development - * features present enabled through environment variables. - * - */ -struct DevelopmentFeatureFlags -{ - //! True if the Buffer ops development feature is enabled - // TODO: when the trigger of the buffer ops offload is fully automated this should go away - bool enableGpuBufferOps = false; - //! If true, forces 'mdrun -update auto' default to 'gpu' - bool forceGpuUpdateDefault = false; - //! True if the GPU halo exchange development feature is enabled - bool enableGpuHaloExchange = false; - //! True if the PME PP direct communication GPU development feature is enabled - bool enableGpuPmePPComm = false; -}; /*! \brief Manage any development feature flag variables encountered * @@ -1172,10 +1156,10 @@ int Mdrunner::mdrunner() const bool useUpdateGroups = cr->dd ? 
ddUsesUpdateGroups(*cr->dd) : false; useGpuForUpdate = decideWhetherToUseGpuForUpdate( - devFlags.forceGpuUpdateDefault, useDomainDecomposition, useUpdateGroups, pmeRunMode, - domdecOptions.numPmeRanks > 0, useGpuForNonbonded, updateTarget, gpusWereDetected, - *inputrec, mtop, doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, - replExParams.exchangeInterval > 0, doRerun, mdlog); + useDomainDecomposition, useUpdateGroups, pmeRunMode, domdecOptions.numPmeRanks > 0, + useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop, + doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, + replExParams.exchangeInterval > 0, doRerun, devFlags, mdlog); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp index fdb7414212..91779f29c8 100644 --- a/src/gromacs/taskassignment/decidegpuusage.cpp +++ b/src/gromacs/taskassignment/decidegpuusage.cpp @@ -520,48 +520,73 @@ bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded, return gpusWereDetected && usingOurCpuForPmeOrEwald; } -bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefault, - const bool isDomainDecomposition, - const bool useUpdateGroups, - const PmeRunMode pmeRunMode, - const bool havePmeOnlyRank, - const bool useGpuForNonbonded, - const TaskTarget updateTarget, - const bool gpusWereDetected, - const t_inputrec& inputrec, - const gmx_mtop_t& mtop, - const bool useEssentialDynamics, - const bool doOrientationRestraints, - const bool useReplicaExchange, - const bool doRerun, - const gmx::MDLogger& mdlog) +bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition, + const bool useUpdateGroups, + const PmeRunMode pmeRunMode, + const bool havePmeOnlyRank, + const bool useGpuForNonbonded, + const TaskTarget updateTarget, + const bool gpusWereDetected, + const t_inputrec& inputrec, + const gmx_mtop_t& mtop, + const bool useEssentialDynamics, + const bool 
doOrientationRestraints, + const bool useReplicaExchange, + const bool doRerun, + const DevelopmentFeatureFlags& devFlags, + const gmx::MDLogger& mdlog) { // '-update cpu' overrides the environment variable, '-update auto' does not - if (updateTarget == TaskTarget::Cpu || (updateTarget == TaskTarget::Auto && !forceGpuUpdateDefault)) + if (updateTarget == TaskTarget::Cpu + || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault)) { return false; } const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0; + const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed); std::string errorMessage; if (isDomainDecomposition) { - if (!forceGpuUpdateDefault) + if (!devFlags.enableGpuHaloExchange) { - errorMessage += "Domain decomposition is not supported.\n "; + errorMessage += "Domain decomposition without GPU halo exchange is not supported.\n "; } - else if (hasAnyConstraints && !useUpdateGroups) + else + { + if (hasAnyConstraints && !useUpdateGroups) + { + errorMessage += + "Domain decomposition is only supported with constraints when update " + "groups " + "are used. This means constraining all bonds is not supported, except for " + "small molecules, and box sizes close to half the pair-list cutoff are not " + "supported.\n "; + } + + if (pmeUsesCpu) + { + errorMessage += "With domain decomposition, PME must run fully on the GPU.\n"; + } + } + } + + if (havePmeOnlyRank) + { + if (pmeUsesCpu) + { + errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n"; + } + + if (!devFlags.enableGpuPmePPComm) { - errorMessage += - "Domain decomposition is only supported with constraints when update groups " - "are used. 
This means constraining all bonds is not supported, except for " - "small molecules, and box sizes close to half the pair-list cutoff are not " - "supported.\n "; + errorMessage += "With separate PME rank(s), PME must use direct communication.\n"; } } + if (inputrec.eConstrAlg == econtSHAKE && hasAnyConstraints && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0) { errorMessage += "SHAKE constraints are not supported.\n"; @@ -575,18 +600,6 @@ bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefault, "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n"; } - // If PME is active (i.e. not PmeRunMode::None), then GPU update requires - // either a single-rank run, or that PME runs fully on the GPU. - const bool pmeRunningOnCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed); - if (pmeRunningOnCpu && isDomainDecomposition) - { - errorMessage += "With domain decomposition, PME must run fully on the GPU.\n"; - } - if (pmeRunningOnCpu && havePmeOnlyRank) - { - errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n"; - } - if (!gpusWereDetected) { errorMessage += "Compatible GPUs must have been found.\n"; @@ -667,7 +680,7 @@ bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefault, if (!errorMessage.empty()) { - if (updateTarget != TaskTarget::Gpu && forceGpuUpdateDefault) + if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault) { GMX_LOG(mdlog.warning) .asParagraph() @@ -688,14 +701,8 @@ bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefault, return false; } - if (isDomainDecomposition) - { - return forceGpuUpdateDefault; - } - else - { - return (updateTarget == TaskTarget::Gpu || forceGpuUpdateDefault); - } + return (updateTarget == TaskTarget::Gpu + || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)); } } // namespace gmx diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h index 
921b0a778b..bfb002547a 100644 --- a/src/gromacs/taskassignment/decidegpuusage.h +++ b/src/gromacs/taskassignment/decidegpuusage.h @@ -72,6 +72,25 @@ enum class EmulateGpuNonbonded : bool Yes }; +/*! \libinternal + * \brief Structure that holds boolean flags corresponding to the development + * features that are enabled through environment variables. + * + */ +struct DevelopmentFeatureFlags +{ + //! True if the Buffer ops development feature is enabled + // TODO: when the trigger of the buffer ops offload is fully automated this should go away + bool enableGpuBufferOps = false; + //! If true, forces 'mdrun -update auto' default to 'gpu' + bool forceGpuUpdateDefault = false; + //! True if the GPU halo exchange development feature is enabled + bool enableGpuHaloExchange = false; + //! True if the PME PP direct communication GPU development feature is enabled + bool enableGpuPmePPComm = false; +}; + + class MDAtoms; /*! \brief Decide whether this thread-MPI simulation will run @@ -249,7 +268,6 @@ bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, /*! \brief Decide whether to use GPU for update. * - * \param[in] forceGpuUpdateDefault If update should run on GPU by default. * \param[in] isDomainDecomposition Whether there more than one domain. * \param[in] useUpdateGroups If the constraints can be split across domains. * \param[in] pmeRunMode PME running mode: CPU, GPU or mixed. @@ -263,27 +281,28 @@ bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, * \param[in] doOrientationRestraints If orientation restraints are enabled. * \param[in] useReplicaExchange If this is a REMD simulation. * \param[in] doRerun It this is a rerun. + * \param[in] devFlags GPU development / experimental feature flags. * \param[in] mdlog MD logger. * * \returns Whether complete simulation can be run on GPU. * \throws std::bad_alloc If out of memory * InconsistentInputError If the user requirements are inconsistent.
*/ -bool decideWhetherToUseGpuForUpdate(bool forceGpuUpdateDefault, - bool isDomainDecomposition, - bool useUpdateGroups, - PmeRunMode pmeRunMode, - bool havePmeOnlyRank, - bool useGpuForNonbonded, - TaskTarget updateTarget, - bool gpusWereDetected, - const t_inputrec& inputrec, - const gmx_mtop_t& mtop, - bool useEssentialDynamics, - bool doOrientationRestraints, - bool useReplicaExchange, - bool doRerun, - const gmx::MDLogger& mdlog); +bool decideWhetherToUseGpuForUpdate(bool isDomainDecomposition, + bool useUpdateGroups, + PmeRunMode pmeRunMode, + bool havePmeOnlyRank, + bool useGpuForNonbonded, + TaskTarget updateTarget, + bool gpusWereDetected, + const t_inputrec& inputrec, + const gmx_mtop_t& mtop, + bool useEssentialDynamics, + bool doOrientationRestraints, + bool useReplicaExchange, + bool doRerun, + const DevelopmentFeatureFlags& devFlags, + const gmx::MDLogger& mdlog); } // namespace gmx -- 2.22.0