From 1f45888bff3945adb4661ee5b9984c121267dd30 Mon Sep 17 00:00:00 2001 From: Gaurav Garg Date: Fri, 27 Aug 2021 07:00:46 +0000 Subject: [PATCH] Disable DLB if GPU direct communication Halo exchange is enabled. --- src/gromacs/domdec/builder.h | 3 +- src/gromacs/domdec/domdec.cpp | 46 +++++++++++++------- src/gromacs/mdrun/runner.cpp | 80 ++++++++++++++++++++--------------- 3 files changed, 78 insertions(+), 51 deletions(-) diff --git a/src/gromacs/domdec/builder.h b/src/gromacs/domdec/builder.h index aadd54cc59..e5b9a8d92b 100644 --- a/src/gromacs/domdec/builder.h +++ b/src/gromacs/domdec/builder.h @@ -91,7 +91,8 @@ public: real maxUpdateGroupRadius, ArrayRef xGlobal, bool useGpuForNonbonded, - bool useGpuForPme); + bool useGpuForPme, + bool directGpuCommUsedWithGpuUpdate); //! Destructor ~DomainDecompositionBuilder(); //! Build the resulting DD manager diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp index 3627ea02d6..5f26c40848 100644 --- a/src/gromacs/domdec/domdec.cpp +++ b/src/gromacs/domdec/domdec.cpp @@ -1803,18 +1803,20 @@ static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState, * state with other run parameters and settings. As a result, the initial state * may be altered or an error may be thrown if incompatibility of options is detected. * - * \param [in] mdlog Logger. - * \param [in] dlbOption Enum value for the DLB option. - * \param [in] bRecordLoad True if the load balancer is recording load information. - * \param [in] mdrunOptions Options for mdrun. - * \param [in] inputrec Pointer mdrun to input parameters. - * \returns DLB initial/startup state. + * \param [in] mdlog Logger. + * \param [in] dlbOption Enum value for the DLB option. + * \param [in] bRecordLoad True if the load balancer is recording load information. + * \param [in] mdrunOptions Options for mdrun. + * \param [in] inputrec Reference to the mdrun input parameters.
+ * \param [in] directGpuCommUsedWithGpuUpdate True when direct GPU halo exchange and GPU update are both enabled. + * \returns DLB initial/startup state. */ static DlbState determineInitialDlbState(const gmx::MDLogger& mdlog, DlbOption dlbOption, gmx_bool bRecordLoad, const gmx::MdrunOptions& mdrunOptions, - const t_inputrec& inputrec) + const t_inputrec& inputrec, + const bool directGpuCommUsedWithGpuUpdate) { DlbState dlbState = DlbState::offCanTurnOn; @@ -1826,6 +1828,15 @@ static DlbState determineInitialDlbState(const gmx::MDLogger& mdlog, default: gmx_incons("Invalid dlbOption enum value"); } + // P2P GPU comm + GPU update leads to a case in which we enqueue async work for multiple timesteps, + // so DLB needs to be disabled in that case. + if (directGpuCommUsedWithGpuUpdate) + { + std::string reasonStr = + "it is not supported with GPU direct communication + GPU update enabled."; + return forceDlbOffOrBail(dlbState, reasonStr, mdlog); + } + /* Reruns don't support DLB: bail or override auto mode */ if (mdrunOptions.rerun) { @@ -2775,7 +2786,8 @@ static void set_ddgrid_parameters(const gmx::MDLogger& mdlog, static DDSettings getDDSettings(const gmx::MDLogger& mdlog, const DomdecOptions& options, const gmx::MdrunOptions& mdrunOptions, - const t_inputrec& ir) + const t_inputrec& ir, + const bool directGpuCommUsedWithGpuUpdate) { DDSettings ddSettings; @@ -2808,8 +2820,8 @@ static DDSettings getDDSettings(const gmx::MDLogger& mdlog, ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0); } - ddSettings.initialDlbState = - determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, ir); + ddSettings.initialDlbState = determineInitialDlbState( + mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, ir, directGpuCommUsedWithGpuUpdate); GMX_LOG(mdlog.info) .appendTextFormatted("Dynamic load balancing: %s", enumValueToString(ddSettings.initialDlbState)); @@ -2844,7 +2856,8 @@ public: real maxUpdateGroupRadius, ArrayRef xGlobal, bool
useGpuForNonbonded, - bool useGpuForPme); + bool useGpuForPme, + bool directGpuCommUsedWithGpuUpdate); //! Build the resulting DD manager gmx_domdec_t* build(LocalAtomSetManager* atomSets); @@ -2899,12 +2912,13 @@ DomainDecompositionBuilder::Impl::Impl(const MDLogger& mdlog, const real maxUpdateGroupRadius, ArrayRef xGlobal, bool useGpuForNonbonded, - bool useGpuForPme) : + bool useGpuForPme, + bool directGpuCommUsedWithGpuUpdate) : mdlog_(mdlog), cr_(cr), options_(options), mtop_(mtop), ir_(ir), notifiers_(notifiers) { GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator); - ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_); + ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_, directGpuCommUsedWithGpuUpdate); if (ddSettings_.eFlop > 1) { @@ -3038,7 +3052,8 @@ DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger& const real maxUpdateGroupRadius, ArrayRef xGlobal, const bool useGpuForNonbonded, - const bool useGpuForPme) : + const bool useGpuForPme, + const bool directGpuCommUsedWithGpuUpdate) : impl_(new Impl(mdlog, cr, options, @@ -3052,7 +3067,8 @@ DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger& maxUpdateGroupRadius, xGlobal, useGpuForNonbonded, - useGpuForPme)) + useGpuForPme, + directGpuCommUsedWithGpuUpdate)) { } diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 64ada7d8d8..98cd178a96 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1298,6 +1298,45 @@ int Mdrunner::mdrunner() systemHasConstraintsOrVsites(mtop), cutoffMargin); + try + { + const bool haveFrozenAtoms = inputrecFrozenAtoms(inputrec.get()); + + useGpuForUpdate = decideWhetherToUseGpuForUpdate(useDomainDecomposition, + updateGroups.useUpdateGroups(), + pmeRunMode, + domdecOptions.numPmeRanks > 0, + useGpuForNonbonded, + updateTarget, + gpusWereDetected, + *inputrec, + mtop, + doEssentialDynamics, + 
gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, + haveFrozenAtoms, + doRerun, + devFlags, + mdlog); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR + + bool useGpuDirectHalo = false; + + if (useGpuForNonbonded) + { + // cr->npmenodes is not yet initialized. + // domdecOptions.numPmeRanks == -1 results in 0 separate PME ranks when useGpuForNonbonded is true. + // TODO: remove this assumption once auto mode supports separate PME ranks. + const int numPmeRanks = domdecOptions.numPmeRanks > 0 ? domdecOptions.numPmeRanks : 0; + bool havePPDomainDecomposition = (cr->sizeOfDefaultCommunicator - numPmeRanks) > 1; + useGpuDirectHalo = decideWhetherToUseGpuForHalo(devFlags, + havePPDomainDecomposition, + useGpuForNonbonded, + useModularSimulator, + doRerun, + EI_ENERGY_MINIMIZATION(inputrec->eI)); + } + // This builder is necessary while we have multi-part construction // of DD. Before DD is constructed, we use the existence of // the builder object to indicate that further construction of DD @@ -1305,7 +1344,10 @@ int Mdrunner::mdrunner() std::unique_ptr ddBuilder; if (useDomainDecomposition) { - ddBuilder = std::make_unique( + // P2P GPU comm + GPU update leads to a case in which we enqueue async work for multiple + // timesteps. DLB needs to be disabled in that case. + const bool directGpuCommUsedWithGpuUpdate = GMX_THREAD_MPI && useGpuDirectHalo && useGpuForUpdate; + ddBuilder = std::make_unique( mdlog, cr, domdecOptions, @@ -1319,7 +1361,8 @@ int Mdrunner::mdrunner() updateGroups.maxUpdateGroupRadius(), positionsFromStatePointer(globalState.get()), useGpuForNonbonded, - useGpuForPme); + useGpuForPme, + directGpuCommUsedWithGpuUpdate); } else { @@ -1403,32 +1446,6 @@ int Mdrunner::mdrunner() } } - // The GPU update is decided here because we need to know whether the constraints or - // SETTLEs can span across the domain borders (i.e. whether or not update groups are - // defined).
This is only known after DD is initialized, hence decision on using GPU - // update is done so late. - try - { - const bool haveFrozenAtoms = inputrecFrozenAtoms(inputrec.get()); - - useGpuForUpdate = decideWhetherToUseGpuForUpdate(useDomainDecomposition, - updateGroups.useUpdateGroups(), - pmeRunMode, - domdecOptions.numPmeRanks > 0, - useGpuForNonbonded, - updateTarget, - gpusWereDetected, - *inputrec, - mtop, - doEssentialDynamics, - gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, - haveFrozenAtoms, - doRerun, - devFlags, - mdlog); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - const bool printHostName = (cr->nnodes > 1); gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate); @@ -1445,13 +1462,6 @@ int Mdrunner::mdrunner() MdrunScheduleWorkload runScheduleWork; - bool useGpuDirectHalo = decideWhetherToUseGpuForHalo(devFlags, - havePPDomainDecomposition(cr), - useGpuForNonbonded, - useModularSimulator, - doRerun, - EI_ENERGY_MINIMIZATION(inputrec->eI)); - // Also populates the simulation constant workload description. // Note: currently the default duty is DUTY_PP | DUTY_PME for all simulations, including those without PME, // so this boolean is sufficient on all ranks to determine whether separate PME ranks are used, -- 2.22.0