From d30f2cb6d8ef17d2bfffdfff5f2a79ef1e605fe3 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Fri, 27 Dec 2019 12:15:23 +0100 Subject: [PATCH] Change the behavior of the GPU update UI 1. By default, update will now run on the CPU 2. The "-update cpu" will override the GMX_FORCE_UPDATE_DEFAULT_GPU environment variable. Change-Id: I08efb6a594fb062fe145baa7cc00ba5c8ec63374 --- docs/release-notes/2020/major/performance.rst | 2 ++ src/gromacs/mdrun/runner.cpp | 15 +++++++-------- src/gromacs/taskassignment/decidegpuusage.cpp | 15 +++++++++------ src/gromacs/taskassignment/decidegpuusage.h | 4 ++-- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/docs/release-notes/2020/major/performance.rst b/docs/release-notes/2020/major/performance.rst index a9ab8d75e2..784d4b83a1 100644 --- a/docs/release-notes/2020/major/performance.rst +++ b/docs/release-notes/2020/major/performance.rst @@ -36,6 +36,8 @@ For standard simulations (see the user guide for more details), update and constraints can be offloaded to a GPU with CUDA. Thus all compute intensive parts of a simulation can be offloaded, which provides better performance when using a fast GPU combined with a slow CPU. +By default, update will run on the CPU; to use the GPU in single-rank simulations, +use the new '-update gpu' command-line option. For use with domain decomposition, please see below. GPU Direct Communications diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 477ae85b44..f5b9cb1b7a 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -176,8 +176,8 @@ struct DevelopmentFeatureFlags //! True if the Buffer ops development feature is enabled // TODO: when the trigger of the buffer ops offload is fully automated this should go away bool enableGpuBufferOps = false; - //! If true, forces 'mdrun -update auto' default to 'gpu' when running with DD - bool forceGpuUpdateDefaultWithDD = false; + //! 
If true, forces 'mdrun -update auto' default to 'gpu' + bool forceGpuUpdateDefault = false; //! True if the GPU halo exchange development feature is enabled bool enableGpuHaloExchange = false; //! True if the PME PP direct communication GPU development feature is enabled @@ -212,7 +212,7 @@ static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& md #pragma GCC diagnostic ignored "-Wunused-result" devFlags.enableGpuBufferOps = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr) && (GMX_GPU == GMX_GPU_CUDA) && useGpuForNonbonded; - devFlags.forceGpuUpdateDefaultWithDD = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr); + devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr); devFlags.enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA)); devFlags.enableGpuPmePPComm = @@ -228,7 +228,7 @@ static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& md "GMX_USE_GPU_BUFFER_OPS environment variable."); } - if (devFlags.forceGpuUpdateDefaultWithDD) + if (devFlags.forceGpuUpdateDefault) { GMX_LOG(mdlog.warning) .asParagraph() @@ -1191,10 +1191,9 @@ int Mdrunner::mdrunner() const bool useUpdateGroups = cr->dd ? 
ddUsesUpdateGroups(*cr->dd) : false; useGpuForUpdate = decideWhetherToUseGpuForUpdate( - devFlags.forceGpuUpdateDefaultWithDD, useDomainDecomposition, useUpdateGroups, - useGpuForPme, useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop, - doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, - replExParams.exchangeInterval > 0, doRerun); + devFlags.forceGpuUpdateDefault, useDomainDecomposition, useUpdateGroups, useGpuForPme, + useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop, doEssentialDynamics, + gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, replExParams.exchangeInterval > 0, doRerun); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp index adda067bfa..9b069fd890 100644 --- a/src/gromacs/taskassignment/decidegpuusage.cpp +++ b/src/gromacs/taskassignment/decidegpuusage.cpp @@ -489,7 +489,7 @@ bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded, return gpusWereDetected && usingOurCpuForPmeOrEwald; } -bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefaultWithDD, +bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefault, const bool isDomainDecomposition, const bool useUpdateGroups, const bool useGpuForPme, @@ -504,7 +504,8 @@ bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefaultWithD const bool doRerun) { - if (updateTarget == TaskTarget::Cpu) + // '-update cpu' overrides the environment variable, '-update auto' does not + if (updateTarget == TaskTarget::Cpu || (updateTarget == TaskTarget::Auto && !forceGpuUpdateDefault)) { return false; } @@ -515,7 +516,7 @@ bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefaultWithD if (isDomainDecomposition) { - if (!forceGpuUpdateDefaultWithDD) + if (!forceGpuUpdateDefault) { errorMessage += "Domain decomposition is not supported.\n "; } @@ -627,10 +628,12 @@ bool decideWhetherToUseGpuForUpdate(const bool 
forceGpuUpdateDefaultWithD if (isDomainDecomposition) { - return forceGpuUpdateDefaultWithDD; + return forceGpuUpdateDefault; + } + else + { + return (updateTarget == TaskTarget::Gpu || forceGpuUpdateDefault); } - - return true; } } // namespace gmx diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h index a1929f1c16..45ba2d333f 100644 --- a/src/gromacs/taskassignment/decidegpuusage.h +++ b/src/gromacs/taskassignment/decidegpuusage.h @@ -232,7 +232,7 @@ bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, /*! \brief Decide whether to use GPU for update. * - * \param[in] forceGpuUpdateDefaultWithDD If update should run on GPU with DD by default. + * \param[in] forceGpuUpdateDefault If update should run on GPU by default. * \param[in] isDomainDecomposition Whether there more than one domain. * \param[in] useUpdateGroups If the constraints can be split across domains. * \param[in] useGpuForPme Whether GPUs will be used for PME interactions. @@ -250,7 +250,7 @@ bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, * \throws std::bad_alloc If out of memory * InconsistentInputError If the user requirements are inconsistent. */ -bool decideWhetherToUseGpuForUpdate(bool forceGpuUpdateDefaultWithDD, +bool decideWhetherToUseGpuForUpdate(bool forceGpuUpdateDefault, bool isDomainDecomposition, bool useUpdateGroups, bool useGpuForPme, -- 2.22.0