From a07e8b21025a4e429ff8ec61783c1ad17e9516a8 Mon Sep 17 00:00:00 2001 From: Mark Abraham Date: Thu, 11 Oct 2018 21:53:38 +0200 Subject: [PATCH] Task assignment for bonded interactions on CUDA GPUs Made a query function to find whether any interactions of supported types exist in the global topology, so that we can make efficient high-level decisions. Added free for gpuBondedLists pointer. Minor cleanup in manage-threading.h Fixes #2679 Change-Id: I0ebbbd33c2cba5808561111b0ec6160bfd2f840d --- src/api/cpp/context.cpp | 1 + src/gromacs/ewald/pme.h | 2 +- .../listed-forces/manage-threading.cpp | 109 ++++++++++++++++++ src/gromacs/listed-forces/manage-threading.h | 37 +++++- src/gromacs/mdlib/forcerec.cpp | 9 +- src/gromacs/mdlib/forcerec.h | 2 + src/gromacs/mdrun/legacymdrunoptions.h | 6 +- src/gromacs/mdrun/runner.cpp | 53 ++++++++- src/gromacs/mdrun/runner.h | 30 +++++ src/gromacs/taskassignment/decidegpuusage.cpp | 66 +++++++++++ src/gromacs/taskassignment/decidegpuusage.h | 24 ++++ src/gromacs/taskassignment/reportgpuusage.cpp | 16 ++- src/gromacs/taskassignment/reportgpuusage.h | 9 +- src/gromacs/taskassignment/taskassignment.cpp | 7 +- src/gromacs/taskassignment/taskassignment.h | 8 +- src/programs/mdrun/mdrun.cpp | 1 + 16 files changed, 360 insertions(+), 20 deletions(-) diff --git a/src/api/cpp/context.cpp b/src/api/cpp/context.cpp index c5a8024d99..9bfd993c28 100644 --- a/src/api/cpp/context.cpp +++ b/src/api/cpp/context.cpp @@ -187,6 +187,7 @@ std::shared_ptr ContextImpl::launch(const Workflow &work) builder.addNonBonded(options_.nbpu_opt_choices[0]); // \todo pass by value builder.addElectrostatics(options_.pme_opt_choices[0], options_.pme_fft_opt_choices[0]); + builder.addBondedTaskAssignment(options_.bonded_opt_choices[0]); builder.addNeighborList(options_.nstlist_cmdline); builder.addReplicaExchange(options_.replExParams); // \todo take ownership of multisim resources (ms) diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h index
81988e6bc9..ccf5e7227d 100644 --- a/src/gromacs/ewald/pme.h +++ b/src/gromacs/ewald/pme.h @@ -88,7 +88,7 @@ enum { /*! \brief Possible PME codepaths on a rank. * \todo: make this enum class with gmx_pme_t C++ refactoring */ -enum PmeRunMode +enum class PmeRunMode { None, //!< No PME task is done CPU, //!< Whole PME computation is done on CPU diff --git a/src/gromacs/listed-forces/manage-threading.cpp b/src/gromacs/listed-forces/manage-threading.cpp index 1541edd81a..7c50b774aa 100644 --- a/src/gromacs/listed-forces/manage-threading.cpp +++ b/src/gromacs/listed-forces/manage-threading.cpp @@ -54,11 +54,15 @@ #include #include +#include #include "gromacs/listed-forces/listed-forces.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" +#include "gromacs/mdtypes/inputrec.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/topology/ifunc.h" +#include "gromacs/topology/topology.h" +#include "gromacs/utility/arrayref.h" #include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxassert.h" @@ -248,6 +252,8 @@ static void divide_bondeds_over_threads(bonded_threading_t *bt, int nrToAssignToCpuThreads = il.nr; if (useGpuForBondeds && + // TODO remove the next line when we have GPU bonded kernels + false && // NOLINT readability-simplify-boolean-expr ftypeGpuIndex < ftypesOnGpu.size() && ftypesOnGpu[ftypeGpuIndex] == ftype) { @@ -370,6 +376,109 @@ static void convertIlistToNbnxnOrder(const t_ilist &src, } } +namespace gmx +{ + +//! Returns whether there are any interactions suitable for a GPU. +static bool someInteractionsCanRunOnGpu(const InteractionLists &ilists) +{ + for (int ftype : ftypesOnGpu) + { + if (!ilists[ftype].iatoms.empty()) + { + // Perturbation is not implemented in the GPU bonded + // kernels. If all the interactions were actually + // perturbed, then that will be detected later on each + // domain, and work will never run on the GPU. 
This is + // very unlikely to occur, and has little run-time cost, + // so we don't complicate the code by catering for it + // here. + return true; + } + } + return false; +} + +//! Returns whether any molecule type or the inter-molecular interaction list of \c mtop has interactions suitable for a GPU. +static bool bondedInteractionsCanRunOnGpu(const gmx_mtop_t &mtop) +{ + // Check the regular molecule types + for (const auto &moltype : mtop.moltype) + { + if (someInteractionsCanRunOnGpu(moltype.ilist)) + { + return true; + } + } + // Check the inter-molecular interactions. + if (mtop.intermolecular_ilist) + { + if (someInteractionsCanRunOnGpu(*mtop.intermolecular_ilist)) + { + return true; + } + } + return false; +} + +/*! \brief Help build a descriptive message in \c error if there are + * \c errorReasons why bondeds on a GPU are not supported. + * + * \returns Whether the lack of errorReasons indicates there is support. */ +static bool +addMessageIfNotSupported(ArrayRef errorReasons, + std::string *error) +{ + bool isSupported = errorReasons.empty(); + if (!isSupported && error) + { + *error = "Bonded interactions cannot run on GPUs: "; + *error += joinStrings(errorReasons, "; ") + "."; + } + return isSupported; +} + +bool buildSupportsGpuBondeds(std::string *error) +{ + std::vector errorReasons; + if (GMX_DOUBLE) + { + errorReasons.emplace_back("not supported with double precision"); + } + if (GMX_GPU == GMX_GPU_OPENCL) + { + errorReasons.emplace_back("not supported with OpenCL build of GROMACS"); + } + else if (GMX_GPU == GMX_GPU_NONE) + { + errorReasons.emplace_back("not supported with CPU-only build of GROMACS"); + } + return addMessageIfNotSupported(errorReasons, error); +} + +bool inputSupportsGpuBondeds(const t_inputrec &ir, + const gmx_mtop_t &mtop, + std::string *error) +{ + std::vector errorReasons; + + if (!bondedInteractionsCanRunOnGpu(mtop)) + { + errorReasons.emplace_back("No supported bonded interactions are present"); + } + if (ir.cutoff_scheme == ecutsGROUP) + { + errorReasons.emplace_back("group cutoff
scheme"); + } + if (!EI_DYNAMICS(ir.eI)) + { + errorReasons.emplace_back("not a dynamical integrator"); + } + return addMessageIfNotSupported(errorReasons, error); +} + +} // namespace gmx + //! Divides bonded interactions over threads and GPU void assign_bondeds_to_gpu(GpuBondedLists *gpuBondedLists, gmx::ArrayRef nbnxnAtomOrder, diff --git a/src/gromacs/listed-forces/manage-threading.h b/src/gromacs/listed-forces/manage-threading.h index 47dd87435c..960bcace4e 100644 --- a/src/gromacs/listed-forces/manage-threading.h +++ b/src/gromacs/listed-forces/manage-threading.h @@ -46,10 +46,15 @@ #include -#include "gromacs/mdtypes/forcerec.h" +#include + #include "gromacs/topology/idef.h" #include "gromacs/utility/arrayref.h" +struct bonded_threading_t; +struct gmx_mtop_t; +struct t_inputrec; + /*! \internal \brief Struct for storing lists of bonded interaction for evaluation on a GPU */ struct GpuBondedLists { @@ -57,6 +62,34 @@ struct GpuBondedLists bool haveInteractions; /**< Tells whether there are any interaction in iLists */ }; + +namespace gmx +{ + +/*! \brief Checks whether the GROMACS build allows to compute bonded interactions on a GPU. + * + * \param[out] error If non-null, the diagnostic message when bondeds cannot run on a GPU. + * + * \returns true when this build can run bonded interactions on a GPU, false otherwise. + * + * \throws std::bad_alloc when out of memory. + */ +bool buildSupportsGpuBondeds(std::string *error); + +/*! \brief Checks whether the input system allows to compute bonded interactions on a GPU. + * + * \param[in] ir Input system. + * \param[in] mtop Complete system topology to search for supported interactions. + * \param[out] error If non-null, the error message if the input is not supported on GPU. + * + * \returns true if bonded interactions can run on a GPU with this input, false otherwise. + */ +bool inputSupportsGpuBondeds(const t_inputrec &ir, + const gmx_mtop_t &mtop, + std::string *error); + +} // namespace gmx + /*!
\brief Copy bonded interactions assigned to the GPU to \p gpuBondedLists */ void assign_bondeds_to_gpu(GpuBondedLists *gpuBondedLists, gmx::ArrayRef nbnxnAtomOrder, @@ -83,6 +116,6 @@ void tear_down_bonded_threading(bonded_threading_t *bt); * A pointer to this struct is returned as \p *bb_ptr. */ void init_bonded_threading(FILE *fplog, int nenergrp, - struct bonded_threading_t **bt_ptr); + bonded_threading_t **bt_ptr); #endif diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index f718d22cbf..2d931d28a4 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -2315,6 +2315,7 @@ void init_forcerec(FILE *fp, gmx::ArrayRef tabbfnm, const gmx_hw_info_t &hardwareInfo, const gmx_device_info_t *deviceInfo, + const bool useGpuForBonded, gmx_bool bNoSolvOpt, real print_force) { @@ -3052,15 +3053,10 @@ void init_forcerec(FILE *fp, init_bonded_threading(fp, mtop->groups.grps[egcENER].nr, &fr->bondedThreading); - // TODO: Replace this condition by the GPU bonded task boolean - if (fr->cutoff_scheme == ecutsVERLET && getenv("GMX_TEST_GPU_BONDEDS")) + if (useGpuForBonded) { fr->gpuBondedLists = new GpuBondedLists; } - else - { - fr->gpuBondedLists = nullptr; - } fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded); snew(fr->ewc_t, fr->nthread_ewc); @@ -3152,6 +3148,7 @@ void done_forcerec(t_forcerec *fr, int numMolBlocks, int numEnergyGroups) done_ns(fr->ns, numEnergyGroups); sfree(fr->ewc_t); tear_down_bonded_threading(fr->bondedThreading); + delete fr->gpuBondedLists; fr->bondedThreading = nullptr; sfree(fr); } diff --git a/src/gromacs/mdlib/forcerec.h b/src/gromacs/mdlib/forcerec.h index 64e60c7332..1f0694699b 100644 --- a/src/gromacs/mdlib/forcerec.h +++ b/src/gromacs/mdlib/forcerec.h @@ -115,6 +115,7 @@ void init_interaction_const_tables(FILE *fp, * \param[in] tabbfnm Table potential files for bonded interactions * \param[in] hardwareInfo Information about hardware * \param[in] deviceInfo Info about GPU device to use for 
short-ranged work + * \param[in] useGpuForBonded Whether bonded interactions will run on a GPU * \param[in] bNoSolvOpt Do not use solvent optimization * \param[in] print_force Print forces for atoms with force >= print_force */ @@ -131,6 +132,7 @@ void init_forcerec(FILE *fplog, gmx::ArrayRef tabbfnm, const gmx_hw_info_t &hardwareInfo, const gmx_device_info_t *deviceInfo, + bool useGpuForBonded, gmx_bool bNoSolvOpt, real print_force); diff --git a/src/gromacs/mdrun/legacymdrunoptions.h b/src/gromacs/mdrun/legacymdrunoptions.h index 7584f2da07..637cd2a6b9 100644 --- a/src/gromacs/mdrun/legacymdrunoptions.h +++ b/src/gromacs/mdrun/legacymdrunoptions.h @@ -155,13 +155,15 @@ class LegacyMdrunOptions { nullptr, "auto", "cpu", "gpu", nullptr }; const char *pme_fft_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; + const char *bonded_opt_choices[5] = + { nullptr, "auto", "cpu", "gpu", nullptr }; gmx_bool bTryToAppendFiles = TRUE; const char *gpuIdsAvailable = ""; const char *userGpuTaskAssignment = ""; ImdOptions &imdOptions = mdrunOptions.imdOptions; - t_pargs pa[47] = { + t_pargs pa[48] = { { "-dd", FALSE, etRVEC, {&realddxyz}, "Domain decomposition grid, 0 is optimize" }, @@ -224,6 +226,8 @@ class LegacyMdrunOptions "Perform PME calculations on" }, { "-pmefft", FALSE, etENUM, {pme_fft_opt_choices}, "Perform PME FFT calculations on" }, + { "-bonded", FALSE, etENUM, {bonded_opt_choices}, + "Perform bonded calculations on" }, { "-v", FALSE, etBOOL, {&mdrunOptions.verbose}, "Be loud and noisy" }, { "-pforce", FALSE, etREAL, {&pforce}, diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 5bc87e3cc8..ce9b5b8eb3 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -75,6 +75,7 @@ #include "gromacs/hardware/detecthardware.h" #include "gromacs/hardware/printhardware.h" #include "gromacs/listed-forces/disre.h" +#include "gromacs/listed-forces/manage-threading.h" #include "gromacs/listed-forces/orires.h" #include 
"gromacs/math/functions.h" #include "gromacs/math/utilities.h" @@ -185,6 +186,7 @@ Mdrunner Mdrunner::cloneOnSpawnedThread() const newRunner.nbpu_opt = nbpu_opt; newRunner.pme_opt = pme_opt; newRunner.pme_fft_opt = pme_fft_opt; + newRunner.bonded_opt = bonded_opt; newRunner.nstlist_cmdline = nstlist_cmdline; newRunner.replExParams = replExParams; newRunner.pforce = pforce; @@ -473,6 +475,7 @@ int Mdrunner::mdrunner() auto nonbondedTarget = findTaskTarget(nbpu_opt); auto pmeTarget = findTaskTarget(pme_opt); auto pmeFftTarget = findTaskTarget(pme_fft_opt); + auto bondedTarget = findTaskTarget(bonded_opt); PmeRunMode pmeRunMode = PmeRunMode::None; // Here we assume that SIMMASTER(cr) does not change even after the @@ -665,23 +668,34 @@ int Mdrunner::mdrunner() // having an assertion? // // Note that these variables describe only their own node. + // + // Note that when bonded interactions run on a GPU they always run + // alongside a nonbonded task, so do not influence task assignment + // even though they affect the force calculation schedule. bool useGpuForNonbonded = false; bool useGpuForPme = false; + bool useGpuForBonded = false; try { // It's possible that there are different numbers of GPUs on // different nodes, which is the user's responsibilty to // handle. If unsuitable, we will notice that during task // assignment. 
- bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0; + bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0; + bool usingVerletScheme = inputrec->cutoff_scheme == ecutsVERLET; useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment, - emulateGpuNonbonded, inputrec->cutoff_scheme == ecutsVERLET, + emulateGpuNonbonded, usingVerletScheme, gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI), gpusWereDetected); auto canUseGpuForPme = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr); useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks, gpusWereDetected); + auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr); + useGpuForBonded = + decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme, usingVerletScheme, + bondedTarget, canUseGpuForBonded, cr->nnodes, + domdecOptions.numPmeRanks, gpusWereDetected); pmeRunMode = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU); if (pmeRunMode == PmeRunMode::GPU) @@ -984,7 +998,7 @@ int Mdrunner::mdrunner() // Note that in general useGpuForNonbonded, etc. can have a value // that is inconsistent with the presence of actual GPUs on any // rank, and that is not known to be a problem until the - // duty of the ranks on a node become node. + // duty of the ranks on a node become known. // // TODO Later we might need the concept of computeTasksOnThisRank, // from which we construct gpuTasksOnThisRank. @@ -1001,6 +1015,8 @@ int Mdrunner::mdrunner() { if (useGpuForNonbonded) { + // Note that any bonded tasks on a GPU always accompany a + // non-bonded task. 
if (haveGpus) { gpuTasksOnThisRank.push_back(GpuTask::Nonbonded); @@ -1009,6 +1025,10 @@ int Mdrunner::mdrunner() { gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected."); } + else if (bondedTarget == TaskTarget::Gpu) + { + gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because there is none detected."); + } } } // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not. @@ -1032,7 +1052,8 @@ int Mdrunner::mdrunner() { // Produce the task assignment for this rank. gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo, - mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank); + mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank, + useGpuForBonded, pmeRunMode); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; @@ -1198,6 +1219,7 @@ int Mdrunner::mdrunner() opt2fn("-tablep", filenames.size(), filenames.data()), opt2fns("-tableb", filenames.size(), filenames.data()), *hwinfo, nonbondedDeviceInfo, + useGpuForBonded, FALSE, pforce); @@ -1553,6 +1575,8 @@ class Mdrunner::BuilderImplementation void addPME(const char* pme_opt_, const char* pme_fft_opt_); + void addBondedTaskAssignment(const char* bonded_opt); + void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions); void addFilenames(ArrayRef filenames); @@ -1572,6 +1596,7 @@ class Mdrunner::BuilderImplementation const char* nbpu_opt_ = nullptr; const char* pme_opt_ = nullptr; const char* pme_fft_opt_ = nullptr; + const char *bonded_opt_ = nullptr; MdrunOptions mdrunOptions_; @@ -1729,6 +1754,15 @@ Mdrunner Mdrunner::BuilderImplementation::build() GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()")); } + if (bonded_opt_) + { + newRunner.bonded_opt = bonded_opt_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()")); + } + newRunner.restraintManager_ = compat::make_unique(); if (stopHandlerBuilder_) @@ 
-1755,6 +1789,11 @@ void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, pme_fft_opt_ = pme_fft_opt; } +void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt) +{ + bonded_opt_ = bonded_opt; +} + void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions) { hardwareOptions_ = hardwareOptions; @@ -1841,6 +1880,12 @@ MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt, return *this; } +MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt) +{ + impl_->addBondedTaskAssignment(bonded_opt); + return *this; +} + Mdrunner MdrunnerBuilder::build() { return impl_->build(); diff --git a/src/gromacs/mdrun/runner.h b/src/gromacs/mdrun/runner.h index 75ee32f51b..465fdd27f1 100644 --- a/src/gromacs/mdrun/runner.h +++ b/src/gromacs/mdrun/runner.h @@ -233,6 +233,14 @@ class Mdrunner * \todo replace with string or enum class and initialize with sensible value. */ const char *pme_fft_opt = nullptr; + + /*! \brief Target bonded interactions for "cpu", "gpu", or "auto". Default is "auto". + * + * \internal + * \todo replace with string or enum class and initialize with sensible value. + */ + const char *bonded_opt = nullptr; + //! Command-line override for the duration of a neighbor list with the Verlet scheme. int nstlist_cmdline = 0; //! Parameters for replica-exchange simulations. @@ -403,6 +411,28 @@ class MdrunnerBuilder final MdrunnerBuilder &addElectrostatics(const char* pme_opt, const char* pme_fft_opt); + /*! + * \brief Assign responsibility for tasks for bonded interactions. + * + * Required. Director code should provide valid options for + * bonded interaction task assignment, whether or not such + * interactions are present. The builder does not apply any + * defaults, so client code should be prepared to provide + * (e.g.) "auto" in the event no user input or logic provides + * an alternative argument.
+ * + * \param bonded_opt Target bonded interactions for "cpu", "gpu", or "auto". + * + * Calling must guarantee that the pointed-to C strings are valid through + * simulation launch. + * + * \internal + * The arguments are passed as references to elements of arrays of C strings. + * \todo Replace with modern strings or (better) enum classes. + * \todo Make optional and/or encapsulate into task assignment module. + */ + MdrunnerBuilder &addBondedTaskAssignment(const char *bonded_opt); + /*! * \brief Provide access to the multisim communicator to use. * diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp index 10bcbb8f50..dbd9dd6746 100644 --- a/src/gromacs/taskassignment/decidegpuusage.cpp +++ b/src/gromacs/taskassignment/decidegpuusage.cpp @@ -408,4 +408,70 @@ bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded, return false; } +bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded, + const bool /*useGpuForPme*/, + const bool usingVerletScheme, + const TaskTarget bondedTarget, + const bool canUseGpuForBonded, + const int /*numRanksPerSimulation*/, + const int /*numPmeRanksPerSimulation*/, + const bool gpusWereDetected) +{ + if (bondedTarget == TaskTarget::Cpu) + { + return false; + } + + if (!usingVerletScheme) + { + if (bondedTarget == TaskTarget::Gpu) + { + GMX_THROW(InconsistentInputError + ("Bonded interactions on the GPU were required, which requires using " + "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs.")); + } + + return false; + } + + if (!canUseGpuForBonded) + { + if (bondedTarget == TaskTarget::Gpu) + { + GMX_THROW(InconsistentInputError + ("Bonded interactions on the GPU were required, but not supported for these " + "simulation settings. 
Change your settings, or do not require using GPUs.")); + } + + return false; + } + + if (!useGpuForNonbonded) + { + if (bondedTarget == TaskTarget::Gpu) + { + GMX_THROW(InconsistentInputError + ("Bonded interactions on the GPU were required, but this requires that " + "short-ranged non-bonded interactions are also run on the GPU. Change " + "your settings, or do not require using GPUs.")); + } + + return false; + } + + // TODO If the bonded kernels do not get fused, then performance + // overheads might suggest alternative choices here. + + if (bondedTarget == TaskTarget::Gpu) + { + // We still don't know whether it is an error if no GPUs are + // found. + return true; + } + + // If we get here, then the user permitted GPUs, which we should + // use for bonded interactions if any were detected. + return gpusWereDetected; +} + } // namespace gmx diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h index 0730d52b9a..9b8279cea7 100644 --- a/src/gromacs/taskassignment/decidegpuusage.h +++ b/src/gromacs/taskassignment/decidegpuusage.h @@ -186,6 +186,30 @@ bool decideWhetherToUseGpusForPme(bool useGpuForNonbonded, int numPmeRanksPerSimulation, bool gpusWereDetected); +/*! \brief Decide whether the simulation will try to run bonded tasks on GPUs. + * + * \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions. + * \param[in] useGpuForPme Whether GPUs will be used for PME interactions. + * \param[in] usingVerletScheme Whether the nonbondeds are using the Verlet scheme. + * \param[in] bondedTarget The user's choice for mdrun -bonded for where to assign tasks. + * \param[in] canUseGpuForBonded Whether the bonded interactions can run on a GPU + * \param[in] numRanksPerSimulation The number of ranks in each simulation. + * \param[in] numPmeRanksPerSimulation The number of PME ranks in each simulation. + * \param[in] gpusWereDetected Whether compatible GPUs were detected on any node. 
+ * + * \returns Whether the simulation will run bonded tasks on GPUs. + * + * \throws std::bad_alloc If out of memory + * InconsistentInputError If the user requirements are inconsistent. */ +bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, + bool useGpuForPme, + bool usingVerletScheme, + TaskTarget bondedTarget, + bool canUseGpuForBonded, + int numRanksPerSimulation, + int numPmeRanksPerSimulation, + bool gpusWereDetected); + } // namespace gmx #endif diff --git a/src/gromacs/taskassignment/reportgpuusage.cpp b/src/gromacs/taskassignment/reportgpuusage.cpp index e1cb200375..70f50476c5 100644 --- a/src/gromacs/taskassignment/reportgpuusage.cpp +++ b/src/gromacs/taskassignment/reportgpuusage.cpp @@ -45,6 +45,7 @@ #include #include +#include "gromacs/ewald/pme.h" #include "gromacs/gpu_utils/gpu_utils.h" #include "gromacs/utility/cstringutil.h" #include "gromacs/utility/logger.h" @@ -84,7 +85,9 @@ reportGpuUsage(const MDLogger &mdlog, const GpuTaskAssignments &gpuTaskAssignmentOnRanksOfThisNode, size_t numGpuTasksOnThisNode, size_t numRanks, - bool bPrintHostName) + bool bPrintHostName, + bool useGpuForBonded, + PmeRunMode pmeRunMode) { size_t numGpusInUse = countUniqueGpuIdsUsed(gpuTaskAssignmentOnRanksOfThisNode); if (numGpusInUse == 0) @@ -133,6 +136,17 @@ reportGpuUsage(const MDLogger &mdlog, numRanks, (numRanks > 1) ? "s" : "", gpuIdsString.c_str()); + // Because there is a GPU in use, there must be a PP task on a GPU. Note the leading space in the optional bonded fragment, which separates it from "short-ranged". + output += gmx::formatString("PP tasks will do short-ranged%s interactions on the GPU\n", + useGpuForBonded ?
" and most bonded" : ""); + if (pmeRunMode == PmeRunMode::Mixed) + { + output += gmx::formatString("PME tasks will do only spread and gather on the GPU\n"); + } + else if (pmeRunMode == PmeRunMode::GPU) + { + output += gmx::formatString("PME tasks will do all aspects on the GPU\n"); + } } /* NOTE: this print is only for and on one physical node */ diff --git a/src/gromacs/taskassignment/reportgpuusage.h b/src/gromacs/taskassignment/reportgpuusage.h index 400cdeb165..fa26ddddf1 100644 --- a/src/gromacs/taskassignment/reportgpuusage.h +++ b/src/gromacs/taskassignment/reportgpuusage.h @@ -50,6 +50,8 @@ #include "gromacs/taskassignment/taskassignment.h" +enum class PmeRunMode; + namespace gmx { @@ -67,6 +69,8 @@ class MDLogger; * \param[in] numGpuTasksOnThisNode The number of GPU tasks on this node. * \param[in] numPpRanks Number of PP ranks on this node * \param[in] bPrintHostName Print the hostname in the usage information + * \param[in] useGpuForBonded Whether GPU PP tasks will do bonded work on the GPU + * \param[in] pmeRunMode Describes the execution of PME tasks * * \throws std::bad_alloc if out of memory */ void @@ -75,8 +79,9 @@ reportGpuUsage(const MDLogger &mdlog, const GpuTaskAssignments &gpuTaskAssignmentOnRanksOfThisNode, size_t numGpuTasksOnThisNode, size_t numPpRanks, - bool bPrintHostName); - + bool bPrintHostName, + bool useGpuForBonded, + PmeRunMode pmeRunMode); } // namespace gmx diff --git a/src/gromacs/taskassignment/taskassignment.cpp b/src/gromacs/taskassignment/taskassignment.cpp index 136587e3cb..785a93a804 100644 --- a/src/gromacs/taskassignment/taskassignment.cpp +++ b/src/gromacs/taskassignment/taskassignment.cpp @@ -190,7 +190,9 @@ runTaskAssignment(const std::vector &gpuIdsToUse, const t_commrec *cr, const gmx_multisim_t *ms, const PhysicalNodeCommunicator &physicalNodeComm, - const std::vector &gpuTasksOnThisRank) + const std::vector &gpuTasksOnThisRank, + bool useGpuForBonded, + PmeRunMode pmeRunMode) { /* Communicate among ranks on this
node to find each task that can * be executed on a GPU, on each rank. */ @@ -307,7 +309,8 @@ runTaskAssignment(const std::vector &gpuIdsToUse, } reportGpuUsage(mdlog, !userGpuTaskAssignment.empty(), taskAssignmentOnRanksOfThisNode, - numGpuTasksOnThisNode, physicalNodeComm.size_, cr->nnodes > 1); + numGpuTasksOnThisNode, physicalNodeComm.size_, cr->nnodes > 1, + useGpuForBonded, pmeRunMode); // If the user chose a task assignment, give them some hints where appropriate. if (!userGpuTaskAssignment.empty()) diff --git a/src/gromacs/taskassignment/taskassignment.h b/src/gromacs/taskassignment/taskassignment.h index 7c8250a564..0c57951d05 100644 --- a/src/gromacs/taskassignment/taskassignment.h +++ b/src/gromacs/taskassignment/taskassignment.h @@ -56,6 +56,8 @@ struct gmx_hw_info_t; struct gmx_multisim_t; struct t_commrec; +enum class PmeRunMode; + namespace gmx { @@ -108,6 +110,8 @@ using GpuTaskAssignments = std::vector; * \param[in] physicalNodeComm Communication object for this physical node. * \param[in] gpuTasksOnThisRank Information about what GPU tasks * exist on this rank. + * \param[in] useGpuForBonded Whether GPU PP tasks will do bonded work on the GPU + * \param[in] pmeRunMode Describes the execution of PME tasks * * \returns A GPU task assignment for this rank. * @@ -122,7 +126,9 @@ runTaskAssignment(const std::vector &gpuIdsToUse, const t_commrec *cr, const gmx_multisim_t *ms, const PhysicalNodeCommunicator &physicalNodeComm, - const std::vector &gpuTasksOnThisRank); + const std::vector &gpuTasksOnThisRank, + bool useGpuForBonded, + PmeRunMode pmeRunMode); //! Function for whether the task of \c mapping has value \c TaskType. 
template diff --git a/src/programs/mdrun/mdrun.cpp b/src/programs/mdrun/mdrun.cpp index 6e568a4a57..b7ee9fc40b 100644 --- a/src/programs/mdrun/mdrun.cpp +++ b/src/programs/mdrun/mdrun.cpp @@ -271,6 +271,7 @@ int gmx_mdrun(int argc, char *argv[]) builder.addNonBonded(options.nbpu_opt_choices[0]); // \todo pass by value builder.addElectrostatics(options.pme_opt_choices[0], options.pme_fft_opt_choices[0]); + builder.addBondedTaskAssignment(options.bonded_opt_choices[0]); builder.addNeighborList(options.nstlist_cmdline); builder.addReplicaExchange(options.replExParams); // \todo take ownership of multisim resources (ms) -- 2.22.0