From 48620e5ef3e62b43ed618e9ceb1f49d4bf41913b Mon Sep 17 00:00:00 2001 From: Mark Abraham Date: Wed, 25 Oct 2017 11:45:32 +0200 Subject: [PATCH] Update treatment of GPU compatibility data structure Now we only construct the vector of compatible GPUs once per mdrun, and are less coupled to hw_info and gpu_info structs. Change-Id: I181f0486d0ea1670de7a85046c94c1fef83dce17 --- .../ewald/tests/testhardwarecontexts.cpp | 4 +--- src/gromacs/gpu_utils/gpu_utils.cpp | 17 +++++++++++++++- src/gromacs/gpu_utils/gpu_utils.cu | 20 ++++++++++++------- src/gromacs/gpu_utils/gpu_utils.h | 20 +++++++++---------- src/gromacs/gpu_utils/gpu_utils_ocl.cpp | 18 ++++++++++++----- src/gromacs/hardware/detecthardware.cpp | 1 + src/gromacs/hardware/hw_info.h | 2 ++ src/gromacs/taskassignment/hardwareassign.cpp | 18 +---------------- src/gromacs/taskassignment/hardwareassign.h | 13 +++--------- src/programs/mdrun/runner.cpp | 2 +- 10 files changed, 61 insertions(+), 54 deletions(-) diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.cpp b/src/gromacs/ewald/tests/testhardwarecontexts.cpp index 7bba127012..cad2af2595 100644 --- a/src/gromacs/ewald/tests/testhardwarecontexts.cpp +++ b/src/gromacs/ewald/tests/testhardwarecontexts.cpp @@ -49,7 +49,6 @@ #include "gromacs/gmxlib/network.h" #include "gromacs/gpu_utils/gpu_utils.h" #include "gromacs/hardware/hw_info.h" -#include "gromacs/taskassignment/hardwareassign.h" #include "gromacs/utility/exceptions.h" #include "gromacs/utility/loggerbuilder.h" #include "gromacs/utility/unique_cptr.h" @@ -105,8 +104,7 @@ void PmeTestEnvironment::SetUp() // Constructing contexts for all compatible GPUs - will be empty on non-GPU builds TestHardwareContexts gpuContexts; - const auto compatibleGpus = getCompatibleGpus(hardwareInfo_->gpu_info); - for (int gpuIndex : compatibleGpus) + for (int gpuIndex : hardwareInfo_->compatibleGpus) { char stmp[200] = {}; get_gpu_device_info_string(stmp, hardwareInfo_->gpu_info, gpuIndex); diff --git a/src/gromacs/gpu_utils/gpu_utils.cpp b/src/gromacs/gpu_utils/gpu_utils.cpp index fcaed80d22..3b943dabfb 100644 --- a/src/gromacs/gpu_utils/gpu_utils.cpp +++ b/src/gromacs/gpu_utils/gpu_utils.cpp @@ -33,7 +33,7 @@ * the research papers on the package. Check out http://www.gromacs.org. */ /*! \internal \file - * \brief Stub functions for non-GPU builds + * \brief Function definitions for non-GPU builds * * \author Mark Abraham */ @@ -41,6 +41,8 @@ #include "gpu_utils.h" +#include "gromacs/hardware/gpu_hw_info.h" + /*! \brief Set allocation functions used by the GPU host * * Since GPU support is not configured, there is no host memory to @@ -52,3 +54,16 @@ void gpu_set_host_malloc_and_free(bool, *nb_alloc = nullptr; *nb_free = nullptr; } + +//! This function is documented in the header file +std::vector getCompatibleGpus(const gmx_gpu_info_t & /*gpu_info*/) +{ + // There can't be any compatible GPUs + return std::vector(); +} + +const char *getGpuCompatibilityDescription(const gmx_gpu_info_t & /*gpu_info*/, + int /*index*/) +{ + return gpu_detect_res_str[egpuNonexistent]; +} diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu index 61037e39c3..443aa70540 100644 --- a/src/gromacs/gpu_utils/gpu_utils.cu +++ b/src/gromacs/gpu_utils/gpu_utils.cu @@ -623,14 +623,20 @@ int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str) return retval; } -bool isGpuCompatible(const gmx_gpu_info_t &gpu_info, - int index) +std::vector getCompatibleGpus(const gmx_gpu_info_t &gpu_info) { - assert(gpu_info.n_dev == 0 || gpu_info.gpu_dev); - - return (index >= gpu_info.n_dev ? - false : - gpu_info.gpu_dev[index].stat == egpuCompatible); + // Possible minor over-allocation here, but not important for anything + std::vector compatibleGpus; + compatibleGpus.reserve(gpu_info.n_dev); + for (int i = 0; i < gpu_info.n_dev; i++) + { + assert(gpu_info.gpu_dev); + if (gpu_info.gpu_dev[i].stat == egpuCompatible) + { + compatibleGpus.push_back(i); + } + } + return compatibleGpus; } const char *getGpuCompatibilityDescription(const gmx_gpu_info_t &gpu_info, diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h index b4371049be..c85b1bed66 100644 --- a/src/gromacs/gpu_utils/gpu_utils.h +++ b/src/gromacs/gpu_utils/gpu_utils.h @@ -47,6 +47,8 @@ #include +#include + #include "gromacs/gpu_utils/gpu_macros.h" #include "gromacs/utility/basedefinitions.h" @@ -74,15 +76,14 @@ class MDLogger; GPU_FUNC_QUALIFIER int detect_gpus(struct gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info), char *GPU_FUNC_ARGUMENT(err_str)) GPU_FUNC_TERM_WITH_RETURN(-1) -/*! \brief Return whether the GPU with given \c index is compatible, ie suitable for use. +/*! \brief Return a container of the detected GPUs that are compatible. * - * \param[in] gpu_info Information about detected GPUs - * \param[in] index index of GPU to ask about - * \returns Whether the GPU is compatible. - */ -GPU_FUNC_QUALIFIER -bool isGpuCompatible(const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info), - int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM_WITH_RETURN(false) + * This function filters the result of the detection for compatible + * GPUs, based on the previously run compatibility tests. + * + * \param[in] gpu_info Information detected about GPUs, including compatibility. + * \return vector of IDs of GPUs already recorded as compatible */ +std::vector getCompatibleGpus(const gmx_gpu_info_t &gpu_info); /*! \brief Return a string describing how compatible the GPU with given \c index is. * @@ -90,9 +91,8 @@ bool isGpuCompatible(const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info), * \param[in] index index of GPU to ask about * \returns A null-terminated C string describing the compatibility status, useful for error messages. */ -GPU_FUNC_QUALIFIER const char *getGpuCompatibilityDescription(const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info), - int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM_WITH_RETURN("") + int GPU_FUNC_ARGUMENT(index)); /*! \brief Frees the gpu_dev and dev_use array fields of \p gpu_info. * diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp index efd9934255..5195baccc5 100644 --- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp +++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp @@ -343,12 +343,20 @@ void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info) } //! This function is documented in the header file -bool isGpuCompatible(const gmx_gpu_info_t &gpu_info, - int index) +std::vector getCompatibleGpus(const gmx_gpu_info_t &gpu_info) { - return (index >= gpu_info.n_dev ? - false : - gpu_info.gpu_dev[index].stat == egpuCompatible); + // Possible minor over-allocation here, but not important for anything + std::vector compatibleGpus; + compatibleGpus.reserve(gpu_info.n_dev); + for (int i = 0; i < gpu_info.n_dev; i++) + { + assert(gpu_info.gpu_dev); + if (gpu_info.gpu_dev[i].stat == egpuCompatible) + { + compatibleGpus.push_back(i); + } + } + return compatibleGpus; } //! This function is documented in the header file diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp index af9fd2edc6..48f9a8c2c7 100644 --- a/src/gromacs/hardware/detecthardware.cpp +++ b/src/gromacs/hardware/detecthardware.cpp @@ -496,6 +496,7 @@ gmx_hw_info_t *gmx_detect_hardware(const gmx::MDLogger &mdlog, const t_commrec * gmx_detect_gpus(mdlog, cr); gmx_collect_hardware_mpi(*hwinfo_g->cpuInfo); + hwinfo_g->compatibleGpus = getCompatibleGpus(hwinfo_g->gpu_info); } /* increase the reference counter */ n_hwinfo++; diff --git a/src/gromacs/hardware/hw_info.h b/src/gromacs/hardware/hw_info.h index 7e08e63f3f..29e59de023 100644 --- a/src/gromacs/hardware/hw_info.h +++ b/src/gromacs/hardware/hw_info.h @@ -36,6 +36,7 @@ #define GMX_HARDWARE_HWINFO_H #include +#include #include "gromacs/hardware/gpu_hw_info.h" #include "gromacs/utility/basedefinitions.h" @@ -54,6 +55,7 @@ struct gmx_hw_info_t { /* Data for our local physical node */ struct gmx_gpu_info_t gpu_info; /* Information about GPUs detected in the system */ + std::vector compatibleGpus; /* Contains the device IDs of all GPUs that are compatible */ int nthreads_hw_avail; /* Number of hardware threads available; this number is based on the number of CPUs reported as available diff --git a/src/gromacs/taskassignment/hardwareassign.cpp b/src/gromacs/taskassignment/hardwareassign.cpp index ef9cb744bb..231d0a1d05 100644 --- a/src/gromacs/taskassignment/hardwareassign.cpp +++ b/src/gromacs/taskassignment/hardwareassign.cpp @@ -204,25 +204,10 @@ static void exitUnlessUserGpuTaskAssignmentIsValid(const gmx_gpu_info_t &gpu_i } } -std::vector getCompatibleGpus(const gmx_gpu_info_t &gpu_info) -{ - // Possible minor over-allocation here, but not important for anything - std::vector compatibleGpus; - compatibleGpus.reserve(gpu_info.n_dev); - for (int i = 0; i < gpu_info.n_dev; i++) - { - GMX_ASSERT(gpu_info.gpu_dev, "Invalid gpu_info.gpu_dev"); - if (isGpuCompatible(gpu_info, i)) - { - compatibleGpus.push_back(i); - } - } - return compatibleGpus; -} - std::vector mapPpRanksToGpus(bool rankCanUseGpu, const t_commrec *cr, const gmx_gpu_info_t &gpu_info, + const std::vector &compatibleGpus, const gmx_hw_opt_t &hw_opt) { std::vector taskAssignment; @@ -232,7 +217,6 @@ std::vector mapPpRanksToGpus(bool rankCanUseGpu, return taskAssignment; } - auto compatibleGpus = getCompatibleGpus(gpu_info); if (!hw_opt.gpuIdTaskAssignment.empty()) { auto userGpuTaskAssignment = parseGpuTaskAssignment(hw_opt.gpuIdTaskAssignment); diff --git a/src/gromacs/taskassignment/hardwareassign.h b/src/gromacs/taskassignment/hardwareassign.h index d8d3952bc9..0294d25d1a 100644 --- a/src/gromacs/taskassignment/hardwareassign.h +++ b/src/gromacs/taskassignment/hardwareassign.h @@ -74,15 +74,6 @@ class MDLogger; */ std::vector parseGpuTaskAssignment(const std::string &gpuTaskAssignment); -/*! \brief Filter the compatible GPUs - * - * This function filters gpu_info.gpu_dev for compatible GPUs based - * on the previously run compatibility tests. - * - * \param[in] gpu_info Information detected about GPUs, including compatibility - * \return vector of IDs of GPUs already recorded as compatible */ -std::vector getCompatibleGpus(const gmx_gpu_info_t &gpu_info); - /*! \brief Assign PP ranks to valid GPU IDs. * * Will return a validated mapping from PP ranks (ie tasks that can @@ -96,7 +87,8 @@ std::vector getCompatibleGpus(const gmx_gpu_info_t &gpu_info); * * \param[in] rankCanUseGpu Whether this rank can execute a task on a GPU. * \param[in] cr Communication record. - * \param[in] gpu_info Information detected about GPUs, including compatibility. + * \param[in] gpu_info Information detected about GPUs + * \param[in] compatibleGpus Vector of GPUs that are compatible * \param[in] hw_opt Parallelisation options, including any user-specified GPU task assignment. * * \returns A valid GPU selection. @@ -104,6 +96,7 @@ std::vector getCompatibleGpus(const gmx_gpu_info_t &gpu_info); std::vector mapPpRanksToGpus(bool rankCanUseGpu, const t_commrec *cr, const gmx_gpu_info_t &gpu_info, + const std::vector &compatibleGpus, const gmx_hw_opt_t &hw_opt); } // namespace diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp index 940079ff28..93bdd871c8 100644 --- a/src/programs/mdrun/runner.cpp +++ b/src/programs/mdrun/runner.cpp @@ -876,7 +876,7 @@ int Mdrunner::mdrunner() * or sharing devices on a node, either from the user * selection, or automatically. */ bool rankCanUseGpu = thisRankHasDuty(cr, DUTY_PP); - gpuTaskAssignment = mapPpRanksToGpus(rankCanUseGpu, cr, hwinfo->gpu_info, hw_opt); + gpuTaskAssignment = mapPpRanksToGpus(rankCanUseGpu, cr, hwinfo->gpu_info, hwinfo->compatibleGpus, hw_opt); } reportGpuUsage(mdlog, hwinfo->gpu_info, !hw_opt.gpuIdTaskAssignment.empty(), -- 2.22.0