From b97a24ea052831d9b8035b492d51150b3a4acdc4 Mon Sep 17 00:00:00 2001
From: Artem Zhmurov
Date: Tue, 6 Oct 2020 14:15:54 +0200
Subject: [PATCH] Simplify GPU usage decision function signatures

Some of the GPU device usage decisions are made based on the number of
available GPU devices, not on their indices. Hence it is natural to pass
a single integer instead of the vector.
---
 src/gromacs/mdrun/runner.cpp                  |  9 +++----
 src/gromacs/taskassignment/decidegpuusage.cpp | 12 ++++------
 src/gromacs/taskassignment/decidegpuusage.h   | 24 +++++++++++--------
 .../taskassignment/resourcedivision.cpp       | 20 ++++++++--------
 src/gromacs/taskassignment/resourcedivision.h | 20 ++++++++--------
 5 files changed, 44 insertions(+), 41 deletions(-)

diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp
index b22881a05d..dfc399541f 100644
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -781,6 +781,7 @@ int Mdrunner::mdrunner()
     gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);

     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->deviceInfoList, hw_opt.gpuIdsAvailable);
+    const int        numDevicesToUse = gmx::ssize(gpuIdsToUse);

     // Print citation requests after all software/hardware printing
     pleaseCiteGromacs(fplog);
@@ -823,12 +824,12 @@ int Mdrunner::mdrunner()
         // the number of GPUs to choose the number of ranks.
         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
         useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi(
-                nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
+                nonbondedTarget, numDevicesToUse, userGpuTaskAssignment, emulateGpuNonbonded,
                 canUseGpuForNonbonded,
                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
                 hw_opt.nthreads_tmpi);
         useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
-                useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
+                useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo,
                 *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
     }
     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
@@ -839,7 +840,7 @@ int Mdrunner::mdrunner()
      * prevent any possible subsequent checks from working
      * correctly. */
     hw_opt.nthreads_tmpi =
-            get_nthreads_mpi(hwinfo, &hw_opt, gpuIdsToUse, useGpuForNonbonded, useGpuForPme,
+            get_nthreads_mpi(hwinfo, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
                              inputrec.get(), &mtop, mdlog, membedHolder.doMembed());

     // Now start the threads for thread MPI.
@@ -1285,7 +1286,7 @@ int Mdrunner::mdrunner()
     // where appropriate.
     if (!userGpuTaskAssignment.empty())
     {
-        gpuTaskAssignments.logPerformanceHints(mdlog, ssize(gpuIdsToUse));
+        gpuTaskAssignments.logPerformanceHints(mdlog, numDevicesToUse);
     }

     if (PAR(cr))
diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp
index be1657af8e..3ec7ce027e 100644
--- a/src/gromacs/taskassignment/decidegpuusage.cpp
+++ b/src/gromacs/taskassignment/decidegpuusage.cpp
@@ -109,7 +109,7 @@ const char* g_specifyEverythingFormatString =
 } // namespace

 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbondedTarget,
-                                                     const std::vector<int>&   gpuIdsToUse,
+                                                     const int                 numDevicesToUse,
                                                      const std::vector<int>&   userGpuTaskAssignment,
                                                      const EmulateGpuNonbonded emulateGpuNonbonded,
                                                      const bool                buildSupportsNonbondedOnGpu,
@@ -145,15 +145,13 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget non
     // Because this is thread-MPI, we already know about the GPUs that
     // all potential ranks can use, and can use that in a global
     // decision that will later be consistent.
-    auto haveGpus = !gpuIdsToUse.empty();
-
-    // If we get here, then the user permitted or required GPUs.
-    return haveGpus;
+    return (numDevicesToUse > 0);
 }

 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
                                                const TaskTarget        pmeTarget,
-                                               const std::vector<int>& gpuIdsToUse,
+                                               const int               numDevicesToUse,
                                                const std::vector<int>& userGpuTaskAssignment,
                                                const gmx_hw_info_t&    hardwareInfo,
                                                const t_inputrec&       inputrec,
@@ -222,7 +220,7 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuFor
     {
         // PME can run well on a GPU shared with NB, and we permit
         // mdrun to default to try that.
-        return !gpuIdsToUse.empty();
+        return numDevicesToUse > 0;
     }

     if (numRanksPerSimulation < 1)
@@ -230,7 +228,7 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuFor
     {
         // Full automated mode for thread-MPI (the default). PME can
         // run well on a GPU shared with NB, and we permit mdrun to
         // default to it if there is only one GPU available.
-        return (gpuIdsToUse.size() == 1);
+        return (numDevicesToUse == 1);
     }

     // Not enough support for PME on GPUs for anything else
diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h
index b151c5d6ea..7dd6ae9b30 100644
--- a/src/gromacs/taskassignment/decidegpuusage.h
+++ b/src/gromacs/taskassignment/decidegpuusage.h
@@ -101,20 +101,24 @@ class MDAtoms;
  * user. So we need to consider this before any automated choice of
  * the number of thread-MPI ranks.
  *
- * \param[in] nonbondedTarget              The user's choice for mdrun -nb for where to assign short-ranged nonbonded interaction tasks.
- * \param[in] gpuIdsToUse                  The compatible GPUs that the user permitted us to use.
- * \param[in] userGpuTaskAssignment        The user-specified assignment of GPU tasks to device IDs.
- * \param[in] emulateGpuNonbonded          Whether we will emulate GPU calculation of nonbonded interactions.
- * \param[in] buildSupportsNonbondedOnGpu  Whether GROMACS was built with GPU support.
- * \param[in] nonbondedOnGpuIsUseful       Whether computing nonbonded interactions on a GPU is useful for this calculation.
- * \param[in] numRanksPerSimulation        The number of ranks in each simulation.
+ * \param[in] nonbondedTarget              The user's choice for mdrun -nb for where to assign
+ *                                         short-ranged nonbonded interaction tasks.
+ * \param[in] numDevicesToUse              Number of compatible GPUs that the user permitted
+ *                                         us to use.
+ * \param[in] userGpuTaskAssignment        The user-specified assignment of GPU tasks to device IDs.
+ * \param[in] emulateGpuNonbonded          Whether we will emulate GPU calculation of nonbonded
+ *                                         interactions.
+ * \param[in] buildSupportsNonbondedOnGpu  Whether GROMACS was built with GPU support.
+ * \param[in] nonbondedOnGpuIsUseful       Whether computing nonbonded interactions on a GPU is
+ *                                         useful for this calculation.
+ * \param[in] numRanksPerSimulation        The number of ranks in each simulation.
  *
  * \returns    Whether the simulation will run nonbonded tasks on GPUs.
  *
  * \throws     std::bad_alloc          If out of memory
  *             InconsistentInputError  If the user requirements are inconsistent.
  */
 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget              nonbondedTarget,
-                                                     const std::vector<int>& gpuIdsToUse,
+                                                     int                     numDevicesToUse,
                                                      const std::vector<int>& userGpuTaskAssignment,
                                                      EmulateGpuNonbonded     emulateGpuNonbonded,
                                                      bool                    buildSupportsNonbondedOnGpu,
@@ -132,7 +136,7 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget non
  * \param[in]  useGpuForNonbonded     Whether GPUs will be used for nonbonded interactions.
  * \param[in]  pmeTarget              The user's choice for mdrun -pme for where to assign
  *                                    long-ranged PME nonbonded interaction tasks.
- * \param[in]  gpuIdsToUse            The compatible GPUs that the user permitted us to use.
+ * \param[in]  numDevicesToUse        The number of compatible GPUs that the user permitted us to use.
  * \param[in]  userGpuTaskAssignment  The user-specified assignment of GPU tasks to device IDs.
  * \param[in]  hardwareInfo           Hardware information
  * \param[in]  inputrec               The user input
@@ -145,7 +149,7 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget non
  *             InconsistentInputError  If the user requirements are inconsistent.
  */
 bool decideWhetherToUseGpusForPmeWithThreadMpi(bool                    useGpuForNonbonded,
                                                TaskTarget              pmeTarget,
-                                               const std::vector<int>& gpuIdsToUse,
+                                               int                     numDevicesToUse,
                                                const std::vector<int>& userGpuTaskAssignment,
                                                const gmx_hw_info_t&    hardwareInfo,
                                                const t_inputrec&       inputrec,
diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp
index ac44344003..a967ed1c1b 100644
--- a/src/gromacs/taskassignment/resourcedivision.cpp
+++ b/src/gromacs/taskassignment/resourcedivision.cpp
@@ -339,15 +339,15 @@ private:
  * Thus all options should be internally consistent and consistent
  * with the hardware, except that ntmpi could be larger than #GPU.
  */
-int get_nthreads_mpi(const gmx_hw_info_t*    hwinfo,
-                     gmx_hw_opt_t*           hw_opt,
-                     const std::vector<int>& gpuIdsToUse,
-                     bool                    nonbondedOnGpu,
-                     bool                    pmeOnGpu,
-                     const t_inputrec*       inputrec,
-                     const gmx_mtop_t*       mtop,
-                     const gmx::MDLogger&    mdlog,
-                     bool                    doMembed)
+int get_nthreads_mpi(const gmx_hw_info_t* hwinfo,
+                     gmx_hw_opt_t*        hw_opt,
+                     const int            numDevicesToUse,
+                     bool                 nonbondedOnGpu,
+                     bool                 pmeOnGpu,
+                     const t_inputrec*    inputrec,
+                     const gmx_mtop_t*    mtop,
+                     const gmx::MDLogger& mdlog,
+                     bool                 doMembed)
 {
     int nthreads_hw, nthreads_tot_max, nrank, ngpu;
     int min_atoms_per_mpi_rank;
@@ -432,7 +432,7 @@ int get_nthreads_mpi(const gmx_hw_info_t* hwinfo,
     /* nonbondedOnGpu might be false e.g. because this simulation
      * is a rerun with energy groups. */
-    ngpu = (nonbondedOnGpu ? gmx::ssize(gpuIdsToUse) : 0);
+    ngpu = (nonbondedOnGpu ? numDevicesToUse : 0);

     nrank = get_tmpi_omp_thread_division(hwinfo, *hw_opt, nthreads_tot_max, ngpu);
diff --git a/src/gromacs/taskassignment/resourcedivision.h b/src/gromacs/taskassignment/resourcedivision.h
index a2185babf4..c45ed95493 100644
--- a/src/gromacs/taskassignment/resourcedivision.h
+++ b/src/gromacs/taskassignment/resourcedivision.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -73,15 +73,15 @@ class PhysicalNodeCommunicator;
  * with the hardware, except that ntmpi could be larger than number of GPUs.
  * If necessary, this function will modify hw_opt->nthreads_omp.
  */
-int get_nthreads_mpi(const gmx_hw_info_t*    hwinfo,
-                     gmx_hw_opt_t*           hw_opt,
-                     const std::vector<int>& gpuIdsToUse,
-                     bool                    nonbondedOnGpu,
-                     bool                    pmeOnGpu,
-                     const t_inputrec*       inputrec,
-                     const gmx_mtop_t*       mtop,
-                     const gmx::MDLogger&    mdlog,
-                     bool                    doMembed);
+int get_nthreads_mpi(const gmx_hw_info_t* hwinfo,
+                     gmx_hw_opt_t*        hw_opt,
+                     int                  numDevicesToUse,
+                     bool                 nonbondedOnGpu,
+                     bool                 pmeOnGpu,
+                     const t_inputrec*    inputrec,
+                     const gmx_mtop_t*    mtop,
+                     const gmx::MDLogger& mdlog,
+                     bool                 doMembed);

 /*! \brief Check if the number of OpenMP threads is within reasonable range
  * considering the hardware used. This is a crude check, but mainly
--
2.22.0
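
A small self-contained sketch of the idea behind the new signatures, for readers skimming the patch: when a decision depends only on how many compatible devices are available, a plain count keeps the interface minimal. The helper below is illustrative only (not GROMACS API); its single-GPU rule mirrors the thread-MPI default touched in decidegpuusage.cpp above.

    #include <cstdio>
    #include <vector>

    // Illustrative stand-in for a decision helper that needs only the device count.
    static bool defaultPmeToGpu(int numDevicesToUse)
    {
        // Mirrors the patched thread-MPI default: auto-enable only when exactly
        // one compatible device is available.
        return numDevicesToUse == 1;
    }

    int main()
    {
        const std::vector<int> gpuIdsToUse     = { 0, 2 }; // e.g. two compatible device IDs
        const int              numDevicesToUse = static_cast<int>(gpuIdsToUse.size());
        std::printf("default PME to GPU: %s\n", defaultPmeToGpu(numDevicesToUse) ? "yes" : "no");
        return 0;
    }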