From b97a24ea052831d9b8035b492d51150b3a4acdc4 Mon Sep 17 00:00:00 2001
From: Artem Zhmurov
Date: Tue, 6 Oct 2020 14:15:54 +0200
Subject: [PATCH] Simplify GPU usage decision function signatures

Some of the GPU device usage decisions are made based on the number of
available GPU devices, not on their indices. Hence it is natural to pass
a single integer instead of the vector.
---
 src/gromacs/mdrun/runner.cpp                  |  9 +++----
 src/gromacs/taskassignment/decidegpuusage.cpp | 12 ++++------
 src/gromacs/taskassignment/decidegpuusage.h   | 24 +++++++++++--------
 .../taskassignment/resourcedivision.cpp       | 20 ++++++++--------
 src/gromacs/taskassignment/resourcedivision.h | 20 ++++++++--------
 5 files changed, 44 insertions(+), 41 deletions(-)

diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp
index b22881a05d..dfc399541f 100644
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -781,6 +781,7 @@ int Mdrunner::mdrunner()
     gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);

     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->deviceInfoList, hw_opt.gpuIdsAvailable);
+    const int        numDevicesToUse = gmx::ssize(gpuIdsToUse);

     // Print citation requests after all software/hardware printing
     pleaseCiteGromacs(fplog);
@@ -823,12 +824,12 @@ int Mdrunner::mdrunner()
         // the number of GPUs to choose the number of ranks.
         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
         useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi(
-                nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
+                nonbondedTarget, numDevicesToUse, userGpuTaskAssignment, emulateGpuNonbonded,
                 canUseGpuForNonbonded,
                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
                 hw_opt.nthreads_tmpi);
         useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
-                useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
+                useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo,
                 *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
     }
     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
@@ -839,7 +840,7 @@ int Mdrunner::mdrunner()
      * prevent any possible subsequent checks from working
      * correctly. */
     hw_opt.nthreads_tmpi =
-            get_nthreads_mpi(hwinfo, &hw_opt, gpuIdsToUse, useGpuForNonbonded, useGpuForPme,
+            get_nthreads_mpi(hwinfo, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
                              inputrec.get(), &mtop, mdlog, membedHolder.doMembed());

     // Now start the threads for thread MPI.
@@ -1285,7 +1286,7 @@ int Mdrunner::mdrunner()
     // where appropriate.
     if (!userGpuTaskAssignment.empty())
     {
-        gpuTaskAssignments.logPerformanceHints(mdlog, ssize(gpuIdsToUse));
+        gpuTaskAssignments.logPerformanceHints(mdlog, numDevicesToUse);
     }

     if (PAR(cr))
diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp
index be1657af8e..3ec7ce027e 100644
--- a/src/gromacs/taskassignment/decidegpuusage.cpp
+++ b/src/gromacs/taskassignment/decidegpuusage.cpp
@@ -109,7 +109,7 @@ const char* g_specifyEverythingFormatString =
 } // namespace

 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbondedTarget,
-                                                     const std::vector<int>&   gpuIdsToUse,
+                                                     const int                 numDevicesToUse,
                                                      const std::vector<int>&   userGpuTaskAssignment,
                                                      const EmulateGpuNonbonded emulateGpuNonbonded,
                                                      const bool                buildSupportsNonbondedOnGpu,
@@ -145,15 +145,13 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget non
     // Because this is thread-MPI, we already know about the GPUs that
     // all potential ranks can use, and can use that in a global
     // decision that will later be consistent.
-    auto haveGpus = !gpuIdsToUse.empty();
-
-    // If we get here, then the user permitted or required GPUs.
-    return haveGpus;
+    return (numDevicesToUse > 0);
 }

 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
                                                const TaskTarget        pmeTarget,
-                                               const std::vector<int>& gpuIdsToUse,
+                                               const int               numDevicesToUse,
                                                const std::vector<int>& userGpuTaskAssignment,
                                                const gmx_hw_info_t&    hardwareInfo,
                                                const t_inputrec&       inputrec,
@@ -222,7 +220,7 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuFor
     {
         // PME can run well on a GPU shared with NB, and we permit
         // mdrun to default to try that.
-        return !gpuIdsToUse.empty();
+        return numDevicesToUse > 0;
     }

     if (numRanksPerSimulation < 1)
@@ -230,7 +228,7 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuFor
     {
         // Full automated mode for thread-MPI (the default). PME can
         // run well on a GPU shared with NB, and we permit mdrun to
         // default to it if there is only one GPU available.
-        return (gpuIdsToUse.size() == 1);
+        return (numDevicesToUse == 1);
     }

     // Not enough support for PME on GPUs for anything else
diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h
index b151c5d6ea..7dd6ae9b30 100644
--- a/src/gromacs/taskassignment/decidegpuusage.h
+++ b/src/gromacs/taskassignment/decidegpuusage.h
@@ -101,20 +101,24 @@ class MDAtoms;
  * user. So we need to consider this before any automated choice of
  * the number of thread-MPI ranks.
  *
- * \param[in] nonbondedTarget              The user's choice for mdrun -nb for where to assign short-ranged nonbonded interaction tasks.
- * \param[in] gpuIdsToUse                  The compatible GPUs that the user permitted us to use.
- * \param[in] userGpuTaskAssignment        The user-specified assignment of GPU tasks to device IDs.
- * \param[in] emulateGpuNonbonded          Whether we will emulate GPU calculation of nonbonded interactions.
- * \param[in] buildSupportsNonbondedOnGpu  Whether GROMACS was built with GPU support.
- * \param[in] nonbondedOnGpuIsUseful       Whether computing nonbonded interactions on a GPU is useful for this calculation.
- * \param[in] numRanksPerSimulation        The number of ranks in each simulation.
+ * \param[in] nonbondedTarget              The user's choice for mdrun -nb for where to assign
+ *                                         short-ranged nonbonded interaction tasks.
+ * \param[in] numDevicesToUse              Number of compatible GPUs that the user permitted
+ *                                         us to use.
+ * \param[in] userGpuTaskAssignment        The user-specified assignment of GPU tasks to device IDs.
+ * \param[in] emulateGpuNonbonded          Whether we will emulate GPU calculation of nonbonded
+ *                                         interactions.
+ * \param[in] buildSupportsNonbondedOnGpu  Whether GROMACS was built with GPU support.
+ * \param[in] nonbondedOnGpuIsUseful       Whether computing nonbonded interactions on a GPU is
+ *                                         useful for this calculation.
+ * \param[in] numRanksPerSimulation        The number of ranks in each simulation.
  *
  * \returns    Whether the simulation will run nonbonded tasks on GPUs.
  *
  * \throws     std::bad_alloc          If out of memory
  *             InconsistentInputError  If the user requirements are inconsistent.
  */
 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget              nonbondedTarget,
-                                                     const std::vector<int>& gpuIdsToUse,
+                                                     int                     numDevicesToUse,
                                                      const std::vector<int>& userGpuTaskAssignment,
                                                      EmulateGpuNonbonded     emulateGpuNonbonded,
                                                      bool                    buildSupportsNonbondedOnGpu,
@@ -132,7 +136,7 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget non
  * \param[in]  useGpuForNonbonded     Whether GPUs will be used for nonbonded interactions.
  * \param[in]  pmeTarget              The user's choice for mdrun -pme for where to assign
  *                                    long-ranged PME nonbonded interaction tasks.
- * \param[in]  gpuIdsToUse            The compatible GPUs that the user permitted us to use.
+ * \param[in]  numDevicesToUse        The number of compatible GPUs that the user permitted us to use.
  * \param[in]  userGpuTaskAssignment  The user-specified assignment of GPU tasks to device IDs.
  * \param[in]  hardwareInfo           Hardware information
  * \param[in]  inputrec               The user input
@@ -145,7 +149,7 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget non
  *             InconsistentInputError  If the user requirements are inconsistent.
  */
 bool decideWhetherToUseGpusForPmeWithThreadMpi(bool                    useGpuForNonbonded,
                                                TaskTarget              pmeTarget,
-                                               const std::vector<int>& gpuIdsToUse,
+                                               int                     numDevicesToUse,
                                                const std::vector<int>& userGpuTaskAssignment,
                                                const gmx_hw_info_t&    hardwareInfo,
                                                const t_inputrec&       inputrec,
diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp
index ac44344003..a967ed1c1b 100644
--- a/src/gromacs/taskassignment/resourcedivision.cpp
+++ b/src/gromacs/taskassignment/resourcedivision.cpp
@@ -339,15 +339,15 @@ private:
  * Thus all options should be internally consistent and consistent
  * with the hardware, except that ntmpi could be larger than #GPU.
  */
-int get_nthreads_mpi(const gmx_hw_info_t*    hwinfo,
-                     gmx_hw_opt_t*           hw_opt,
-                     const std::vector<int>& gpuIdsToUse,
-                     bool                    nonbondedOnGpu,
-                     bool                    pmeOnGpu,
-                     const t_inputrec*       inputrec,
-                     const gmx_mtop_t*       mtop,
-                     const gmx::MDLogger&    mdlog,
-                     bool                    doMembed)
+int get_nthreads_mpi(const gmx_hw_info_t* hwinfo,
+                     gmx_hw_opt_t*        hw_opt,
+                     const int            numDevicesToUse,
+                     bool                 nonbondedOnGpu,
+                     bool                 pmeOnGpu,
+                     const t_inputrec*    inputrec,
+                     const gmx_mtop_t*    mtop,
+                     const gmx::MDLogger& mdlog,
+                     bool                 doMembed)
 {
     int nthreads_hw, nthreads_tot_max, nrank, ngpu;
     int min_atoms_per_mpi_rank;
@@ -432,7 +432,7 @@ int get_nthreads_mpi(const gmx_hw_info_t* hwinfo,
     /* nonbondedOnGpu might be false e.g. because this simulation
      * is a rerun with energy groups. */
-    ngpu = (nonbondedOnGpu ? gmx::ssize(gpuIdsToUse) : 0);
+    ngpu = (nonbondedOnGpu ? numDevicesToUse : 0);

     nrank = get_tmpi_omp_thread_division(hwinfo, *hw_opt, nthreads_tot_max, ngpu);
diff --git a/src/gromacs/taskassignment/resourcedivision.h b/src/gromacs/taskassignment/resourcedivision.h
index a2185babf4..c45ed95493 100644
--- a/src/gromacs/taskassignment/resourcedivision.h
+++ b/src/gromacs/taskassignment/resourcedivision.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -73,15 +73,15 @@ class PhysicalNodeCommunicator;
  * with the hardware, except that ntmpi could be larger than number of GPUs.
  * If necessary, this function will modify hw_opt->nthreads_omp.
  */
-int get_nthreads_mpi(const gmx_hw_info_t*    hwinfo,
-                     gmx_hw_opt_t*           hw_opt,
-                     const std::vector<int>& gpuIdsToUse,
-                     bool                    nonbondedOnGpu,
-                     bool                    pmeOnGpu,
-                     const t_inputrec*       inputrec,
-                     const gmx_mtop_t*       mtop,
-                     const gmx::MDLogger&    mdlog,
-                     bool                    doMembed);
+int get_nthreads_mpi(const gmx_hw_info_t* hwinfo,
+                     gmx_hw_opt_t*        hw_opt,
+                     int                  numDevicesToUse,
+                     bool                 nonbondedOnGpu,
+                     bool                 pmeOnGpu,
+                     const t_inputrec*    inputrec,
+                     const gmx_mtop_t*    mtop,
+                     const gmx::MDLogger& mdlog,
+                     bool                 doMembed);

 /*! \brief Check if the number of OpenMP threads is within reasonable range
  * considering the hardware used. This is a crude check, but mainly
--
2.22.0
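
A small self-contained sketch of the idea behind the new signatures, for readers skimming the patch: when a decision depends only on how many compatible devices are available, a plain count keeps the interface minimal. The helper below is illustrative only (not GROMACS API); its single-GPU rule mirrors the thread-MPI default touched in decidegpuusage.cpp above.

    #include <cstdio>
    #include <vector>

    // Illustrative stand-in for a decision helper that needs only the device count.
    static bool defaultPmeToGpu(int numDevicesToUse)
    {
        // Mirrors the patched thread-MPI default: auto-enable only when exactly
        // one compatible device is available.
        return numDevicesToUse == 1;
    }

    int main()
    {
        const std::vector<int> gpuIdsToUse     = { 0, 2 }; // e.g. two compatible device IDs
        const int              numDevicesToUse = static_cast<int>(gpuIdsToUse.size());
        std::printf("default PME to GPU: %s\n", defaultPmeToGpu(numDevicesToUse) ? "yes" : "no");
        return 0;
    }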