Simplify GPU usage decision function signatures

author Artem Zhmurov <zhmurov@gmail.com>

Tue, 6 Oct 2020 12:15:54 +0000 (14:15 +0200)

committer Artem Zhmurov <zhmurov@gmail.com>

Tue, 6 Oct 2020 12:18:18 +0000 (12:18 +0000)
author Artem Zhmurov <zhmurov@gmail.com>
Tue, 6 Oct 2020 12:15:54 +0000 (14:15 +0200)
committer Artem Zhmurov <zhmurov@gmail.com>
Tue, 6 Oct 2020 12:18:18 +0000 (12:18 +0000)
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index b22881a05d42ca286dce9f4f53cdbdd040a42386..dfc399541f395bd24fcd8e2ee5ef06df0812cfb4 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -781,6 +781,7 @@ int Mdrunner::mdrunner()
      gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
  
      std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->deviceInfoList, hw_opt.gpuIdsAvailable);
+    const int        numDevicesToUse = gmx::ssize(gpuIdsToUse);
  
      // Print citation requests after all software/hardware printing
      pleaseCiteGromacs(fplog);
@@ -823,12 +824,12 @@ int Mdrunner::mdrunner()
              // the number of GPUs to choose the number of ranks.
              auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
              useGpuForNonbonded         = decideWhetherToUseGpusForNonbondedWithThreadMpi(
-                    nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
+                    nonbondedTarget, numDevicesToUse, userGpuTaskAssignment, emulateGpuNonbonded,
                      canUseGpuForNonbonded,
                      gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
                      hw_opt.nthreads_tmpi);
              useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
-                    useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
+                    useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo,
                      *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
@@ -839,7 +840,7 @@ int Mdrunner::mdrunner()
           * prevent any possible subsequent checks from working
           * correctly. */
          hw_opt.nthreads_tmpi =
-                get_nthreads_mpi(hwinfo, &hw_opt, gpuIdsToUse, useGpuForNonbonded, useGpuForPme,
+                get_nthreads_mpi(hwinfo, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
                                   inputrec.get(), &mtop, mdlog, membedHolder.doMembed());
  
          // Now start the threads for thread MPI.
@@ -1285,7 +1286,7 @@ int Mdrunner::mdrunner()
      // where appropriate.
      if (!userGpuTaskAssignment.empty())
      {
-        gpuTaskAssignments.logPerformanceHints(mdlog, ssize(gpuIdsToUse));
+        gpuTaskAssignments.logPerformanceHints(mdlog, numDevicesToUse);
      }
  
      if (PAR(cr))
diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp

index be1657af8eaa54714c9b90240c3f6d3d3c241d90..3ec7ce027e3bb3c0fed3876f0a7bb0315e517789 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.cpp
+++ b/src/gromacs/taskassignment/decidegpuusage.cpp
@@ -109,7 +109,7 @@ const char* g_specifyEverythingFormatString =
  } // namespace
  
  bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget        nonbondedTarget,
-                                                     const std::vector<int>& gpuIdsToUse,
+                                                     const int               numDevicesToUse,
                                                       const std::vector<int>& userGpuTaskAssignment,
                                                       const EmulateGpuNonbonded emulateGpuNonbonded,
                                                       const bool buildSupportsNonbondedOnGpu,
@@ -145,15 +145,13 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget        non
      // Because this is thread-MPI, we already know about the GPUs that
      // all potential ranks can use, and can use that in a global
      // decision that will later be consistent.
-    auto haveGpus = !gpuIdsToUse.empty();
-
      // If we get here, then the user permitted or required GPUs.
-    return haveGpus;
+    return (numDevicesToUse > 0);
  }
  
  bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
                                                 const TaskTarget        pmeTarget,
-                                               const std::vector<int>& gpuIdsToUse,
+                                               const int               numDevicesToUse,
                                                 const std::vector<int>& userGpuTaskAssignment,
                                                 const gmx_hw_info_t&    hardwareInfo,
                                                 const t_inputrec&       inputrec,
@@ -222,7 +220,7 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuFor
      {
          // PME can run well on a GPU shared with NB, and we permit
          // mdrun to default to try that.
-        return !gpuIdsToUse.empty();
+        return numDevicesToUse > 0;
      }
  
      if (numRanksPerSimulation < 1)
@@ -230,7 +228,7 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuFor
          // Full automated mode for thread-MPI (the default). PME can
          // run well on a GPU shared with NB, and we permit mdrun to
          // default to it if there is only one GPU available.
-        return (gpuIdsToUse.size() == 1);
+        return (numDevicesToUse == 1);
      }
  
      // Not enough support for PME on GPUs for anything else
diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h

index b151c5d6eae0775db7756930d4b99e160beaedce..7dd6ae9b3008fd451dae97f4615f9e33085cb6d7 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.h
+++ b/src/gromacs/taskassignment/decidegpuusage.h
@@ -101,20 +101,24 @@ class MDAtoms;
   * user. So we need to consider this before any automated choice of
   * the number of thread-MPI ranks.
   *
- * \param[in]  nonbondedTarget             The user's choice for mdrun -nb for where to assign short-ranged nonbonded interaction tasks.
- * \param[in]  gpuIdsToUse                 The compatible GPUs that the user permitted us to use.
- * \param[in]  userGpuTaskAssignment       The user-specified assignment of GPU tasks to device IDs.
- * \param[in]  emulateGpuNonbonded         Whether we will emulate GPU calculation of nonbonded interactions.
- * \param[in]  buildSupportsNonbondedOnGpu Whether GROMACS was built with GPU support.
- * \param[in]  nonbondedOnGpuIsUseful      Whether computing nonbonded interactions on a GPU is useful for this calculation.
- * \param[in]  numRanksPerSimulation       The number of ranks in each simulation.
+ * \param[in] nonbondedTarget              The user's choice for mdrun -nb for where to assign
+ *                                         short-ranged nonbonded interaction tasks.
+ * \param[in] numDevicesToUse              Number of compatible GPUs that the user permitted
+ *                                         us to use.
+ * \param[in] userGpuTaskAssignment        The user-specified assignment of GPU tasks to device IDs.
+ * \param[in] emulateGpuNonbonded          Whether we will emulate GPU calculation of nonbonded
+ *                                         interactions.
+ * \param[in] buildSupportsNonbondedOnGpu  Whether GROMACS was built with GPU support.
+ * \param[in] nonbondedOnGpuIsUseful       Whether computing nonbonded interactions on a GPU is
+ *                                         useful for this calculation.
+ * \param[in] numRanksPerSimulation        The number of ranks in each simulation.
   *
   * \returns    Whether the simulation will run nonbonded tasks on GPUs.
   *
   * \throws     std::bad_alloc          If out of memory
   *             InconsistentInputError  If the user requirements are inconsistent. */
  bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget              nonbondedTarget,
-                                                     const std::vector<int>& gpuIdsToUse,
+                                                     int                     numDevicesToUse,
                                                       const std::vector<int>& userGpuTaskAssignment,
                                                       EmulateGpuNonbonded     emulateGpuNonbonded,
                                                       bool buildSupportsNonbondedOnGpu,
@@ -132,7 +136,7 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget              non
   * \param[in]  useGpuForNonbonded        Whether GPUs will be used for nonbonded interactions.
   * \param[in]  pmeTarget                 The user's choice for mdrun -pme for where to assign
   *                                       long-ranged PME nonbonded interaction tasks.
- * \param[in]  gpuIdsToUse               The compatible GPUs that the user permitted us to use.
+ * \param[in]  numDevicesToUse           The number of compatible GPUs that the user permitted us to use.
   * \param[in]  userGpuTaskAssignment     The user-specified assignment of GPU tasks to device IDs.
   * \param[in]  hardwareInfo              Hardware information
   * \param[in]  inputrec                  The user input
@@ -145,7 +149,7 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget              non
   *             InconsistentInputError  If the user requirements are inconsistent. */
  bool decideWhetherToUseGpusForPmeWithThreadMpi(bool                    useGpuForNonbonded,
                                                 TaskTarget              pmeTarget,
-                                               const std::vector<int>& gpuIdsToUse,
+                                               int                     numDevicesToUse,
                                                 const std::vector<int>& userGpuTaskAssignment,
                                                 const gmx_hw_info_t&    hardwareInfo,
                                                 const t_inputrec&       inputrec,
diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp

index ac44344003de7544368b281c18e667681b7e22f0..a967ed1c1b5d265154194c33953b58cb4e473ce2 100644 (file)
--- a/src/gromacs/taskassignment/resourcedivision.cpp
+++ b/src/gromacs/taskassignment/resourcedivision.cpp
@@ -339,15 +339,15 @@ private:
   * Thus all options should be internally consistent and consistent
   * with the hardware, except that ntmpi could be larger than #GPU.
   */
-int get_nthreads_mpi(const gmx_hw_info_t*    hwinfo,
-                     gmx_hw_opt_t*           hw_opt,
-                     const std::vector<int>& gpuIdsToUse,
-                     bool                    nonbondedOnGpu,
-                     bool                    pmeOnGpu,
-                     const t_inputrec*       inputrec,
-                     const gmx_mtop_t*       mtop,
-                     const gmx::MDLogger&    mdlog,
-                     bool                    doMembed)
+int get_nthreads_mpi(const gmx_hw_info_t* hwinfo,
+                     gmx_hw_opt_t*        hw_opt,
+                     const int            numDevicesToUse,
+                     bool                 nonbondedOnGpu,
+                     bool                 pmeOnGpu,
+                     const t_inputrec*    inputrec,
+                     const gmx_mtop_t*    mtop,
+                     const gmx::MDLogger& mdlog,
+                     bool                 doMembed)
  {
      int nthreads_hw, nthreads_tot_max, nrank, ngpu;
      int min_atoms_per_mpi_rank;
@@ -432,7 +432,7 @@ int get_nthreads_mpi(const gmx_hw_info_t*    hwinfo,
  
      /* nonbondedOnGpu might be false e.g. because this simulation
       * is a rerun with energy groups. */
-    ngpu = (nonbondedOnGpu ? gmx::ssize(gpuIdsToUse) : 0);
+    ngpu = (nonbondedOnGpu ? numDevicesToUse : 0);
  
      nrank = get_tmpi_omp_thread_division(hwinfo, *hw_opt, nthreads_tot_max, ngpu);
  
diff --git a/src/gromacs/taskassignment/resourcedivision.h b/src/gromacs/taskassignment/resourcedivision.h

index a2185babf4a2aa2bd83d2dde99133576890e9282..c45ed954939848314a45f63525b9bc8043ae7e67 100644 (file)
--- a/src/gromacs/taskassignment/resourcedivision.h
+++ b/src/gromacs/taskassignment/resourcedivision.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -73,15 +73,15 @@ class PhysicalNodeCommunicator;
   * with the hardware, except that ntmpi could be larger than number of GPUs.
   * If necessary, this function will modify hw_opt->nthreads_omp.
   */
-int get_nthreads_mpi(const gmx_hw_info_t*    hwinfo,
-                     gmx_hw_opt_t*           hw_opt,
-                     const std::vector<int>& gpuIdsToUse,
-                     bool                    nonbondedOnGpu,
-                     bool                    pmeOnGpu,
-                     const t_inputrec*       inputrec,
-                     const gmx_mtop_t*       mtop,
-                     const gmx::MDLogger&    mdlog,
-                     bool                    doMembed);
+int get_nthreads_mpi(const gmx_hw_info_t* hwinfo,
+                     gmx_hw_opt_t*        hw_opt,
+                     int                  numDevicesToUse,
+                     bool                 nonbondedOnGpu,
+                     bool                 pmeOnGpu,
+                     const t_inputrec*    inputrec,
+                     const gmx_mtop_t*    mtop,
+                     const gmx::MDLogger& mdlog,
+                     bool                 doMembed);
  
  /*! \brief Check if the number of OpenMP threads is within reasonable range
   * considering the hardware used. This is a crude check, but mainly
author	Artem Zhmurov <zhmurov@gmail.com>
	Tue, 6 Oct 2020 12:15:54 +0000 (14:15 +0200)
committer	Artem Zhmurov <zhmurov@gmail.com>
	Tue, 6 Oct 2020 12:18:18 +0000 (12:18 +0000)
src/gromacs/mdrun/runner.cpp		patch | blob | history
src/gromacs/taskassignment/decidegpuusage.cpp		patch | blob | history
src/gromacs/taskassignment/decidegpuusage.h		patch | blob | history
src/gromacs/taskassignment/resourcedivision.cpp		patch | blob | history
src/gromacs/taskassignment/resourcedivision.h		patch | blob | history