Update treatment of GPU compatibility data structure

author Mark Abraham <mark.j.abraham@gmail.com>

Wed, 25 Oct 2017 09:45:32 +0000 (11:45 +0200)

committer Aleksei Iupinov <a.yupinov@gmail.com>

Tue, 31 Oct 2017 16:33:05 +0000 (17:33 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Wed, 25 Oct 2017 09:45:32 +0000 (11:45 +0200)
committer Aleksei Iupinov <a.yupinov@gmail.com>
Tue, 31 Oct 2017 16:33:05 +0000 (17:33 +0100)
diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.cpp b/src/gromacs/ewald/tests/testhardwarecontexts.cpp

index 7bba127012866685db4fad580786ecd1843e6782..cad2af2595952f6a45443148470abecff6cd47cc 100644 (file)
--- a/src/gromacs/ewald/tests/testhardwarecontexts.cpp
+++ b/src/gromacs/ewald/tests/testhardwarecontexts.cpp
@@ -49,7 +49,6 @@
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/hardware/hw_info.h"
-#include "gromacs/taskassignment/hardwareassign.h"
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/loggerbuilder.h"
  #include "gromacs/utility/unique_cptr.h"
@@ -105,8 +104,7 @@ void PmeTestEnvironment::SetUp()
  
      // Constructing contexts for all compatible GPUs - will be empty on non-GPU builds
      TestHardwareContexts gpuContexts;
-    const auto           compatibleGpus = getCompatibleGpus(hardwareInfo_->gpu_info);
-    for (int gpuIndex : compatibleGpus)
+    for (int gpuIndex : hardwareInfo_->compatibleGpus)
      {
          char        stmp[200] = {};
          get_gpu_device_info_string(stmp, hardwareInfo_->gpu_info, gpuIndex);
diff --git a/src/gromacs/gpu_utils/gpu_utils.cpp b/src/gromacs/gpu_utils/gpu_utils.cpp

index fcaed80d22eb83c273d185b5d88d3c1456d3d255..3b943dabfb31f3fd29f1d5ab39c48e1e236e5a7f 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils.cpp
@@ -33,7 +33,7 @@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /*! \internal \file
- *  \brief Stub functions for non-GPU builds
+ *  \brief Function definitions for non-GPU builds
   *
   *  \author Mark Abraham <mark.j.abraham@gmail.com>
   */
@@ -41,6 +41,8 @@
  
  #include "gpu_utils.h"
  
+#include "gromacs/hardware/gpu_hw_info.h"
+
  /*! \brief Set allocation functions used by the GPU host
   *
   * Since GPU support is not configured, there is no host memory to
@@ -52,3 +54,16 @@ void gpu_set_host_malloc_and_free(bool,
      *nb_alloc = nullptr;
      *nb_free  = nullptr;
  }
+
+//! This function is documented in the header file
+std::vector<int> getCompatibleGpus(const gmx_gpu_info_t & /*gpu_info*/)
+{
+    // There can't be any compatible GPUs
+    return std::vector<int>();
+}
+
+const char *getGpuCompatibilityDescription(const gmx_gpu_info_t & /*gpu_info*/,
+                                           int                    /*index*/)
+{
+    return gpu_detect_res_str[egpuNonexistent];
+}
diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu

index 61037e39c3f37cd766dec1acd0d8c18a89d12bf7..443aa70540f77b8bf5a463f7a3572806301e158c 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils.cu
+++ b/src/gromacs/gpu_utils/gpu_utils.cu
@@ -623,14 +623,20 @@ int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
      return retval;
  }
  
-bool isGpuCompatible(const gmx_gpu_info_t &gpu_info,
-                     int                   index)
+std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info)
  {
-    assert(gpu_info.n_dev == 0 || gpu_info.gpu_dev);
-
-    return (index >= gpu_info.n_dev ?
-            false :
-            gpu_info.gpu_dev[index].stat == egpuCompatible);
+    // Possible minor over-allocation here, but not important for anything
+    std::vector<int> compatibleGpus;
+    compatibleGpus.reserve(gpu_info.n_dev);
+    for (int i = 0; i < gpu_info.n_dev; i++)
+    {
+        assert(gpu_info.gpu_dev);
+        if (gpu_info.gpu_dev[i].stat == egpuCompatible)
+        {
+            compatibleGpus.push_back(i);
+        }
+    }
+    return compatibleGpus;
  }
  
  const char *getGpuCompatibilityDescription(const gmx_gpu_info_t &gpu_info,
diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h

index b4371049be23ac5a520952a8df8b15ae25ff7c7f..c85b1bed668c4699d1b0da42d5ee6bd25aeb43e0 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils.h
+++ b/src/gromacs/gpu_utils/gpu_utils.h
@@ -47,6 +47,8 @@
  
  #include <cstdio>
  
+#include <vector>
+
  #include "gromacs/gpu_utils/gpu_macros.h"
  #include "gromacs/utility/basedefinitions.h"
  
@@ -74,15 +76,14 @@ class MDLogger;
  GPU_FUNC_QUALIFIER
  int detect_gpus(struct gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info), char *GPU_FUNC_ARGUMENT(err_str)) GPU_FUNC_TERM_WITH_RETURN(-1)
  
-/*! \brief Return whether the GPU with given \c index is compatible, ie suitable for use.
+/*! \brief Return a container of the detected GPUs that are compatible.
   *
- * \param[in]   gpu_info    Information about detected GPUs
- * \param[in]   index       index of GPU to ask about
- * \returns                 Whether the GPU is compatible.
- */
-GPU_FUNC_QUALIFIER
-bool isGpuCompatible(const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
-                     int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM_WITH_RETURN(false)
+ * This function filters the result of the detection for compatible
+ * GPUs, based on the previously run compatibility tests.
+ *
+ * \param[in]     gpu_info    Information detected about GPUs, including compatibility.
+ * \return                    vector of IDs of GPUs already recorded as compatible */
+std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info);
  
  /*! \brief Return a string describing how compatible the GPU with given \c index is.
   *
@@ -90,9 +91,8 @@ bool isGpuCompatible(const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
   * \param[in]   index       index of GPU to ask about
   * \returns                 A null-terminated C string describing the compatibility status, useful for error messages.
   */
-GPU_FUNC_QUALIFIER
  const char *getGpuCompatibilityDescription(const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
-                                           int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM_WITH_RETURN("")
+                                           int GPU_FUNC_ARGUMENT(index));
  
  /*! \brief Frees the gpu_dev and dev_use array fields of \p gpu_info.
   *
diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp

index efd99342558c0f9dd92f62cff45c080b26e37d67..5195baccc5d97ffcede987b83d7eb21aa52d1bf4 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
@@ -343,12 +343,20 @@ void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info)
  }
  
  //! This function is documented in the header file
-bool isGpuCompatible(const gmx_gpu_info_t &gpu_info,
-                     int                   index)
+std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info)
  {
-    return (index >= gpu_info.n_dev ?
-            false :
-            gpu_info.gpu_dev[index].stat == egpuCompatible);
+    // Possible minor over-allocation here, but not important for anything
+    std::vector<int> compatibleGpus;
+    compatibleGpus.reserve(gpu_info.n_dev);
+    for (int i = 0; i < gpu_info.n_dev; i++)
+    {
+        assert(gpu_info.gpu_dev);
+        if (gpu_info.gpu_dev[i].stat == egpuCompatible)
+        {
+            compatibleGpus.push_back(i);
+        }
+    }
+    return compatibleGpus;
  }
  
  //! This function is documented in the header file
diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp

index af9fd2edc620ec99136baeecf6254de078d9c6fc..48f9a8c2c7d0dfefe1009b94c0c6b7cfa87c1e9e 100644 (file)
--- a/src/gromacs/hardware/detecthardware.cpp
+++ b/src/gromacs/hardware/detecthardware.cpp
@@ -496,6 +496,7 @@ gmx_hw_info_t *gmx_detect_hardware(const gmx::MDLogger &mdlog, const t_commrec *
  
          gmx_detect_gpus(mdlog, cr);
          gmx_collect_hardware_mpi(*hwinfo_g->cpuInfo);
+        hwinfo_g->compatibleGpus = getCompatibleGpus(hwinfo_g->gpu_info);
      }
      /* increase the reference counter */
      n_hwinfo++;
diff --git a/src/gromacs/hardware/hw_info.h b/src/gromacs/hardware/hw_info.h

index 7e08e63f3fe30acb52c961e2959ed1c6099f835b..29e59de023fc7b6d8c26cbc3ced067f0786e707d 100644 (file)
--- a/src/gromacs/hardware/hw_info.h
+++ b/src/gromacs/hardware/hw_info.h
@@ -36,6 +36,7 @@
  #define GMX_HARDWARE_HWINFO_H
  
  #include <string>
+#include <vector>
  
  #include "gromacs/hardware/gpu_hw_info.h"
  #include "gromacs/utility/basedefinitions.h"
@@ -54,6 +55,7 @@ struct gmx_hw_info_t
  {
      /* Data for our local physical node */
      struct gmx_gpu_info_t gpu_info;                /* Information about GPUs detected in the system */
+    std::vector<int>      compatibleGpus;          /* Contains the device IDs of all GPUs that are compatible */
  
      int                   nthreads_hw_avail;       /* Number of hardware threads available; this number
                                                        is based on the number of CPUs reported as available
diff --git a/src/gromacs/taskassignment/hardwareassign.cpp b/src/gromacs/taskassignment/hardwareassign.cpp

index ef9cb744bbbabd541c7b61a81d150e8b879a32cf..231d0a1d0514d66c73b245c4d56ec5751f7001be 100644 (file)
--- a/src/gromacs/taskassignment/hardwareassign.cpp
+++ b/src/gromacs/taskassignment/hardwareassign.cpp
@@ -204,25 +204,10 @@ static void exitUnlessUserGpuTaskAssignmentIsValid(const gmx_gpu_info_t   &gpu_i
      }
  }
  
-std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info)
-{
-    // Possible minor over-allocation here, but not important for anything
-    std::vector<int> compatibleGpus;
-    compatibleGpus.reserve(gpu_info.n_dev);
-    for (int i = 0; i < gpu_info.n_dev; i++)
-    {
-        GMX_ASSERT(gpu_info.gpu_dev, "Invalid gpu_info.gpu_dev");
-        if (isGpuCompatible(gpu_info, i))
-        {
-            compatibleGpus.push_back(i);
-        }
-    }
-    return compatibleGpus;
-}
-
  std::vector<int> mapPpRanksToGpus(bool                    rankCanUseGpu,
                                    const t_commrec        *cr,
                                    const gmx_gpu_info_t   &gpu_info,
+                                  const std::vector<int> &compatibleGpus,
                                    const gmx_hw_opt_t     &hw_opt)
  {
      std::vector<int> taskAssignment;
@@ -232,7 +217,6 @@ std::vector<int> mapPpRanksToGpus(bool                    rankCanUseGpu,
          return taskAssignment;
      }
  
-    auto compatibleGpus = getCompatibleGpus(gpu_info);
      if (!hw_opt.gpuIdTaskAssignment.empty())
      {
          auto userGpuTaskAssignment = parseGpuTaskAssignment(hw_opt.gpuIdTaskAssignment);
diff --git a/src/gromacs/taskassignment/hardwareassign.h b/src/gromacs/taskassignment/hardwareassign.h

index d8d3952bc9236766a4114461eb96c06648c6b3a0..0294d25d1ad83465daed6d1349151e07abbac293 100644 (file)
--- a/src/gromacs/taskassignment/hardwareassign.h
+++ b/src/gromacs/taskassignment/hardwareassign.h
@@ -74,15 +74,6 @@ class MDLogger;
   */
  std::vector<int> parseGpuTaskAssignment(const std::string &gpuTaskAssignment);
  
-/*! \brief Filter the compatible GPUs
- *
- * This function filters gpu_info.gpu_dev for compatible GPUs based
- * on the previously run compatibility tests.
- *
- * \param[in]     gpu_info    Information detected about GPUs, including compatibility
- * \return                    vector of IDs of GPUs already recorded as compatible */
-std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info);
-
  /*! \brief Assign PP ranks to valid GPU IDs.
   *
   * Will return a validated mapping from PP ranks (ie tasks that can
@@ -96,7 +87,8 @@ std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info);
   *
   * \param[in]     rankCanUseGpu          Whether this rank can execute a task on a GPU.
   * \param[in]     cr                     Communication record.
- * \param[in]     gpu_info               Information detected about GPUs, including compatibility.
+ * \param[in]     gpu_info               Information detected about GPUs
+ * \param[in]     compatibleGpus         Vector of GPUs that are compatible
   * \param[in]     hw_opt                 Parallelisation options, including any user-specified GPU task assignment.
   *
   * \returns  A valid GPU selection.
@@ -104,6 +96,7 @@ std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info);
  std::vector<int> mapPpRanksToGpus(bool                    rankCanUseGpu,
                                    const t_commrec        *cr,
                                    const gmx_gpu_info_t   &gpu_info,
+                                  const std::vector<int> &compatibleGpus,
                                    const gmx_hw_opt_t     &hw_opt);
  
  } // namespace
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp

index 940079ff28f429fd1f2235ddbb03707fa506789c..93bdd871c8864b2d865300c85541bc836334549b 100644 (file)
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -876,7 +876,7 @@ int Mdrunner::mdrunner()
           * or sharing devices on a node, either from the user
           * selection, or automatically. */
          bool rankCanUseGpu = thisRankHasDuty(cr, DUTY_PP);
-        gpuTaskAssignment = mapPpRanksToGpus(rankCanUseGpu, cr, hwinfo->gpu_info, hw_opt);
+        gpuTaskAssignment = mapPpRanksToGpus(rankCanUseGpu, cr, hwinfo->gpu_info, hwinfo->compatibleGpus, hw_opt);
      }
  
      reportGpuUsage(mdlog, hwinfo->gpu_info, !hw_opt.gpuIdTaskAssignment.empty(),
author	Mark Abraham <mark.j.abraham@gmail.com>
	Wed, 25 Oct 2017 09:45:32 +0000 (11:45 +0200)
committer	Aleksei Iupinov <a.yupinov@gmail.com>
	Tue, 31 Oct 2017 16:33:05 +0000 (17:33 +0100)
src/gromacs/ewald/tests/testhardwarecontexts.cpp		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils.cpp		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils.cu		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils.h		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils_ocl.cpp		patch \| blob \| history
src/gromacs/hardware/detecthardware.cpp		patch \| blob \| history
src/gromacs/hardware/hw_info.h		patch \| blob \| history
src/gromacs/taskassignment/hardwareassign.cpp		patch \| blob \| history
src/gromacs/taskassignment/hardwareassign.h		patch \| blob \| history
src/programs/mdrun/runner.cpp		patch \| blob \| history