Make mdrun print the list of compatible GPUs

author Mark Abraham <mark.j.abraham@gmail.com>

Sat, 17 May 2014 13:35:56 +0000 (15:35 +0200)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Fri, 5 Dec 2014 20:02:22 +0000 (21:02 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Sat, 17 May 2014 13:35:56 +0000 (15:35 +0200)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Fri, 5 Dec 2014 20:02:22 +0000 (21:02 +0100)
diff --git a/src/gromacs/gmxlib/gmx_detect_hardware.cpp b/src/gromacs/gmxlib/gmx_detect_hardware.cpp

index f1e18bb3d01744b74e9d2b0defe4199273f1f163..eb1aa55b01008400d8df221916942499a4430c53 100644 (file)
--- a/src/gromacs/gmxlib/gmx_detect_hardware.cpp
+++ b/src/gromacs/gmxlib/gmx_detect_hardware.cpp
@@ -64,15 +64,18 @@
  #include "gromacs/legacyheaders/types/commrec.h"
  #include "gromacs/legacyheaders/types/enums.h"
  #include "gromacs/legacyheaders/types/hw_info.h"
+#include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/gmxassert.h"
  #include "gromacs/utility/gmxomp.h"
  #include "gromacs/utility/smalloc.h"
  #include "gromacs/utility/stringutil.h"
  #include "gromacs/utility/sysinfo.h"
  
+
  #ifdef GMX_GPU
  const gmx_bool bGPUBinary = TRUE;
  #else
@@ -151,7 +154,7 @@ static void print_gpu_detection_stats(FILE                 *fplog,
      }
  }
  
-/*! \brief Helper function for writing comma-separated GPU IDs.
+/*! \brief Helper function for writing a string of GPU IDs.
   *
   * \param[in] ids  A container of integer GPU IDs
   * \return         A comma-separated string of GPU IDs */
@@ -197,6 +200,21 @@ makeGpuUsageReport(const gmx_gpu_info_t *gpu_info,
      }
  
      std::string output;
+    if (!gpu_opt->bUserSet)
+    {
+        // gpu_opt->cuda_dev_compatible is only populated during auto-selection
+        std::string gpuIdsString =
+            makeGpuIdsString(gmx::ConstArrayRef<int>(gpu_opt->cuda_dev_compatible,
+                                                     gpu_opt->cuda_dev_compatible +
+                                                     gpu_opt->ncuda_dev_compatible));
+        bool bPluralGpus = gpu_opt->ncuda_dev_compatible > 1;
+        output += gmx::formatString("%d compatible GPU%s %s present, with ID%s %s\n",
+                                    gpu_opt->ncuda_dev_compatible,
+                                    bPluralGpus ? "s" : "",
+                                    bPluralGpus ? "are" : "is",
+                                    bPluralGpus ? "s" : "",
+                                    gpuIdsString.c_str());
+    }
  
      {
          std::vector<int> gpuIdsInUse;
@@ -812,17 +830,7 @@ void gmx_select_gpu_ids(FILE *fplog, const t_commrec *cr,
      else
      {
          pick_compatible_gpus(&hwinfo_g->gpu_info, gpu_opt);
-
-        if (gpu_opt->ncuda_dev_use > cr->nrank_pp_intranode)
-        {
-            /* We picked more GPUs than we can use: limit the number.
-             * We print detailed messages about this later in
-             * gmx_check_hw_runconf_consistency.
-             */
-            limit_num_gpus_used(gpu_opt, cr->nrank_pp_intranode);
-        }
-
-        gpu_opt->bUserSet = FALSE;
+        limit_num_gpus_used(gpu_opt, cr->nrank_pp_intranode);
      }
  
      /* If the user asked for a GPU, check whether we have a GPU */
@@ -832,30 +840,26 @@ void gmx_select_gpu_ids(FILE *fplog, const t_commrec *cr,
      }
  }
  
-static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count)
+/* If we detected more compatible GPUs than we can use, limit the
+ * number. We print detailed messages about this later in
+ * gmx_check_hw_runconf_consistency.
+ */
+static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int maxNumberToUse)
  {
-    int ndev_use;
-
-    assert(gpu_opt);
+    GMX_RELEASE_ASSERT(gpu_opt, "Invalid gpu_opt pointer passed");
+    GMX_RELEASE_ASSERT(maxNumberToUse >= 1,
+                       gmx::formatString("Invalid limit (%d) for the number of GPUs (detected %d compatible GPUs)",
+                                         maxNumberToUse, gpu_opt->ncuda_dev_compatible).c_str());
  
-    ndev_use = gpu_opt->ncuda_dev_use;
-
-    if (count > ndev_use)
+    /* Don't increase the number of GPUs used beyond (e.g.) the number
+       of PP ranks */
+    gpu_opt->ncuda_dev_use = std::min(gpu_opt->ncuda_dev_compatible, maxNumberToUse);
+    snew(gpu_opt->cuda_dev_use, gpu_opt->ncuda_dev_use);
+    for (int i = 0; i != gpu_opt->ncuda_dev_use; ++i)
      {
-        /* won't increase the # of GPUs */
-        return;
+        /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
+        gpu_opt->cuda_dev_use[i] = gpu_opt->cuda_dev_compatible[i];
      }
-
-    if (count < 1)
-    {
-        char sbuf[STRLEN];
-        sprintf(sbuf, "Limiting the number of GPUs to <1 doesn't make sense (detected %d, %d requested)!",
-                ndev_use, count);
-        gmx_incons(sbuf);
-    }
-
-    /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
-    gpu_opt->ncuda_dev_use = count;
  }
  
  void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
diff --git a/src/gromacs/gmxlib/gpu_utils/gpu_utils.cu b/src/gromacs/gmxlib/gpu_utils/gpu_utils.cu

index f8b741923eb6827ed853639308f7e4e29bee18c1..1c13bc12023bfd3a8becd39463dec8e516fb1d85 100644 (file)
--- a/src/gromacs/gmxlib/gpu_utils/gpu_utils.cu
+++ b/src/gromacs/gmxlib/gpu_utils/gpu_utils.cu
@@ -420,9 +420,9 @@ void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
          }
      }
  
-    gpu_opt->ncuda_dev_use = ncompat;
-    snew(gpu_opt->cuda_dev_use, ncompat);
-    memcpy(gpu_opt->cuda_dev_use, compat, ncompat*sizeof(*compat));
+    gpu_opt->ncuda_dev_compatible = ncompat;
+    snew(gpu_opt->cuda_dev_compatible, ncompat);
+    memcpy(gpu_opt->cuda_dev_compatible, compat, ncompat*sizeof(*compat));
      sfree(compat);
  }
  
diff --git a/src/gromacs/legacyheaders/gmx_detect_hardware.h b/src/gromacs/legacyheaders/gmx_detect_hardware.h

index b426ba4acbd188d4c64e5832f1352fb38e9a521c..4f74e5e061289431ec43850cde9202b19419bc96 100644 (file)
--- a/src/gromacs/legacyheaders/gmx_detect_hardware.h
+++ b/src/gromacs/legacyheaders/gmx_detect_hardware.h
@@ -48,7 +48,8 @@ extern "C" {
  /* the init and consistency functions depend on commrec that may not be
     consistent in cuda because MPI types don't exist there.  */
  #ifndef __CUDACC__
-/* return a pointer to a global hwinfo structure. */
+/* Construct the global hwinfo structure and return a pointer to
+   it. Caller is responsible for freeing this pointer. */
  gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
                                     gmx_bool bDetectGPUs);
  
diff --git a/src/gromacs/legacyheaders/types/hw_info.h b/src/gromacs/legacyheaders/types/hw_info.h

index 13715add38aea0a6a84b01f3bcbcfe22e8dac2f5..0a8bb4913ff525542e735e4bf7884af22091bc28 100644 (file)
--- a/src/gromacs/legacyheaders/types/hw_info.h
+++ b/src/gromacs/legacyheaders/types/hw_info.h
@@ -98,11 +98,13 @@ enum {
  /* GPU device selection information -- for now with only CUDA devices */
  typedef struct
  {
-    char     *gpu_id;        /* GPU id's to use, each specified as chars */
-    gmx_bool  bUserSet;      /* true if the GPUs in cuda_dev_use are manually provided by the user */
+    char     *gpu_id;               /* GPU id's to use, each specified as chars */
+    gmx_bool  bUserSet;             /* true if the GPUs in cuda_dev_use are manually provided by the user */
  
-    int       ncuda_dev_use; /* number of device (IDs) selected to be used */
-    int      *cuda_dev_use;  /* device index list providing GPU to PP rank mapping, GPUs can be listed multiple times when ranks share them */
+    int       ncuda_dev_compatible; /* number of compatible GPU devices that could be used */
+    int      *cuda_dev_compatible;  /* array of compatible GPU device IDs, from which automatic selection occurs */
+    int       ncuda_dev_use;        /* number of GPU devices selected to be used, either by the user or automatically */
+    int      *cuda_dev_use;         /* array mapping from PP rank index to GPU device ID; GPU IDs can be listed multiple times when ranks share them */
  } gmx_gpu_opt_t;
  
  /* Threading and GPU options, can be set automatically or by the user */
author	Mark Abraham <mark.j.abraham@gmail.com>
	Sat, 17 May 2014 13:35:56 +0000 (15:35 +0200)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Fri, 5 Dec 2014 20:02:22 +0000 (21:02 +0100)
src/gromacs/gmxlib/gmx_detect_hardware.cpp		patch | blob | history
src/gromacs/gmxlib/gpu_utils/gpu_utils.cu		patch | blob | history
src/gromacs/legacyheaders/gmx_detect_hardware.h		patch | blob | history
src/gromacs/legacyheaders/types/hw_info.h		patch | blob | history