#include "config.h"
-#include <assert.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
+#include <cerrno>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
#include <string>
#include <vector>
#ifdef GMX_GPU
-const gmx_bool bGPUBinary = TRUE;
-#else
-const gmx_bool bGPUBinary = FALSE;
-#endif
+
+/* The binary was compiled with GPU support (CUDA or OpenCL). Use the
+ * C++ bool literal rather than the gmx_bool TRUE macro, consistent
+ * with the other bool constants in this block. */
+static const bool bGPUBinary = true;
+
+# ifdef GMX_USE_OPENCL
+
+static const char *gpu_implementation = "OpenCL";
+/* Our current OpenCL implementation only supports using exactly one
+ * GPU per PP rank, so sharing is impossible */
+static const bool bGpuSharingSupported = false;
+/* Our current OpenCL implementation is not known to handle
+ * concurrency correctly (at context creation, JIT compilation, or JIT
+ * cache-management stages). OpenCL runtimes need not support it
+ * either; library MPI segfaults when creating OpenCL contexts;
+ * thread-MPI seems to work but is not yet known to be safe. */
+static const bool bMultiGpuPerNodeSupported = false;
+
+# else /* GMX_USE_OPENCL */
+
+// Our CUDA implementation supports everything
+static const char *gpu_implementation = "CUDA";
+static const bool bGpuSharingSupported = true;
+static const bool bMultiGpuPerNodeSupported = true;
+
+# endif /* GMX_USE_OPENCL */
+
+#else /* GMX_GPU */
+
+// Not compiled with GPU support
+static const bool bGPUBinary = false;
+static const char *gpu_implementation = "non-GPU";
+static const bool bGpuSharingSupported = false;
+static const bool bMultiGpuPerNodeSupported = false;
+
+#endif /* GMX_GPU */
+
+/* Names of the GPU detection/check results (see e_gpu_detect_res_t in hw_info.h). */
+const char * const gpu_detect_res_str[egpuNR] =
+{
+ "compatible", "inexistent", "incompatible", "insane"
+};
static const char * invalid_gpuid_hint =
"A delimiter-free sequence of valid numeric IDs of available GPUs is expected.";
/* A lock to protect the hwinfo structure */
static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
+/* Buffer size for hostnames obtained via gmx_gethostname() in this file */
+#define HOSTNAMELEN 80
/* FW decl. */
static void set_gpu_ids(gmx_gpu_opt_t *gpu_opt, int nrank, int rank);
static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
const gmx_gpu_opt_t *gpu_opt);
+gmx_bool gmx_multiple_gpu_per_node_supported()
+{
+ return bMultiGpuPerNodeSupported;
+}
+
+//! \brief Returns whether PP ranks in this build may share a GPU.
+//!
+//! Returns the compile-time constant bGpuSharingSupported defined
+//! above: false for OpenCL and non-GPU builds, true for CUDA builds.
+gmx_bool gmx_gpu_sharing_supported()
+{
+    return bGpuSharingSupported;
+}
+
static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info)
{
int i, ndev;
const gmx_gpu_info_t *gpu_info,
const t_commrec *cr)
{
- char onhost[266], stmp[STRLEN];
+ char onhost[HOSTNAMELEN+10], stmp[STRLEN];
int ngpu;
if (!gpu_info->bDetectGPUs)
#if defined GMX_MPI && !defined GMX_THREAD_MPI
/* We only print the detection on one, of possibly multiple, nodes */
- strncpy(onhost, " on host ", 10);
- gmx_gethostname(onhost+9, 256);
+ std::strncpy(onhost, " on host ", 10);
+ gmx_gethostname(onhost + 9, HOSTNAMELEN);
#else
/* We detect all relevant GPUs */
- strncpy(onhost, "", 1);
+ std::strncpy(onhost, "", 1);
#endif
if (ngpu > 0)
/*! \brief Helper function for reporting GPU usage information
* in the mdrun log file
*
- * \param[in] gpu_info Pointer to per-node GPU info struct
- * \param[in] gpu_opt Pointer to per-node GPU options struct
- * \param[in] numPpRanks Number of PP ranks per node
- * \return String to write to the log file
- * \throws std::bad_alloc if out of memory */
+ * \param[in] gpu_info Pointer to per-node GPU info struct
+ * \param[in] gpu_opt Pointer to per-node GPU options struct
+ * \param[in] numPpRanks Number of PP ranks per node
+ * \param[in] bPrintHostName Print the hostname in the usage information
+ * \return String to write to the log file
+ * \throws std::bad_alloc if out of memory */
static std::string
makeGpuUsageReport(const gmx_gpu_info_t *gpu_info,
const gmx_gpu_opt_t *gpu_opt,
- size_t numPpRanks)
+ size_t numPpRanks,
+ bool bPrintHostName)
{
- int ngpu_use = gpu_opt->n_dev_use;
- int ngpu_comp = gpu_info->n_dev_compatible;
+ int ngpu_use = gpu_opt->n_dev_use;
+ int ngpu_comp = gpu_info->n_dev_compatible;
+ char host[HOSTNAMELEN];
+
+ if (bPrintHostName)
+ {
+ gmx_gethostname(host, HOSTNAMELEN);
+ }
/* Issue a note if GPUs are available but not used */
if (ngpu_comp > 0 && ngpu_use < 1)
gpu_opt->n_dev_compatible),
",", gmx::StringFormatter("%d"));
bool bPluralGpus = gpu_opt->n_dev_compatible > 1;
+
+ if (bPrintHostName)
+ {
+ output += gmx::formatString("On host %s ", host);
+ }
output += gmx::formatString("%d compatible GPU%s %s present, with ID%s %s\n",
gpu_opt->n_dev_compatible,
bPluralGpus ? "s" : "",
}
{
- std::vector<int> gpuIdsInUse;
+ std::vector<int> gpuIdsInUse;
for (int i = 0; i < ngpu_use; i++)
{
- gpuIdsInUse.push_back(get_cuda_gpu_device_id(gpu_info, gpu_opt, i));
+ gpuIdsInUse.push_back(get_gpu_device_id(gpu_info, gpu_opt, i));
}
std::string gpuIdsString =
formatAndJoin(gpuIdsInUse, ",", gmx::StringFormatter("%d"));
int numGpusInUse = gmx_count_gpu_dev_unique(gpu_info, gpu_opt);
bool bPluralGpus = numGpusInUse > 1;
+ if (bPrintHostName)
+ {
+ output += gmx::formatString("On host %s ", host);
+ }
output += gmx::formatString("%d GPU%s %sselected for this run.\n"
"Mapping of GPU ID%s to the %d PP rank%s in this node: %s\n",
numGpusInUse, bPluralGpus ? "s" : "",
{
int npppn;
char th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
- gmx_bool btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
+ gmx_bool btMPI, bMPI, bNthreadsAuto, bEmulateGPU;
- assert(hwinfo);
- assert(cr);
+ GMX_RELEASE_ASSERT(hwinfo, "hwinfo must be a non-NULL pointer");
+ GMX_RELEASE_ASSERT(cr, "cr must be a non-NULL pointer");
/* Below we only do consistency checks for PP and GPUs,
* this is irrelevant for PME only nodes, so in that case we return
/* GPU emulation detection is done later, but we need here as well
* -- uncool, but there's no elegant workaround */
bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL);
- bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
if (hwinfo->gpu_info.n_dev_compatible > 0)
{
{
gpuUseageReport = makeGpuUsageReport(&hwinfo->gpu_info,
&hw_opt->gpu_opt,
- cr->nrank_pp_intranode);
+ cr->nrank_pp_intranode,
+ bMPI && cr->nnodes > 1);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
could automatically start. */
gmx_fatal(FARGS,
"%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
- "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
+ "%s requires one PP tread-MPI thread per GPU; use fewer GPUs.",
ngpu_use, gpu_use_plural,
npppn, th_or_proc_plural,
- ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
+ ShortProgram());
}
if (!hw_opt->gpu_opt.bUserSet && npppn < ngpu_comp)
limited the number GPUs used. */
md_print_warn(cr, fplog,
"NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
- " %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
+ " %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.\n",
ngpu_comp, gpu_comp_plural,
npppn, th_or_proc_plural,
ShortProgram(), npppn,
- npppn > 1 ? "s" : "",
- bMaxMpiThreadsSet ? "\n Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
+ npppn > 1 ? "s" : "");
}
}
}
else
{
- if (ngpu_comp > npppn)
+ /* TODO Should we have a gpu_opt->n_dev_supported field? */
+ if (ngpu_comp > npppn && gmx_multiple_gpu_per_node_supported())
{
md_print_warn(cr, fplog,
"NOTE: potentially sub-optimal launch configuration, %s started with less\n"
*/
if (cr->rank_pp_intranode == 0)
{
+ std::string reasonForLimit;
+ if (ngpu_comp > 1 &&
+ ngpu_use == 1 &&
+ !gmx_multiple_gpu_per_node_supported())
+ {
+ reasonForLimit = "can be used by ";
+ reasonForLimit += gpu_implementation;
+ reasonForLimit += " in GROMACS";
+ }
+ else
+ {
+ reasonForLimit = "was detected";
+ }
gmx_fatal(FARGS,
"Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
- "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
+ "%s was started with %d PP %s%s%s, but only %d GPU%s %s.",
th_or_proc, btMPI ? "s" : "es", pernode,
ShortProgram(), npppn, th_or_proc,
th_or_proc_plural, pernode,
- ngpu_use, gpu_use_plural);
+ ngpu_use, gpu_use_plural, reasonForLimit.c_str());
}
}
}
int i, uniq_count, ngpu;
int *uniq_ids;
- assert(gpu_info);
- assert(gpu_opt);
+ GMX_RELEASE_ASSERT(gpu_info, "gpu_info must be a non-NULL pointer");
+ GMX_RELEASE_ASSERT(gpu_opt, "gpu_opt must be a non-NULL pointer");
ngpu = gpu_info->n_dev;
* to 1 indicates that the respective GPU was selected to be used. */
for (i = 0; i < gpu_opt->n_dev_use; i++)
{
- uniq_ids[get_cuda_gpu_device_id(gpu_info, gpu_opt, i)] = 1;
+ int device_id;
+
+ device_id = gmx_gpu_sharing_supported() ? get_gpu_device_id(gpu_info, gpu_opt, i) : i;
+ uniq_ids[device_id] = 1;
}
/* Count the devices used. */
for (i = 0; i < ngpu; i++)
MPI_Comm_rank(physicalnode_comm, &rank_local);
#else
/* Here there should be only one process, check this */
- assert(cr->nnodes == 1 && cr->sim_nodeid == 0);
+ GMX_RELEASE_ASSERT(cr->nnodes == 1 && cr->sim_nodeid == 0, "Only a single (master) process should execute here");
rank_local = 0;
#endif
}
#ifdef GMX_LIB_MPI
- char host[255];
+ char host[HOSTNAMELEN];
int rank;
- gmx_gethostname(host, 255);
+ gmx_gethostname(host, HOSTNAMELEN);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
s += gmx::formatString("Hardware detected on host %s (the node of MPI rank %d):\n",
check_use_of_rdtscp_on_this_cpu(fplog, cr, hwinfo);
}
+//! \brief Return if any GPU ID (e.g. in a user-supplied string) is repeated
+//!
+//! \param[in] gpu_opt  GPU options whose dev_use array (of length
+//!                     n_dev_use) holds the GPU IDs to check
+//! \returns   TRUE when the same ID occurs at two different positions,
+//!            FALSE otherwise (including for zero or one IDs)
+//!
+//! Simple O(n^2) pairwise scan; the number of IDs per node is
+//! presumably small, so this is not performance-sensitive.
+static gmx_bool anyGpuIdIsRepeated(const gmx_gpu_opt_t *gpu_opt)
+{
+    /* Loop over IDs in the string */
+    for (int i = 0; i < gpu_opt->n_dev_use - 1; ++i)
+    {
+        /* Look for the ID in location i in the following part of the
+           string */
+        for (int j = i + 1; j < gpu_opt->n_dev_use; ++j)
+        {
+            if (gpu_opt->dev_use[i] == gpu_opt->dev_use[j])
+            {
+                /* Same ID found in locations i and j */
+                return TRUE;
+            }
+        }
+    }
+
+    return FALSE;
+}
+
void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
{
char *env;
parse_digits_from_plain_string(env,
&gpu_opt->n_dev_use,
&gpu_opt->dev_use);
-
+ if (!gmx_multiple_gpu_per_node_supported() && 1 < gpu_opt->n_dev_use)
+ {
+ gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per node", gpu_implementation);
+ }
+ if (!gmx_gpu_sharing_supported() && anyGpuIdIsRepeated(gpu_opt))
+ {
+ gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per GPU", gpu_implementation);
+ }
if (gpu_opt->n_dev_use == 0)
{
gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
sfree(checkres);
}
- else
+ else if (getenv("GMX_EMULATE_GPU") == NULL)
{
pick_compatible_gpus(&hwinfo_g->gpu_info, gpu_opt);
set_gpu_ids(gpu_opt, cr->nrank_pp_intranode, cr->rank_pp_intranode);
if (gpu_opt->n_dev_compatible == 0)
{
- char host[255];
+ char host[HOSTNAMELEN];
- gmx_gethostname(host, 255);
+ gmx_gethostname(host, HOSTNAMELEN);
gmx_fatal(FARGS, "A GPU was requested on host %s, but no compatible GPUs were detected. All nodes with PP ranks need to have GPUs. If you intended to use GPU acceleration in a parallel run, you can either avoid using the nodes that don't have GPUs or place PME ranks on these nodes.", host);
}
{
if (nrank % gpu_opt->n_dev_compatible == 0)
{
- nshare = nrank/gpu_opt->n_dev_compatible;
+ nshare = gmx_gpu_sharing_supported() ? nrank/gpu_opt->n_dev_compatible : 1;
}
else
{
/* Here we will waste GPUs when nrank < gpu_opt->n_dev_compatible */
gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_compatible*nshare, nrank);
+ if (!gmx_multiple_gpu_per_node_supported())
+ {
+ gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_use, 1);
+ }
snew(gpu_opt->dev_use, gpu_opt->n_dev_use);
for (int i = 0; i != gpu_opt->n_dev_use; ++i)
{