#include "config.h"
-#include <assert.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
+#include <cerrno>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
#include <string>
#include <vector>
#ifdef GMX_GPU
-const gmx_bool bGPUBinary = TRUE;
-#else
-const gmx_bool bGPUBinary = FALSE;
-#endif
+
+// Whether this binary was built with GPU support; use C++ bool literals
+// consistently (was TRUE, a gmx_bool macro, mismatching the bool type and
+// the `false`/`true` initializers used for the other constants below).
+static const bool bGPUBinary = true;
+
+# ifdef GMX_USE_OPENCL
+
+static const char *gpu_implementation = "OpenCL";
+/* Our current OpenCL implementation only supports using exactly one
+ * GPU per PP rank, so sharing is impossible */
+static const bool bGpuSharingSupported = false;
+/* Our current OpenCL implementation is not known to handle
+ * concurrency correctly (at context creation, JIT compilation, or JIT
+ * cache-management stages). OpenCL runtimes need not support it
+ * either; library MPI segfaults when creating OpenCL contexts;
+ * thread-MPI seems to work but is not yet known to be safe. */
+static const bool bMultiGpuPerNodeSupported = false;
+
+# else /* GMX_USE_OPENCL */
+
+// Our CUDA implementation supports everything
+static const char *gpu_implementation = "CUDA";
+static const bool bGpuSharingSupported = true;
+static const bool bMultiGpuPerNodeSupported = true;
+
+# endif /* GMX_USE_OPENCL */
+
+#else /* GMX_GPU */
+
+// Not compiled with GPU support
+static const bool bGPUBinary = false;
+static const char *gpu_implementation = "non-GPU";
+static const bool bGpuSharingSupported = false;
+static const bool bMultiGpuPerNodeSupported = false;
+
+#endif /* GMX_GPU */
/* Names of the GPU detection/check results (see e_gpu_detect_res_t in hw_info.h). */
const char * const gpu_detect_res_str[egpuNR] =
static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
const gmx_gpu_opt_t *gpu_opt);
+//! Return whether this build's GPU implementation can use more than one GPU per node (see bMultiGpuPerNodeSupported above)
+gmx_bool gmx_multiple_gpu_per_node_supported()
+{
+    return bMultiGpuPerNodeSupported;
+}
+
+//! Return whether this build's GPU implementation supports mapping one GPU to multiple PP ranks (see bGpuSharingSupported above)
+gmx_bool gmx_gpu_sharing_supported()
+{
+    return bGpuSharingSupported;
+}
+
static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info)
{
int i, ndev;
#if defined GMX_MPI && !defined GMX_THREAD_MPI
/* We only print the detection on one, of possibly multiple, nodes */
- strncpy(onhost, " on host ", 10);
+ std::strncpy(onhost, " on host ", 10);
gmx_gethostname(onhost + 9, HOSTNAMELEN);
#else
/* We detect all relevant GPUs */
- strncpy(onhost, "", 1);
+ std::strncpy(onhost, "", 1);
#endif
if (ngpu > 0)
}
{
- std::vector<int> gpuIdsInUse;
+ std::vector<int> gpuIdsInUse;
for (int i = 0; i < ngpu_use; i++)
{
- gpuIdsInUse.push_back(get_cuda_gpu_device_id(gpu_info, gpu_opt, i));
+ gpuIdsInUse.push_back(get_gpu_device_id(gpu_info, gpu_opt, i));
}
std::string gpuIdsString =
formatAndJoin(gpuIdsInUse, ",", gmx::StringFormatter("%d"));
char th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
gmx_bool btMPI, bMPI, bNthreadsAuto, bEmulateGPU;
- assert(hwinfo);
- assert(cr);
+ GMX_RELEASE_ASSERT(hwinfo, "hwinfo must be a non-NULL pointer");
+ GMX_RELEASE_ASSERT(cr, "cr must be a non-NULL pointer");
/* Below we only do consistency checks for PP and GPUs,
* this is irrelevant for PME only nodes, so in that case we return
}
else
{
- if (ngpu_comp > npppn)
+ /* TODO Should we have a gpu_opt->n_dev_supported field? */
+ if (ngpu_comp > npppn && gmx_multiple_gpu_per_node_supported())
{
md_print_warn(cr, fplog,
"NOTE: potentially sub-optimal launch configuration, %s started with less\n"
*/
if (cr->rank_pp_intranode == 0)
{
+ std::string reasonForLimit;
+ if (ngpu_comp > 1 &&
+ ngpu_use == 1 &&
+ !gmx_multiple_gpu_per_node_supported())
+ {
+ reasonForLimit = "can be used by ";
+ reasonForLimit += gpu_implementation;
+ reasonForLimit += " in GROMACS";
+ }
+ else
+ {
+ reasonForLimit = "was detected";
+ }
gmx_fatal(FARGS,
"Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
- "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
+ "%s was started with %d PP %s%s%s, but only %d GPU%s %s.",
th_or_proc, btMPI ? "s" : "es", pernode,
ShortProgram(), npppn, th_or_proc,
th_or_proc_plural, pernode,
- ngpu_use, gpu_use_plural);
+ ngpu_use, gpu_use_plural, reasonForLimit.c_str());
}
}
}
int i, uniq_count, ngpu;
int *uniq_ids;
- assert(gpu_info);
- assert(gpu_opt);
+ GMX_RELEASE_ASSERT(gpu_info, "gpu_info must be a non-NULL pointer");
+ GMX_RELEASE_ASSERT(gpu_opt, "gpu_opt must be a non-NULL pointer");
ngpu = gpu_info->n_dev;
* to 1 indicates that the respective GPU was selected to be used. */
for (i = 0; i < gpu_opt->n_dev_use; i++)
{
- uniq_ids[get_cuda_gpu_device_id(gpu_info, gpu_opt, i)] = 1;
+ int device_id;
+
+ device_id = gmx_gpu_sharing_supported() ? get_gpu_device_id(gpu_info, gpu_opt, i) : i;
+ uniq_ids[device_id] = 1;
}
/* Count the devices used. */
for (i = 0; i < ngpu; i++)
MPI_Comm_rank(physicalnode_comm, &rank_local);
#else
/* Here there should be only one process, check this */
- assert(cr->nnodes == 1 && cr->sim_nodeid == 0);
+ GMX_RELEASE_ASSERT(cr->nnodes == 1 && cr->sim_nodeid == 0, "Only a single (master) process should execute here");
rank_local = 0;
#endif
check_use_of_rdtscp_on_this_cpu(fplog, cr, hwinfo);
}
+//! \brief Return if any GPU ID (e.g. in a user-supplied string) is repeated
+static gmx_bool anyGpuIdIsRepeated(const gmx_gpu_opt_t *gpu_opt)
+{
+    /* Compare each pair of IDs in the string exactly once */
+    for (int first = 0; first < gpu_opt->n_dev_use - 1; ++first)
+    {
+        for (int second = first + 1; second < gpu_opt->n_dev_use; ++second)
+        {
+            if (gpu_opt->dev_use[first] == gpu_opt->dev_use[second])
+            {
+                /* The same ID occurs at two positions */
+                return TRUE;
+            }
+        }
+    }
+
+    return FALSE;
+}
+
void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
{
char *env;
parse_digits_from_plain_string(env,
&gpu_opt->n_dev_use,
&gpu_opt->dev_use);
-
+ if (!gmx_multiple_gpu_per_node_supported() && 1 < gpu_opt->n_dev_use)
+ {
+ gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per node", gpu_implementation);
+ }
+ if (!gmx_gpu_sharing_supported() && anyGpuIdIsRepeated(gpu_opt))
+ {
+ gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per GPU", gpu_implementation);
+ }
if (gpu_opt->n_dev_use == 0)
{
gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
{
if (nrank % gpu_opt->n_dev_compatible == 0)
{
- nshare = nrank/gpu_opt->n_dev_compatible;
+ nshare = gmx_gpu_sharing_supported() ? nrank/gpu_opt->n_dev_compatible : 1;
}
else
{
/* Here we will waste GPUs when nrank < gpu_opt->n_dev_compatible */
gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_compatible*nshare, nrank);
+ if (!gmx_multiple_gpu_per_node_supported())
+ {
+ gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_use, 1);
+ }
snew(gpu_opt->dev_use, gpu_opt->n_dev_use);
for (int i = 0; i != gpu_opt->n_dev_use; ++i)
{