GPU detection is done once per physical node

author Berk Hess <hess@kth.se>

Thu, 17 Oct 2013 10:11:55 +0000 (12:11 +0200)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Tue, 5 Nov 2013 18:21:41 +0000 (19:21 +0100)
author Berk Hess <hess@kth.se>
Thu, 17 Oct 2013 10:11:55 +0000 (12:11 +0200)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Tue, 5 Nov 2013 18:21:41 +0000 (19:21 +0100)
diff --git a/include/gpu_utils.h b/include/gpu_utils.h

index 68f8f7a2c766e6e330a14f74f47f7b3f515d420b..cdb3dc14d307848a67e483b9c56fcdecada49b92 100644 (file)
--- a/include/gpu_utils.h
+++ b/include/gpu_utils.h
@@ -100,6 +100,9 @@ int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
  FUNC_QUALIFIER
  void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index) FUNC_TERM_VOID
  
+FUNC_QUALIFIER
+size_t sizeof_cuda_dev_info(void) FUNC_TERM_INT
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/include/network.h b/include/network.h

index 5b00c546eae86cdb020d33d4a12a685c224fe330..1cc018fdfd5d20b3430330285cb97bb5b59ebeb1 100644 (file)
--- a/include/network.h
+++ b/include/network.h
@@ -65,6 +65,12 @@ int gmx_node_num(void);
  int gmx_node_rank(void);
  /* return the rank of the node */
  
+GMX_LIBGMX_EXPORT
+int gmx_physicalnode_id_hash(void);
+/* Return a non-negative hash that is, hopefully, unique for each physical node.
+ * This hash is useful for determining hardware locality.
+ */
+
  GMX_LIBGMX_EXPORT
  int gmx_hostname_num(void);
  /* Ostensibly, returns a integer characteristic of and unique to each
diff --git a/include/string2.h b/include/string2.h

index 807eff30760ecb92270cb94af8af3673eff9ae9d..4e94036938dce7b6de22b9cc1fa3d90a1c3edf7b 100644 (file)
--- a/include/string2.h
+++ b/include/string2.h
@@ -136,14 +136,19 @@ extern const unsigned int
      gmx_string_hash_init;
  
  /* Return a hash of the string according to Dan J. Bernsteins algorithm.
- * This routine only uses characters for which isalnum(c) is true,
- * and all characters are converted to upper case.
   * On the first invocation for a new string, use the constant
   * gmx_string_hash_init for the second argument. If you want to create a hash
   * corresponding to several concatenated strings, provide the returned hash
   * value as hash_init for the second string, etc.
   */
  unsigned int
+gmx_string_fullhash_func(const char *s, unsigned int hash_init);
+
+/* Identical to gmx_string_fullhash_func, except that
+ * this routine only uses characters for which isalnum(c) is true,
+ * and all characters are converted to upper case.
+ */
+unsigned int
  gmx_string_hash_func(const char *s, unsigned int hash_init);
  
  /** Pattern matcing with wildcards. */
diff --git a/src/gmxlib/gmx_detect_hardware.c b/src/gmxlib/gmx_detect_hardware.c

index c407f40b5d34fc421680f34571ac6aaf9ad67dab..2a3156eeaad73e0b45f850ce9fdc10c659a36ad6 100644 (file)
--- a/src/gmxlib/gmx_detect_hardware.c
+++ b/src/gmxlib/gmx_detect_hardware.c
@@ -477,12 +477,88 @@ static int get_nthreads_hw_avail(FILE *fplog, const t_commrec *cr)
      return ret;
  }
  
+static void gmx_detect_gpus(FILE *fplog, const t_commrec *cr,
+                            gmx_gpu_info_t *gpu_info)
+{
+#ifdef GMX_LIB_MPI
+    int              rank_world;
+    MPI_Comm         physicalnode_comm;
+#endif
+    int              rank_local;
+
+    /* Under certain circumstances MPI ranks on the same physical node
+     * can not simultaneously access the same GPU(s). Therefore we run
+     * the detection only on one MPI rank per node and broadcast the info.
+     * Note that with thread-MPI only a single thread runs this code.
+     *
+     * TODO: We should also do CPU hardware detection only once on each
+     * physical node and broadcast it, instead of do it on every MPI rank.
+     */
+#ifdef GMX_LIB_MPI
+    /* A split of MPI_COMM_WORLD over physical nodes is only required here,
+     * so we create and destroy it locally.
+     */
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
+    MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(),
+                   rank_world, &physicalnode_comm);
+    MPI_Comm_rank(physicalnode_comm, &rank_local);
+#else
+    /* Here there should be only one process, check this */
+    assert(cr->nnodes == 1 && cr->sim_nodeid == 0);
+
+    rank_local = 0;
+#endif
+
+    if (rank_local == 0)
+    {
+        char detection_error[STRLEN], sbuf[STRLEN];
+
+        if (detect_cuda_gpus(&hwinfo_g->gpu_info, detection_error) != 0)
+        {
+            if (detection_error != NULL && detection_error[0] != '\0')
+            {
+                sprintf(sbuf, ":\n      %s\n", detection_error);
+            }
+            else
+            {
+                sprintf(sbuf, ".");
+            }
+            md_print_warn(cr, fplog,
+                          "NOTE: Error occurred during GPU detection%s"
+                          "      Can not use GPU acceleration, will fall back to CPU kernels.\n",
+                          sbuf);
+        }
+    }
+
+#ifdef GMX_LIB_MPI
+    /* Broadcast the GPU info to the other ranks within this node */
+    MPI_Bcast(&hwinfo_g->gpu_info.ncuda_dev, 1, MPI_INT, 0, physicalnode_comm);
+
+    if (hwinfo_g->gpu_info.ncuda_dev > 0)
+    {
+        int cuda_dev_size;
+
+        cuda_dev_size = hwinfo_g->gpu_info.ncuda_dev*sizeof_cuda_dev_info();
+
+        if (rank_local > 0)
+        {
+            hwinfo_g->gpu_info.cuda_dev =
+                (cuda_dev_info_ptr_t)malloc(cuda_dev_size);
+        }
+        MPI_Bcast(hwinfo_g->gpu_info.cuda_dev, cuda_dev_size, MPI_BYTE,
+                  0, physicalnode_comm);
+        MPI_Bcast(&hwinfo_g->gpu_info.ncuda_dev_compatible, 1, MPI_INT,
+                  0, physicalnode_comm);
+    }
+
+    MPI_Comm_free(&physicalnode_comm);
+#endif
+}
+
  gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
                                     gmx_bool bDetectGPUs)
  {
-    char             sbuf[STRLEN];
      gmx_hw_info_t   *hw;
-    gmx_gpu_info_t   gpuinfo_auto, gpuinfo_user;
      int              ret;
  
      /* make sure no one else is doing the same thing */
@@ -520,23 +596,7 @@ gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
               getenv("GMX_DISABLE_GPU_DETECTION") == NULL);
          if (hwinfo_g->gpu_info.bDetectGPUs)
          {
-            char detection_error[STRLEN];
-
-            if (detect_cuda_gpus(&hwinfo_g->gpu_info, detection_error) != 0)
-            {
-                if (detection_error != NULL && detection_error[0] != '\0')
-                {
-                    sprintf(sbuf, ":\n      %s\n", detection_error);
-                }
-                else
-                {
-                    sprintf(sbuf, ".");
-                }
-                md_print_warn(cr, fplog,
-                              "NOTE: Error occurred during GPU detection%s"
-                              "      Can not use GPU acceleration, will fall back to CPU kernels.\n",
-                              sbuf);
-            }
+            gmx_detect_gpus(fplog, cr, &hwinfo_g->gpu_info);
          }
      }
      /* increase the reference counter */
diff --git a/src/gmxlib/gpu_utils/gpu_utils.cu b/src/gmxlib/gpu_utils/gpu_utils.cu

index 8640ed3bc80e74e69c1415bd3c4ba851abccafc5..ee3d5e10d6a99e6d9fa700edc67cd0ac8d05b030 100644 (file)
--- a/src/gmxlib/gpu_utils/gpu_utils.cu
+++ b/src/gmxlib/gpu_utils/gpu_utils.cu
@@ -882,3 +882,14 @@ int get_current_gpu_device_id(void)
  
      return gpuid;
  }
+
+/*! \brief Returns the size of the cuda_dev_info struct.
+ *
+ * The size of cuda_dev_info can be used for allocation and communication.
+ *
+ * \returns                 size in bytes of cuda_dev_info
+ */
+size_t sizeof_cuda_dev_info(void)
+{
+    return sizeof(cuda_dev_info);
+}
diff --git a/src/gmxlib/network.c b/src/gmxlib/network.c

index ec1c6bac828204194457455a6e75eb2fb18de4c4..30d4a248b9993b94df425883ffcc83cc3878742b 100644 (file)
--- a/src/gmxlib/network.c
+++ b/src/gmxlib/network.c
@@ -48,6 +48,7 @@
  #include "statutil.h"
  #include <ctype.h>
  #include "macros.h"
+#include "string2.h"
  
  #ifdef GMX_LIB_MPI
  #include <mpi.h>
@@ -264,6 +265,41 @@ int gmx_node_rank(void)
  #include <spi/include/kernel/location.h>
  #endif
  
+int gmx_physicalnode_id_hash(void)
+{
+    int hash_int;
+
+#ifndef GMX_LIB_MPI
+    /* We have a single physical node */
+    hash_int = 0;
+#else
+    int  resultlen;
+    char mpi_hostname[MPI_MAX_PROCESSOR_NAME];
+
+    /* This procedure can only differentiate nodes with different names.
+     * Architectures where different physical nodes have identical names,
+     * such as IBM Blue Gene, should use an architecture specific solution.
+     */
+    MPI_Get_processor_name(mpi_hostname, &resultlen);
+
+    /* The string hash function returns an unsigned int. We cast to an int.
+     * Negative numbers are converted to positive by setting the sign bit to 0.
+     * This makes the hash one bit smaller.
+     * A 63-bit hash (with 64-bit int) should be enough for unique node hashes,
+     * even on a million node machine. 31 bits might not be enough though!
+     */
+    hash_int =
+        (int)gmx_string_fullhash_func(mpi_hostname, gmx_string_hash_init);
+    if (hash_int < 0)
+    {
+        hash_int -= INT_MIN;
+    }
+#endif
+
+    return hash_int;
+}
+
+/* TODO: this function should be fully replaced by gmx_physicalnode_id_hash */
  int gmx_hostname_num()
  {
  #ifndef GMX_MPI
diff --git a/src/gmxlib/string2.c b/src/gmxlib/string2.c

index eadbf21a591804e985e429c36f6aae463e883821..71a2618cc53dd0e2cdabe06af8e536f981393d03 100644 (file)
--- a/src/gmxlib/string2.c
+++ b/src/gmxlib/string2.c
@@ -390,6 +390,18 @@ const unsigned int
      gmx_string_hash_init = 5381;
  
  
+unsigned int
+gmx_string_fullhash_func(const char *s, unsigned int hash_init)
+{
+    int c;
+
+    while ((c = (*s++)) != '\0')
+    {
+        hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
+    }
+    return hash_init;
+}
+
  unsigned int
  gmx_string_hash_func(const char *s, unsigned int hash_init)
  {
author	Berk Hess <hess@kth.se>
	Thu, 17 Oct 2013 10:11:55 +0000 (12:11 +0200)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Tue, 5 Nov 2013 18:21:41 +0000 (19:21 +0100)
include/gpu_utils.h		patch \| blob \| history
include/network.h		patch \| blob \| history
include/string2.h		patch \| blob \| history
src/gmxlib/gmx_detect_hardware.c		patch \| blob \| history
src/gmxlib/gpu_utils/gpu_utils.cu		patch \| blob \| history
src/gmxlib/network.c		patch \| blob \| history
src/gmxlib/string2.c		patch \| blob \| history