Clarified GPU selection output and mdrun help

author Szilard Pall <pall.szilard@gmail.com>

Fri, 8 Nov 2013 17:05:25 +0000 (18:05 +0100)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Wed, 13 Nov 2013 11:36:21 +0000 (12:36 +0100)
author Szilard Pall <pall.szilard@gmail.com>
Fri, 8 Nov 2013 17:05:25 +0000 (18:05 +0100)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Wed, 13 Nov 2013 11:36:21 +0000 (12:36 +0100)
diff --git a/include/types/hw_info.h b/include/types/hw_info.h

index db9ce3b7fcf88fdf360b73bfb20602750ccde3bd..90081fb9ca837d3fb8d0cb87bddceb0a291b2def 100644 (file)
--- a/include/types/hw_info.h
+++ b/include/types/hw_info.h
@@ -101,8 +101,8 @@ typedef struct
      char     *gpu_id;        /* GPU id's to use, each specified as chars */
      gmx_bool  bUserSet;      /* true if the GPUs in cuda_dev_use are manually provided by the user */
  
-    int       ncuda_dev_use; /* number of devices selected to be used */
-    int      *cuda_dev_use;  /* index of the devices selected to be used */
+    int       ncuda_dev_use; /* number of device (IDs) selected to be used */
+    int      *cuda_dev_use;  /* device index list providing GPU to PP rank mapping, GPUs can be listed multiple times when ranks share them */
  } gmx_gpu_opt_t;
  
  /* Threading and GPU options, can be set automatically or by the user */
diff --git a/src/gmxlib/gmx_detect_hardware.c b/src/gmxlib/gmx_detect_hardware.c

index e918a78c213e7a5b418a33f16d04637f2d088860..d1ebf1584759d9c5de0925131fad256e179389f1 100644 (file)
--- a/src/gmxlib/gmx_detect_hardware.c
+++ b/src/gmxlib/gmx_detect_hardware.c
@@ -77,6 +77,8 @@ static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
  
  /* FW decl. */
  static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count);
+static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
+                                    const gmx_gpu_opt_t  *gpu_opt);
  
  static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info, gmx_bool bPrintAll)
  {
@@ -155,9 +157,18 @@ static void print_gpu_use_stats(FILE                 *fplog,
      }
      else
      {
-        sprintf(sbuf, "%d GPU%s %sselected for this run: ",
-                ngpu_use, (ngpu_use > 1) ? "s" : "",
-                gpu_opt->bUserSet ? "user-" : "auto-");
+        int ngpu_use_uniq;
+
+        ngpu_use_uniq = gmx_count_gpu_dev_unique(gpu_info, gpu_opt);
+
+        sprintf(sbuf, "%d GPU%s %sselected for this run.\n"
+                "Mapping of GPU%s to the %d PP rank%s in this node: ",
+                ngpu_use_uniq, (ngpu_use_uniq > 1) ? "s" : "",
+                gpu_opt->bUserSet ? "user-" : "auto-",
+                (ngpu_use > 1) ? "s" : "",
+                cr->nrank_pp_intranode,
+                (cr->nrank_pp_intranode > 1) ? "s" : "");
+
          for (i = 0; i < ngpu_use; i++)
          {
              sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, gpu_opt, i));
@@ -376,9 +387,8 @@ void gmx_check_hw_runconf_consistency(FILE *fplog,
  
              if (same_count > 0)
              {
-                md_print_warn(cr, fplog,
-                              "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
-                              "      multiple %s%s; this can cause performance loss.\n",
+                md_print_info(cr, fplog,
+                              "NOTE: You assigned %s to multiple %s%s.\n",
                                same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
              }
          }
@@ -395,6 +405,13 @@ void gmx_check_hw_runconf_consistency(FILE *fplog,
  
  }
  
+/* Return 0 if none of the GPUs (per node) are shared among PP ranks.
+ *
+ * Sharing GPUs among multiple PP ranks is possible when the user passes
+ * GPU IDs. Here we check for sharing and return a non-zero value when
+ * this is detected. Note that the return value represents the number of
+ * PP rank pairs that share a device.
+ */
  int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt)
  {
      int      same_count    = 0;
@@ -417,6 +434,43 @@ int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt)
      return same_count;
  }
  
+/* Count and return the number of unique GPUs (per node) selected.
+ *
+ * As sharing GPUs among multiple PP ranks is possible when the user passes
+ * GPU IDs, the number of GPUs used (per node) can be different from the
+ * number of GPU IDs selected.
+ */
+static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
+                                    const gmx_gpu_opt_t  *gpu_opt)
+{
+    int  i, uniq_count, ngpu;
+    int *uniq_ids;
+
+    assert(gpu_info);
+    assert(gpu_opt);
+
+    ngpu        = gpu_info->ncuda_dev;
+    uniq_count  = 0;
+
+    snew(uniq_ids, ngpu);
+
+    /* Each element in uniq_ids will be set to 0 or 1. The n-th element set
+     * to 1 indicates that the respective GPU was selected to be used. */
+    for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
+    {
+        uniq_ids[get_gpu_device_id(gpu_info, gpu_opt, i)] = 1;
+    }
+    /* Count the devices used. */
+    for (i = 0; i < ngpu; i++)
+    {
+        uniq_count += uniq_ids[i];
+    }
+
+    sfree(uniq_ids);
+
+    return uniq_count;
+}
+
  
  /* Return the number of hardware threads supported by the current CPU.
   * We assume that this is equal with the number of CPUs reported to be
diff --git a/src/kernel/mdrun.c b/src/kernel/mdrun.c

index 29cb7e3e4326d9430744aaf3f9458687b59844ae..2ccc6f67469c0aab5263ec48636818785073f661 100644 (file)
--- a/src/kernel/mdrun.c
+++ b/src/kernel/mdrun.c
@@ -121,15 +121,20 @@ int cmain(int argc, char *argv[])
          "[PAR]",
          "With GPUs (only supported with the Verlet cut-off scheme), the number",
          "of GPUs should match the number of MPI processes or MPI threads,",
-        "excluding PME-only processes/threads. With thread-MPI the number",
+        "excluding PME-only processes/threads. With thread-MPI, unless set on the command line, the number",
          "of MPI threads will automatically be set to the number of GPUs detected.",
-        "When you want to use a subset of the available GPUs, you can use",
-        "the [TT]-gpu_id[tt] option, where GPU id's are passed as a string,",
-        "e.g. 02 for using GPUs 0 and 2. When you want different GPU id's",
-        "on different nodes of a compute cluster, use the GMX_GPU_ID environment",
-        "variable instead. The format for GMX_GPU_ID is identical to ",
-        "[TT]-gpu_id[tt], but an environment variable can have different values",
-        "on different nodes of a cluster.",
+        "To use a subset of the available GPUs, or to manually provide a mapping of",
+        "GPUs to PP ranks, you can use the [TT]-gpu_id[tt] option. The argument of [TT]-gpu_id[tt] is",
+        "a string of digits (without delimiter) representing device id-s of the GPUs to be used.",
+        "For example, \"[TT]02[tt]\" specifies using GPUs 0 and 2 in the first and second PP ranks per compute node",
+        "respectively. To select different sets of GPU-s",
+        "on different nodes of a compute cluster, use the [TT]GMX_GPU_ID[tt] environment",
+        "variable instead. The format for [TT]GMX_GPU_ID[tt] is identical to ",
+        "[TT]-gpu_id[tt], with the difference that an environment variable can have",
+        "different values on different compute nodes. Multiple MPI ranks on each node",
+        "can share GPUs. This is accomplished by specifying the id(s) of the GPU(s)",
+        "multiple times, e.g. \"[TT]0011[tt]\" for four ranks sharing two GPUs in this node.",
+        "This works within a single simulation, or a multi-simulation, with any form of MPI.",
          "[PAR]",
          "When using PME with separate PME nodes or with a GPU, the two major",
          "compute tasks, the non-bonded force calculation and the PME calculation",
@@ -483,7 +488,7 @@ int cmain(int argc, char *argv[])
          { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
            "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
          { "-gpu_id",  FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id},
-          "List of GPU id's to use" },
+          "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" },
          { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
            "Check for all bonded interactions with DD" },
          { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
author	Szilard Pall <pall.szilard@gmail.com>
	Fri, 8 Nov 2013 17:05:25 +0000 (18:05 +0100)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Wed, 13 Nov 2013 11:36:21 +0000 (12:36 +0100)
include/types/hw_info.h		patch \| blob \| history
src/gmxlib/gmx_detect_hardware.c		patch \| blob \| history
src/kernel/mdrun.c		patch \| blob \| history