char *gpu_id; /* GPU IDs to use, each specified as a single character (digit) */
gmx_bool bUserSet; /* true if the GPUs in cuda_dev_use are manually provided by the user */
- int ncuda_dev_use; /* number of devices selected to be used */
- int *cuda_dev_use; /* index of the devices selected to be used */
+ int ncuda_dev_use; /* number of device IDs selected to be used */
+ int *cuda_dev_use; /* device index list providing the GPU to PP rank mapping; GPUs can be listed multiple times when ranks share them */
} gmx_gpu_opt_t;
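As an illustration of how these fields encode the mapping (hypothetical values, not part of the patch): with a user-supplied "-gpu_id 0011" on a node running four PP ranks, the selection fields would look roughly like this:

/* Hypothetical example: "-gpu_id 0011", four PP ranks on the node.
 *   gpu_opt->gpu_id        = "0011"
 *   gpu_opt->bUserSet      = TRUE
 *   gpu_opt->ncuda_dev_use = 4              (four IDs listed)
 *   gpu_opt->cuda_dev_use  = {0, 0, 1, 1}   (ranks 0-1 share GPU 0, ranks 2-3 share GPU 1)
 */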
/* Threading and GPU options, can be set automatically or by the user */
/* FW decl. */
static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count);
+static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
+ const gmx_gpu_opt_t *gpu_opt);
static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info, gmx_bool bPrintAll)
{
}
else
{
- sprintf(sbuf, "%d GPU%s %sselected for this run: ",
- ngpu_use, (ngpu_use > 1) ? "s" : "",
- gpu_opt->bUserSet ? "user-" : "auto-");
+ int ngpu_use_uniq;
+
+ ngpu_use_uniq = gmx_count_gpu_dev_unique(gpu_info, gpu_opt);
+
+ sprintf(sbuf, "%d GPU%s %sselected for this run.\n"
+ "Mapping of GPU%s to the %d PP rank%s in this node: ",
+ ngpu_use_uniq, (ngpu_use_uniq > 1) ? "s" : "",
+ gpu_opt->bUserSet ? "user-" : "auto-",
+ (ngpu_use > 1) ? "s" : "",
+ cr->nrank_pp_intranode,
+ (cr->nrank_pp_intranode > 1) ? "s" : "");
+
for (i = 0; i < ngpu_use; i++)
{
sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, gpu_opt, i));
if (same_count > 0)
{
- md_print_warn(cr, fplog,
- "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
- " multiple %s%s; this can cause performance loss.\n",
+ md_print_info(cr, fplog,
+ "NOTE: You assigned %s to multiple %s%s.\n",
same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
}
}
}
+/* Return 0 if none of the GPUs (per node) are shared among PP ranks.
+ *
+ * Sharing GPUs among multiple PP ranks is possible when the user passes
+ * GPU IDs. Here we check for sharing and return a non-zero value when
+ * this is detected. Note that the return value represents the number of
+ * PP rank pairs that share a device.
+ */
int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt)
{
int same_count = 0;
return same_count;
}
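The body of gmx_count_gpu_dev_shared() is elided by the diff context above; as the comment describes, it counts PP rank pairs that map to the same device. Below is a minimal sketch of such pair counting, using only the gmx_gpu_opt_t fields shown earlier (an illustration, not necessarily the patch's exact implementation):

static int count_shared_gpu_pairs_sketch(const gmx_gpu_opt_t *gpu_opt)
{
    int i, j, same_count = 0;

    /* Sharing can only occur with a user-set mapping, where the same
     * device ID may appear more than once in cuda_dev_use. */
    if (gpu_opt->bUserSet)
    {
        for (i = 0; i < gpu_opt->ncuda_dev_use - 1; i++)
        {
            for (j = i + 1; j < gpu_opt->ncuda_dev_use; j++)
            {
                same_count += (gpu_opt->cuda_dev_use[i] == gpu_opt->cuda_dev_use[j]);
            }
        }
    }

    return same_count;
}

For the hypothetical "0011" mapping above this yields 2: rank pairs 0/1 and 2/3 each share a device.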
+/* Count and return the number of unique GPUs (per node) selected.
+ *
+ * As sharing GPUs among multiple PP ranks is possible when the user passes
+ * GPU IDs, the number of GPUs used (per node) can be different from the
+ * number of GPU IDs selected.
+ */
+static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
+ const gmx_gpu_opt_t *gpu_opt)
+{
+ int i, uniq_count, ngpu;
+ int *uniq_ids;
+
+ assert(gpu_info);
+ assert(gpu_opt);
+
+ ngpu = gpu_info->ncuda_dev;
+ uniq_count = 0;
+
+ snew(uniq_ids, ngpu);
+
+ /* Each element in uniq_ids will be set to 0 or 1. The n-th element set
+ * to 1 indicates that the respective GPU was selected to be used. */
+ for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
+ {
+ uniq_ids[get_gpu_device_id(gpu_info, gpu_opt, i)] = 1;
+ }
+ /* Count the devices used. */
+ for (i = 0; i < ngpu; i++)
+ {
+ uniq_count += uniq_ids[i];
+ }
+
+ sfree(uniq_ids);
+
+ return uniq_count;
+}
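To make the comment's point concrete, a worked example with the same hypothetical "0011" mapping:

/* Hypothetical "0011" mapping on a four-rank node:
 *   gpu_opt->ncuda_dev_use        = 4   (four IDs listed)
 *   gmx_count_gpu_dev_unique(...) = 2   (GPUs 0 and 1)
 * so the "selected for this run" message above reports 2 GPUs while the
 * per-rank mapping line lists 4 entries. */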
+
/* Return the number of hardware threads supported by the current CPU.
* We assume that this is equal to the number of CPUs reported to be
"[PAR]",
"With GPUs (only supported with the Verlet cut-off scheme), the number",
"of GPUs should match the number of MPI processes or MPI threads,",
- "excluding PME-only processes/threads. With thread-MPI the number",
+ "excluding PME-only processes/threads. With thread-MPI, unless set on the command line, the number",
"of MPI threads will automatically be set to the number of GPUs detected.",
- "When you want to use a subset of the available GPUs, you can use",
- "the [TT]-gpu_id[tt] option, where GPU id's are passed as a string,",
- "e.g. 02 for using GPUs 0 and 2. When you want different GPU id's",
- "on different nodes of a compute cluster, use the GMX_GPU_ID environment",
- "variable instead. The format for GMX_GPU_ID is identical to ",
- "[TT]-gpu_id[tt], but an environment variable can have different values",
- "on different nodes of a cluster.",
+ "To use a subset of the available GPUs, or to manually provide a mapping of",
+ "GPUs to PP ranks, you can use the [TT]-gpu_id[tt] option. The argument of [TT]-gpu_id[tt] is",
+ "a string of digits (without delimiter) representing device id-s of the GPUs to be used.",
+ "For example, \"[TT]02[tt]\" specifies using GPUs 0 and 2 in the first and second PP ranks per compute node",
+ "respectively. To select different sets of GPU-s",
+ "on different nodes of a compute cluster, use the [TT]GMX_GPU_ID[tt] environment",
+ "variable instead. The format for [TT]GMX_GPU_ID[tt] is identical to ",
+ "[TT]-gpu_id[tt], with the difference that an environment variable can have",
+ "different values on different compute nodes. Multiple MPI ranks on each node",
+ "can share GPUs. This is accomplished by specifying the id(s) of the GPU(s)",
+ "multiple times, e.g. \"[TT]0011[tt]\" for four ranks sharing two GPUs in this node.",
+ "This works within a single simulation, or a multi-simulation, with any form of MPI.",
"[PAR]",
"When using PME with separate PME nodes or with a GPU, the two major",
"compute tasks, the non-bonded force calculation and the PME calculation",
{ "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
"Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
{ "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id},
- "List of GPU id's to use" },
+ "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" },
{ "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
"Check for all bonded interactions with DD" },
{ "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},