Clarified GPU selection output and mdrun help

[alexxy/gromacs.git] / src / kernel / mdrun.c
diff --git a/src/kernel/mdrun.c b/src/kernel/mdrun.c

index 29cb7e3e4326d9430744aaf3f9458687b59844ae..2ccc6f67469c0aab5263ec48636818785073f661 100644 (file)
--- a/src/kernel/mdrun.c
+++ b/src/kernel/mdrun.c
@@ -121,15 +121,20 @@ int cmain(int argc, char *argv[])
          "[PAR]",
          "With GPUs (only supported with the Verlet cut-off scheme), the number",
          "of GPUs should match the number of MPI processes or MPI threads,",
-        "excluding PME-only processes/threads. With thread-MPI the number",
+        "excluding PME-only processes/threads. With thread-MPI, unless set on the command line, the number",
          "of MPI threads will automatically be set to the number of GPUs detected.",
-        "When you want to use a subset of the available GPUs, you can use",
-        "the [TT]-gpu_id[tt] option, where GPU id's are passed as a string,",
-        "e.g. 02 for using GPUs 0 and 2. When you want different GPU id's",
-        "on different nodes of a compute cluster, use the GMX_GPU_ID environment",
-        "variable instead. The format for GMX_GPU_ID is identical to ",
-        "[TT]-gpu_id[tt], but an environment variable can have different values",
-        "on different nodes of a cluster.",
+        "To use a subset of the available GPUs, or to manually provide a mapping of",
+        "GPUs to PP ranks, you can use the [TT]-gpu_id[tt] option. The argument of [TT]-gpu_id[tt] is",
+        "a string of digits (without delimiter) representing the device IDs of the GPUs to be used.",
+        "For example, \"[TT]02[tt]\" specifies using GPUs 0 and 2 in the first and second PP ranks per compute node,",
+        "respectively. To select different sets of GPUs",
+        "on different nodes of a compute cluster, use the [TT]GMX_GPU_ID[tt] environment",
+        "variable instead. The format for [TT]GMX_GPU_ID[tt] is identical to ",
+        "[TT]-gpu_id[tt], with the difference that an environment variable can have",
+        "different values on different compute nodes. Multiple MPI ranks on each node",
+        "can share GPUs. This is accomplished by specifying the id(s) of the GPU(s)",
+        "multiple times, e.g. \"[TT]0011[tt]\" for four ranks sharing two GPUs on this node.",
+        "This works within a single simulation, or a multi-simulation, with any form of MPI.",
          "[PAR]",
          "When using PME with separate PME nodes or with a GPU, the two major",
          "compute tasks, the non-bonded force calculation and the PME calculation",
@@ -483,7 +488,7 @@ int cmain(int argc, char *argv[])
          { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
            "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
          { "-gpu_id",  FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id},
-          "List of GPU id's to use" },
+          "List of GPU device IDs to use; specifies the per-node PP rank to GPU mapping" },
          { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
            "Check for all bonded interactions with DD" },
          { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},