2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012, by the GROMACS development team, led by
5 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6 * others, as listed in the AUTHORS file in the top-level source
7 * directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "types/enums.h"
44 #include "types/hw_info.h"
45 #include "types/commrec.h"
46 #include "gmx_fatal.h"
47 #include "gmx_fatal_collective.h"
49 #include "gpu_utils.h"
51 #include "gmx_detect_hardware.h"
53 #include "md_logging.h"
55 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
59 /* Although the user cannot pass more than 10 different GPU IDs, because
60 * the IDs are assumed to be represented by single digits, multiple
61 * processes can share a GPU, so we can end up with more than 10 IDs.
62 * To account for potential extreme cases we'll set the limit to a pretty
63 * ridiculous number. */
64 static unsigned int max_gpu_ids_user = 64;
/* Forward declaration; the definition appears further down in this file. */
67 void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
/* Build a textual listing of the detected GPUs.
 *
 * Reads the device count from gpu_info->ncuda_dev and, for each device
 * index, asks get_gpu_device_info_string() to describe that device in stmp.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (local declarations, the rest of the loop body and any use of
 * bPrintAll). Presumably each description is appended to sbuf -- verify
 * against the complete file.
 */
69 static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info, gmx_bool bPrintAll)
74 ndev = gpu_info->ncuda_dev;
/* One description per detected device, in device-index order. */
77 for (i = 0; i < ndev; i++)
79 get_gpu_device_info_string(stmp, gpu_info, i);
/* Report the result of GPU detection through md_print_warn().
 *
 * The GPU count is gpu_info->ncuda_dev. One message prints the count
 * together with the listing produced by sprint_gpus(); the other prints
 * "No GPUs detected". Both messages end with the optional host suffix
 * held in onhost.
 *
 * NOTE(review): the condition choosing between the two messages and the
 * remaining parameters of the signature are not visible in this excerpt;
 * presumably the choice depends on whether ngpu is positive -- confirm.
 */
89 static void print_gpu_detection_stats(FILE *fplog,
90 const gmx_gpu_info_t *gpu_info,
/* onhost: 9 prefix characters plus the 256 bytes handed to
 * gmx_gethostname() fit in 266 bytes with one byte to spare. */
93 char onhost[266],stmp[STRLEN];
96 ngpu = gpu_info->ncuda_dev;
98 #if defined GMX_MPI && !defined GMX_THREAD_MPI
99 /* We only print the detection on one, of possibly multiple, nodes */
/* Copies the 9-character prefix plus its terminating NUL (10 bytes). */
100 strncpy(onhost," on host ",10);
/* gmx_gethostname() is pointed at the byte right after the prefix,
 * i.e. where the NUL was just stored. */
101 gmx_gethostname(onhost+9,256);
103 /* We detect all relevant GPUs */
/* Copies only the terminating NUL, which makes onhost an empty string. */
104 strncpy(onhost,"",1);
109 sprint_gpus(stmp, gpu_info, TRUE);
110 md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n",
111 ngpu, (ngpu > 1) ? "s" : "", onhost, stmp);
115 md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost);
/* Report which GPUs were selected for the run.
 *
 * If GPUs were detected but none is selected, a note suggesting the Verlet
 * scheme is formatted. Otherwise a summary line is built in sbuf: the
 * number of selected GPUs, whether the selection was "user-" or "auto-"
 * (from gpu_info->bUserSet), and one "#<id>" item per selected GPU.
 * The result is printed with md_print_info().
 *
 * NOTE(review): the call that formats the "not used" note, the else
 * branch structure and the code that appends stmp to sbuf are not visible
 * in this excerpt -- confirm against the complete file.
 */
119 static void print_gpu_use_stats(FILE *fplog,
120 const gmx_gpu_info_t *gpu_info,
123 char sbuf[STRLEN], stmp[STRLEN];
124 int i, ngpu, ngpu_all;
/* ngpu: GPUs selected for use; ngpu_all: all GPUs in the detected list. */
126 ngpu = gpu_info->ncuda_dev_use;
127 ngpu_all = gpu_info->ncuda_dev;
129 /* Issue note if GPUs are available but not used */
130 if (ngpu_all > 0 && ngpu < 1)
133 "%d compatible GPU%s detected in the system, but none will be used.\n"
134 "Consider trying GPU acceleration with the Verlet scheme!",
135 ngpu_all, (ngpu_all > 1) ? "s" : "");
/* Start of the summary line; the per-GPU items follow in the loop. */
139 sprintf(sbuf, "%d GPU%s %sselected to be used for this run: ",
140 ngpu, (ngpu > 1) ? "s" : "",
141 gpu_info->bUserSet ? "user-" : "auto-");
142 for (i = 0; i < ngpu; i++)
/* Render the id of the i-th selected GPU as "#<id>". */
144 sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, i));
152 md_print_info(cr, fplog, "%s\n\n", sbuf);
155 /* Parse a "plain" GPU ID string which contains a sequence of digits corresponding
156 * to GPU IDs; the order will indicate the process/tMPI thread - GPU assignment. */
/* idstr:  NUL-terminated string; every character must be a decimal digit.
 * nid:    *nid is the number of characters converted by the loop below.
 *         NOTE(review): the line that sets *nid is not visible in this
 *         excerpt; presumably it is the string length -- confirm.
 * idlist: receives idstr[i] - '0' at index i. The caller must provide
 *         enough room; gmx_detect_hardware() in this file allocates
 *         max_gpu_ids_user entries.
 *
 * Calls gmx_fatal() when the string is longer than max_gpu_ids_user or
 * contains a character outside '0'..'9'.
 */
157 static void parse_gpu_id_plain_string(const char *idstr, int *nid, int *idlist)
162 len_idstr = strlen(idstr);
164 if (len_idstr > max_gpu_ids_user)
/* NOTE(review): max_gpu_ids_user is declared unsigned int but is printed
 * with a signed int conversion here; the type of len_idstr is not visible. */
166 gmx_fatal(FARGS,"%d GPU IDs provided, but only at most %d are supported",
167 len_idstr, max_gpu_ids_user);
172 for (i = 0; i < *nid; i++)
/* Reject anything that is not a decimal digit. */
174 if (idstr[i] < '0' || idstr[i] > '9')
176 gmx_fatal(FARGS, "Invalid character in GPU ID string: '%c'\n", idstr[i]);
/* Convert the digit character to its numeric value. */
178 idlist[i] = idstr[i] - '0';
/* Placeholder for parsing a GPU ID string in CSV form (presumably
 * comma-separated values, going by the name). In the visible lines none of
 * the parameters is used and the function only calls gmx_incons(). */
182 static void parse_gpu_id_csv_string(const char *idstr, int *nid, int *idlist)
184 /* XXX implement CSV format to support more than 10 different GPUs in a box. */
185 gmx_incons("Not implemented yet");
/* Check that the launch configuration is consistent with the hardware.
 *
 * Visible steps:
 *  - run gmx_cpuid_acceleration_check() on hwinfo->cpuid_info;
 *  - take the number of PP processes per node from cr->nnodes_pp_intra;
 *  - print the GPU detection result;
 *  - when GPU use is requested, possible and not emulated, compare the
 *    number of GPUs in use with the number of PP processes/threads, emit
 *    notes or error messages, and in some branches reduce the number of
 *    GPUs in use with limit_num_gpus_used();
 *  - for a user-supplied GPU list, look for ids that occur more than once;
 *  - print which GPUs will be used.
 *
 * NOTE(review): many lines of this function are missing from this excerpt
 * (the rest of the signature, braces, the comparisons between npppn and
 * ngpu, and the calls that emit the error messages). The comments below
 * only describe what the visible lines establish.
 */
188 void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
189 const t_commrec *cr, int ntmpi_requested,
192 int npppn, ntmpi_pp, ngpu;
193 char sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
195 gmx_bool bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
/* Defaults; bNthreadsAuto is overridden below in thread-MPI builds. */
200 btMPI = bMPI = FALSE;
201 bNthreadsAuto = FALSE;
202 #if defined(GMX_THREAD_MPI)
/* A requested thread count below 1 is treated as automatic selection. */
204 bNthreadsAuto = (ntmpi_requested < 1);
205 #elif defined(GMX_LIB_MPI)
215 /* GPU emulation detection is done later, but we need here as well
216 * -- uncool, but there's no elegant workaround */
/* Both flags only test whether the variable is set; its value is ignored. */
217 bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL);
218 bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
222 /* check the acceleration mdrun is compiled with against hardware capabilities */
223 /* TODO: Here we assume homogeneous hardware which is not necessarily the case!
224 * Might not hurt to add an extra check over MPI. */
225 gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog);
228 /* Below we only do consistency checks for PP and GPUs,
229 * this is irrelevant for PME only nodes, so in that case we return here.
231 if (!(cr->duty & DUTY_PP))
236 /* Need to ensure that we have enough GPUs:
237 * - need one GPU per PP node
238 * - no GPU oversubscription with tMPI
239 * => keep on the GPU support, otherwise turn off (or bail if forced)
241 /* number of PP processes per node */
242 npppn = cr->nnodes_pp_intra;
/* Wording used by the messages below: th_or_proc names the parallel unit,
 * th_or_proc_plural is its plural suffix and pernode an optional
 * " per node" qualifier. The conditions selecting each variant are not
 * visible in this excerpt. */
245 th_or_proc_plural[0] = '\0';
248 sprintf(th_or_proc, "thread-MPI thread");
251 sprintf(th_or_proc_plural, "s");
256 sprintf(th_or_proc, "MPI process");
259 sprintf(th_or_proc_plural, "es");
261 sprintf(pernode, " per node");
265 /* neither MPI nor tMPI */
266 sprintf(th_or_proc, "process");
271 print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
/* The GPU checks apply only when GPU use is requested (bUseGPU), a GPU is
 * usable (hwinfo->bCanUseGPU) and GPU emulation is not enabled. */
274 if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)
276 ngpu = hwinfo->gpu_info.ncuda_dev_use;
277 sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
279 /* number of tMPI threads auto-adjusted */
/* NOTE(review): the message strings in this branch contain the typos
 * "coud" and "tread-MPI". They are runtime text, so they are left
 * unchanged here; fix them in a code change. */
280 if (btMPI && bNthreadsAuto && SIMMASTER(cr))
284 if (hwinfo->gpu_info.bUserSet)
286 /* The user manually provided more GPUs than threads we could
287 * automatically start. */
289 "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
290 "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
291 ngpu, gpu_plural, npppn, th_or_proc_plural,
292 ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
296 /* There are more GPUs than tMPI threads; we have to limit the number GPUs used. */
297 md_print_warn(cr,fplog,
298 "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
299 " %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
300 ngpu, gpu_plural, npppn, th_or_proc_plural,
301 ShortProgram(), npppn, npppn > 1 ? "s" : "",
302 bMaxMpiThreadsSet ? "\n Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
/* Only the rank whose intra-node id is 0 trims the list of GPUs in use. */
304 if (cr->nodeid_intra == 0)
306 limit_num_gpus_used(hwinfo, npppn);
/* Re-read the count: limit_num_gpus_used() may have lowered it. */
307 ngpu = hwinfo->gpu_info.ncuda_dev_use;
308 sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
/* A user-supplied GPU list that does not match the PP count is reported
 * with the "Incorrect launch configuration" text below. */
316 if (hwinfo->gpu_info.bUserSet)
319 "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
320 "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
321 th_or_proc, btMPI ? "s" : "es" , pernode,
322 ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
328 md_print_warn(cr,fplog,
329 "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
330 " PP %s%s%s than GPU%s available.\n"
331 " Each PP %s can only use one GPU, so only %d GPU%s%s will be used.",
333 th_or_proc, th_or_proc_plural, pernode, gpu_plural,
334 th_or_proc, npppn, gpu_plural, pernode);
/* With MPI every rank trims the list; with thread-MPI only the rank whose
 * intra-node id is 0 does. */
336 if (bMPI || (btMPI && cr->nodeid_intra == 0))
338 limit_num_gpus_used(hwinfo, npppn);
339 ngpu = hwinfo->gpu_info.ncuda_dev_use;
340 sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
345 /* Avoid duplicate error messages.
346 * Unfortunately we can only do this at the physical node
347 * level, since the hardware setup and MPI process count
348 * might be differ over physical nodes.
350 if (cr->nodeid_intra == 0)
353 "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
354 "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
355 th_or_proc, btMPI ? "s" : "es" , pernode,
356 ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
361 /* Avoid other ranks to continue after inconsistency */
362 MPI_Barrier(cr->mpi_comm_mygroup);
/* For a user-supplied GPU list, the rank with intra-node id 0 checks
 * whether the same device id was listed more than once. */
369 if (hwinfo->gpu_info.bUserSet && (cr->nodeid_intra == 0))
371 int i, j, same_count;
372 gmx_bool bSomeSame, bAllDifferent;
376 bAllDifferent = TRUE;
/* Compare every pair (i, j) with i < j of the GPU ids in use; same_count
 * accumulates the number of matching pairs.
 * NOTE(review): the initialisation of bSomeSame and same_count is not
 * visible in this excerpt -- confirm both are set before this loop. */
378 for (i = 0; i < ngpu - 1; i++)
380 for (j = i + 1; j < ngpu; j++)
382 bSomeSame |= hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
383 bAllDifferent &= hwinfo->gpu_info.cuda_dev_use[i] != hwinfo->gpu_info.cuda_dev_use[j];
384 same_count += hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
/* With thread-MPI a repeated id leads to the "Invalid GPU assignment"
 * text; the call that emits it is not visible in this excerpt. */
388 if (btMPI && !bAllDifferent)
391 "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
392 "Use MPI if you are sure that you want to assign GPU to multiple threads.");
/* Sharing is only warned about here; presumably this is guarded by
 * bSomeSame -- the condition is not visible in this excerpt. */
397 md_print_warn(cr,fplog,
398 "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
399 " multiple %s%s; this should be avoided as it generally\n"
400 " causes performance loss.",
401 same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
/* Finally report which GPUs will be used. */
404 print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
/* Summary of the visible platform branches of get_nthreads_hw_avail():
 *  - native Windows builds (not Cygwin): sysinfo.dwNumberOfProcessors
 *    after GetSystemInfo();
 *  - builds with HAVE_SYSCONF: sysconf() with the first of
 *    _SC_NPROCESSORS_ONLN, _SC_NPROC_ONLN, _SC_NPROCESSORS_CONF and
 *    _SC_NPROC_CONF that is defined.
 * The detected value is written to debug with fprintf(), and a warning is
 * issued through md_print_warn() when it differs from
 * gmx_omp_get_num_procs().
 *
 * NOTE(review): the fallback value for other platforms, the guards around
 * the debug print and the OpenMP comparison, and the return statement are
 * not visible in this excerpt. The terminators of the two multi-line
 * comments below are also missing here; confirm they exist in the
 * complete file.
 */
408 /* Return the number of hardware threads supported by the current CPU.
409 * We assume that this is equal with the number of CPUs reported to be
410 * online by the OS at the time of the call.
412 static int get_nthreads_hw_avail(FILE *fplog, const t_commrec *cr)
416 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
419 GetSystemInfo( &sysinfo );
420 ret = sysinfo.dwNumberOfProcessors;
421 #elif defined HAVE_SYSCONF
422 /* We are probably on Unix.
423 * Now check if we have the argument to use before executing the call
425 #if defined(_SC_NPROCESSORS_ONLN)
426 ret = sysconf(_SC_NPROCESSORS_ONLN);
427 #elif defined(_SC_NPROC_ONLN)
428 ret = sysconf(_SC_NPROC_ONLN);
429 #elif defined(_SC_NPROCESSORS_CONF)
430 ret = sysconf(_SC_NPROCESSORS_CONF);
431 #elif defined(_SC_NPROC_CONF)
432 ret = sysconf(_SC_NPROC_CONF);
433 #endif /* End of check for sysconf argument values */
436 /* Neither windows nor Unix. No fscking idea how many CPUs we have! */
/* Record the detected count in the debug output. */
442 fprintf(debug, "Detected %d processors, will use this as the number "
443 "of supported hardware threads.\n", ret);
/* Cross-check against the processor count reported by the OpenMP layer. */
447 if (ret != gmx_omp_get_num_procs())
449 md_print_warn(cr, fplog,
450 "Number of CPUs detected (%d) does not match the number reported by OpenMP (%d).\n"
451 "Consider setting the launch configuration manually!",
452 ret, gmx_omp_get_num_procs());
/* Detect the CPU and GPU hardware and store the result in hwinfo.
 *
 * Visible steps:
 *  - initialise CPUID information (fatal on failure);
 *  - store the number of available hardware threads;
 *  - reset the GPU lists in hwinfo->gpu_info;
 *  - abort when GPU use is forced but the binary has no GPU support;
 *  - run detect_cuda_gpus() unless GMX_DISABLE_GPU_DETECTION is set, and
 *    warn when detection reports an error;
 *  - when GPU use is forced or to be tried: either validate a
 *    user-supplied list of GPU ids or let pick_compatible_gpus() choose,
 *    then set hwinfo->bCanUseGPU from the number of GPUs in use.
 *
 * NOTE(review): parts of the signature (cr and gpu_id are used below but
 * not visible in it), several conditions and all braces are missing from
 * this excerpt.
 */
459 void gmx_detect_hardware(FILE *fplog, gmx_hw_info_t *hwinfo,
461 gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
466 char sbuf[STRLEN], stmp[STRLEN];
468 gmx_gpu_info_t gpuinfo_auto, gpuinfo_user;
473 /* detect CPUID info; no fuss, we don't detect system-wide
474 * -- sloppy, but that's it for now */
475 if (gmx_cpuid_init(&hwinfo->cpuid_info) != 0)
477 gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
480 /* detect number of hardware threads */
481 hwinfo->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
/* Start from an empty GPU description: nothing detected, nothing selected. */
484 hwinfo->gpu_info.ncuda_dev_use = 0;
485 hwinfo->gpu_info.cuda_dev_use = NULL;
486 hwinfo->gpu_info.ncuda_dev = 0;
487 hwinfo->gpu_info.cuda_dev = NULL;
495 /* Bail if binary is not compiled with GPU on */
496 if (bForceUseGPU && !bGPUBin)
498 gmx_fatal_collective(FARGS, cr, NULL, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
501 /* run the detection if the binary was compiled with GPU support */
502 if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION")==NULL)
504 char detection_error[STRLEN];
/* A non-zero return from detect_cuda_gpus() is treated as a detection error. */
506 if (detect_cuda_gpus(&hwinfo->gpu_info, detection_error) != 0)
/* NOTE(review): detection_error is a local array, so the comparison with
 * NULL is always true; only the empty-string test has an effect. */
508 if (detection_error != NULL && detection_error[0] != '\0')
510 sprintf(sbuf, ":\n %s\n", detection_error);
516 md_print_warn(cr, fplog,
517 "NOTE: Error occurred during GPU detection%s"
518 " Can not use GPU acceleration, will fall back to CPU kernels.\n",
523 if (bForceUseGPU || bTryUseGPU)
/* The GMX_GPU_ID environment variable and gpu_id are mutually exclusive. */
525 env = getenv("GMX_GPU_ID");
526 if (env != NULL && gpu_id != NULL)
528 gmx_fatal(FARGS,"GMX_GPU_ID and -gpu_id can not be used at the same time");
535 /* parse GPU IDs if the user passed any */
538 int *gpuid, *checkres;
/* Scratch arrays sized for the maximum accepted number of ids.
 * NOTE(review): their release is not visible in this excerpt -- confirm. */
541 snew(gpuid, max_gpu_ids_user);
542 snew(checkres, max_gpu_ids_user);
/* NOTE(review): how a gpu_id value reaches env is not visible here. */
544 parse_gpu_id_plain_string(env, &nid, gpuid);
548 gmx_fatal(FARGS, "Empty GPU ID string passed\n");
/* checkres receives one status per requested id. */
551 res = check_select_cuda_gpus(checkres, &hwinfo->gpu_info, gpuid, nid);
/* Presumably reached when res signals a failed check (condition not
 * visible): show what was detected, list every requested GPU whose status
 * is not egpuCompatible, then abort. */
555 print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
557 sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
558 for (i = 0; i < nid; i++)
560 if (checkres[i] != egpuCompatible)
562 sprintf(stmp, " GPU #%d: %s\n",
563 gpuid[i], gpu_detect_res_str[checkres[i]]);
567 gmx_fatal(FARGS, "%s", sbuf);
570 hwinfo->gpu_info.bUserSet = TRUE;
/* Presumably the branch without a user-supplied list (the else line is
 * not visible): pick_compatible_gpus() selects the GPUs. */
577 pick_compatible_gpus(&hwinfo->gpu_info);
578 hwinfo->gpu_info.bUserSet = FALSE;
581 /* decide whether we can use GPU */
582 hwinfo->bCanUseGPU = (hwinfo->gpu_info.ncuda_dev_use > 0);
583 if (!hwinfo->bCanUseGPU && bForceUseGPU)
585 gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
/* Limit the number of GPUs in use to count.
 *
 * The limit is applied by storing count in hwinfo->gpu_info.ncuda_dev_use;
 * the visible lines do not reorder or otherwise modify the list itself.
 * A count above the current number in use is not meant to raise it, and a
 * count below 1 produces the error text formatted below.
 *
 * NOTE(review): the early return for count > ndev_use, the condition for
 * the error text and the call that reports it are not visible in this
 * excerpt -- confirm against the complete file.
 */
590 void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
596 ndev_use = hwinfo->gpu_info.ncuda_dev_use;
598 if (count > ndev_use)
600 /* won't increase the # of GPUs */
607 sprintf(sbuf, "Limiting the number of GPUs to <1 doesn't make sense (detected %d, %d requested)!",
612 /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
613 hwinfo->gpu_info.ncuda_dev_use = count;
/* Release the resources referenced by hwinfo: the CPUID information via
 * gmx_cpuid_done() and the GPU information via free_gpu_info().
 *
 * NOTE(review): any NULL check on hwinfo and the release of hwinfo itself
 * are not visible in this excerpt -- confirm against the complete file.
 */
616 void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
620 gmx_cpuid_done(hwinfo->cpuid_info);
621 free_gpu_info(&hwinfo->gpu_info);