#include "main.h"
#include "md_logging.h"
+#include "thread_mpi/threads.h"
+
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
-
#if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
#include "windows.h"
#endif
static const char * invalid_gpuid_hint =
"A delimiter-free sequence of valid numeric IDs of available GPUs is expected.";
+/* The globally shared hwinfo structure. */
+static gmx_hw_info_t *hwinfo_g;
+/* A reference counter for the hwinfo structure */
+static int n_hwinfo = 0;
+/* A lock to protect the hwinfo structure */
+static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
+
+
/* FW decl. */
-void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
+static void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info)
{
const t_commrec *cr, int ntmpi_requested,
gmx_bool bUseGPU)
{
- int npppn, ntmpi_pp, ngpu;
- char sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
- char gpu_plural[2];
- gmx_bool bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
+ int npppn, ntmpi_pp, ngpu;
+ char sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
+ char gpu_plural[2];
+ gmx_bool bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
+ int ret;
+ static tMPI_Thread_mutex_t cons_lock = TMPI_THREAD_MUTEX_INITIALIZER;
+
assert(hwinfo);
assert(cr);
- btMPI = bMPI = FALSE;
- bNthreadsAuto = FALSE;
+    /* Below we only do consistency checks for PP and GPUs;
+     * these are irrelevant for PME-only nodes, so in that case
+     * we return here.
+     */
+ if (!(cr->duty & DUTY_PP))
+ {
+ return;
+ }
+
+    /* The checks below only need to run once, but every thread that is
+       alive must pass through this function so that all of them see
+       consistent data. We achieve this by mutual exclusion and by
+       skipping the checks if the structure is already checked & set. */
+ ret = tMPI_Thread_mutex_lock(&cons_lock);
+ if (ret != 0)
+ {
+ gmx_fatal(FARGS, "Error locking cons mutex: %s", strerror(errno));
+ }
+
+ if (!hwinfo->bConsistencyChecked)
+ {
+ btMPI = bMPI = FALSE;
+ bNthreadsAuto = FALSE;
#if defined(GMX_THREAD_MPI)
- btMPI = TRUE;
- bNthreadsAuto = (ntmpi_requested < 1);
+ btMPI = TRUE;
+ bNthreadsAuto = (ntmpi_requested < 1);
#elif defined(GMX_LIB_MPI)
- bMPI = TRUE;
+ bMPI = TRUE;
#endif
#ifdef GMX_GPU
- bGPUBin = TRUE;
+ bGPUBin = TRUE;
#else
- bGPUBin = FALSE;
+ bGPUBin = FALSE;
#endif
- /* GPU emulation detection is done later, but we need here as well
- * -- uncool, but there's no elegant workaround */
- bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL);
- bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
+        /* GPU emulation detection is done later, but we need it here
+         * as well -- uncool, but there's no elegant workaround */
+ bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL);
+ bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
- if (SIMMASTER(cr))
- {
- /* check the acceleration mdrun is compiled with against hardware capabilities */
- /* TODO: Here we assume homogeneous hardware which is not necessarily the case!
- * Might not hurt to add an extra check over MPI. */
+        /* Check the acceleration that mdrun was compiled with against
+           the hardware capabilities. */
+ /* TODO: Here we assume homogeneous hardware which is not necessarily
+ the case! Might not hurt to add an extra check over MPI. */
gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog);
- }
-
- /* Below we only do consistency checks for PP and GPUs,
- * this is irrelevant for PME only nodes, so in that case we return here.
- */
- if (!(cr->duty & DUTY_PP))
- {
- return;
- }
- /* Need to ensure that we have enough GPUs:
- * - need one GPU per PP node
- * - no GPU oversubscription with tMPI
- * => keep on the GPU support, otherwise turn off (or bail if forced)
- * */
- /* number of PP processes per node */
- npppn = cr->nrank_pp_intranode;
-
- pernode[0] = '\0';
- th_or_proc_plural[0] = '\0';
- if (btMPI)
- {
- sprintf(th_or_proc, "thread-MPI thread");
- if (npppn > 1)
+ /* Need to ensure that we have enough GPUs:
+ * - need one GPU per PP node
+ * - no GPU oversubscription with tMPI
+         * => keep GPU support on, otherwise turn it off (or bail if forced)
+         */
+ /* number of PP processes per node */
+ npppn = cr->nrank_pp_intranode;
+
+ pernode[0] = '\0';
+ th_or_proc_plural[0] = '\0';
+ if (btMPI)
{
- sprintf(th_or_proc_plural, "s");
+ sprintf(th_or_proc, "thread-MPI thread");
+ if (npppn > 1)
+ {
+ sprintf(th_or_proc_plural, "s");
+ }
}
- }
- else if (bMPI)
- {
- sprintf(th_or_proc, "MPI process");
- if (npppn > 1)
+ else if (bMPI)
{
- sprintf(th_or_proc_plural, "es");
+ sprintf(th_or_proc, "MPI process");
+ if (npppn > 1)
+ {
+ sprintf(th_or_proc_plural, "es");
+ }
+ sprintf(pernode, " per node");
+ }
+ else
+ {
+ /* neither MPI nor tMPI */
+ sprintf(th_or_proc, "process");
}
- sprintf(pernode, " per node");
- }
- else
- {
- /* neither MPI nor tMPI */
- sprintf(th_or_proc, "process");
- }
-
- if (bGPUBin)
- {
- print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
- }
- if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)
- {
- ngpu = hwinfo->gpu_info.ncuda_dev_use;
- sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+ if (bGPUBin)
+ {
+ print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
+ }
- /* number of tMPI threads atuo-adjusted */
- if (btMPI && bNthreadsAuto && SIMMASTER(cr))
+ if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)
{
- if (npppn < ngpu)
+ ngpu = hwinfo->gpu_info.ncuda_dev_use;
+ sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+
+            /* number of tMPI threads auto-adjusted */
+ if (btMPI && bNthreadsAuto)
{
- if (hwinfo->gpu_info.bUserSet)
+ if (npppn < ngpu)
{
- /* The user manually provided more GPUs than threads we could
- * automatically start. */
- gmx_fatal(FARGS,
- "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
- "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
- ngpu, gpu_plural, npppn, th_or_proc_plural,
- ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
- }
- else
- {
- /* There are more GPUs than tMPI threads; we have to limit the number GPUs used. */
- md_print_warn(cr, fplog,
- "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
- " %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
+ if (hwinfo->gpu_info.bUserSet)
+ {
+ /* The user manually provided more GPUs than threads we
+ could automatically start. */
+ gmx_fatal(FARGS,
+                          "%d GPU%s provided, but only %d PP thread-MPI thread%s could be started.\n"
+                          "%s requires one PP thread-MPI thread per GPU; use fewer GPUs%s.",
ngpu, gpu_plural, npppn, th_or_proc_plural,
- ShortProgram(), npppn, npppn > 1 ? "s" : "",
- bMaxMpiThreadsSet ? "\n Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
-
- if (cr->rank_pp_intranode == 0)
+ ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
+ }
+ else
{
- limit_num_gpus_used(hwinfo, npppn);
- ngpu = hwinfo->gpu_info.ncuda_dev_use;
- sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+ /* There are more GPUs than tMPI threads; we have to
+                           limit the number of GPUs used. */
+ md_print_warn(cr, fplog,
+ "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
+                                  "      %s can use one GPU per PP thread-MPI thread, so only %d GPU%s will be used.%s\n",
+ ngpu, gpu_plural, npppn,
+ th_or_proc_plural,
+ ShortProgram(), npppn,
+ npppn > 1 ? "s" : "",
+ bMaxMpiThreadsSet ? "\n Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
+
+ if (cr->rank_pp_intranode == 0)
+ {
+ limit_num_gpus_used(hwinfo, npppn);
+ ngpu = hwinfo->gpu_info.ncuda_dev_use;
+ sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+ }
}
}
}
- }
- if (ngpu != npppn)
- {
- if (hwinfo->gpu_info.bUserSet)
+ if (ngpu != npppn)
{
- gmx_fatal(FARGS,
- "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
- "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
- th_or_proc, btMPI ? "s" : "es", pernode,
- ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
- }
- else
- {
- if (ngpu > npppn)
+ if (hwinfo->gpu_info.bUserSet)
{
- md_print_warn(cr, fplog,
- "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
- " PP %s%s%s than GPU%s available.\n"
- " Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
- ShortProgram(),
- th_or_proc, th_or_proc_plural, pernode, gpu_plural,
- th_or_proc, npppn, gpu_plural, pernode);
-
- if (bMPI || (btMPI && cr->rank_pp_intranode == 0))
- {
- limit_num_gpus_used(hwinfo, npppn);
- ngpu = hwinfo->gpu_info.ncuda_dev_use;
- sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
- }
+ gmx_fatal(FARGS,
+ "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
+ "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
+ th_or_proc, btMPI ? "s" : "es", pernode,
+ ShortProgram(), npppn, th_or_proc,
+ th_or_proc_plural, pernode, ngpu, gpu_plural);
}
else
{
- /* Avoid duplicate error messages.
- * Unfortunately we can only do this at the physical node
- * level, since the hardware setup and MPI process count
- * might be differ over physical nodes.
- */
- if (cr->rank_pp_intranode == 0)
+ if (ngpu > npppn)
{
- gmx_fatal(FARGS,
- "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
- "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
- th_or_proc, btMPI ? "s" : "es", pernode,
- ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
+ md_print_warn(cr, fplog,
+                                  "NOTE: potentially sub-optimal launch configuration, %s started with fewer\n"
+ " PP %s%s%s than GPU%s available.\n"
+ " Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
+ ShortProgram(), th_or_proc,
+ th_or_proc_plural, pernode, gpu_plural,
+ th_or_proc, npppn, gpu_plural, pernode);
+
+ if (bMPI || (btMPI && cr->rank_pp_intranode == 0))
+ {
+ limit_num_gpus_used(hwinfo, npppn);
+ ngpu = hwinfo->gpu_info.ncuda_dev_use;
+ sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+ }
}
-#ifdef GMX_MPI
else
{
- /* Avoid other ranks to continue after inconsistency */
- MPI_Barrier(cr->mpi_comm_mygroup);
+ /* Avoid duplicate error messages.
+ * Unfortunately we can only do this at the physical node
+ * level, since the hardware setup and MPI process count
+                     * might differ over physical nodes.
+ */
+ if (cr->rank_pp_intranode == 0)
+ {
+ gmx_fatal(FARGS,
+ "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
+ "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
+ th_or_proc, btMPI ? "s" : "es", pernode,
+ ShortProgram(), npppn, th_or_proc,
+ th_or_proc_plural, pernode, ngpu,
+ gpu_plural);
+ }
}
-#endif
}
}
- }
- hwinfo->gpu_info.bDevShare = FALSE;
- if (hwinfo->gpu_info.bUserSet && (cr->rank_pp_intranode == 0))
- {
- int i, j, same_count;
- gmx_bool bSomeSame, bAllDifferent;
+ {
+ int same_count;
- same_count = 0; /* number of GPUs shared among ranks */
- bSomeSame = FALSE;
- bAllDifferent = TRUE;
+ same_count = gmx_count_gpu_dev_shared(&(hwinfo->gpu_info));
- for (i = 0; i < ngpu - 1; i++)
- {
- for (j = i + 1; j < ngpu; j++)
+ if (btMPI && same_count > 0)
{
- bSomeSame |= hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
- bAllDifferent &= hwinfo->gpu_info.cuda_dev_use[i] != hwinfo->gpu_info.cuda_dev_use[j];
- same_count += hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
+ gmx_fatal(FARGS,
+ "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
+                          "Use MPI if you are sure that you want to assign a GPU to multiple threads.");
+ }
+
+ if (same_count > 0)
+ {
+ md_print_warn(cr, fplog,
+ "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
+ " multiple %s%s; this should be avoided as it can cause\n"
+ " performance loss.\n",
+ same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
}
}
+ print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
+ }
+ hwinfo->bConsistencyChecked = TRUE;
+ }
- /* store the number of shared/oversubscribed GPUs */
- hwinfo->gpu_info.bDevShare = bSomeSame;
+ ret = tMPI_Thread_mutex_unlock(&cons_lock);
+ if (ret != 0)
+ {
+ gmx_fatal(FARGS, "Error unlocking cons mutex: %s", strerror(errno));
+ }
- if (btMPI && !bAllDifferent)
- {
- gmx_fatal(FARGS,
- "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
- "Use MPI if you are sure that you want to assign GPU to multiple threads.");
- }
+#ifdef GMX_MPI
+ if (PAR(cr))
+ {
+        /* Keep the other ranks from continuing after an
+           inconsistency was found */
+ MPI_Barrier(cr->mpi_comm_mygroup);
+ }
+#endif
+
+}
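
/* A condensed sketch of the run-once pattern used above; the names here are
 * illustrative only. Every thread serializes on the same function-local
 * static mutex, but only the first thread through executes the checks;
 * later threads see bConsistencyChecked == TRUE and fall straight through,
 * so all callers leave with consistent data.
 */
static void run_once_sketch(gmx_hw_info_t *hw)
{
    static tMPI_Thread_mutex_t once_lock = TMPI_THREAD_MUTEX_INITIALIZER;

    tMPI_Thread_mutex_lock(&once_lock);
    if (!hw->bConsistencyChecked)
    {
        /* expensive, state-mutating checks would go here */
        hw->bConsistencyChecked = TRUE;
    }
    tMPI_Thread_mutex_unlock(&once_lock);
}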
+
+int gmx_count_gpu_dev_shared(const gmx_gpu_info_t *gpu_info)
+{
+ int same_count = 0;
+ int ngpu = gpu_info->ncuda_dev_use;
- if (bSomeSame)
+ if (gpu_info->bUserSet)
+ {
+ int i, j;
+
+ for (i = 0; i < ngpu - 1; i++)
+ {
+ for (j = i + 1; j < ngpu; j++)
{
- md_print_warn(cr, fplog,
- "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
- " multiple %s%s; this should be avoided as it can cause\n"
- " performance loss.\n",
- same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
+ same_count += (gpu_info->cuda_dev_use[i] ==
+ gpu_info->cuda_dev_use[j]);
}
}
- print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
}
+
+ return same_count;
}
+
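/* A hedged usage example for gmx_count_gpu_dev_shared() with hypothetical
 * values: the user-set mapping {0, 0, 1, 1} contains exactly two equal
 * pairs (indices 0/1 and 2/3), so the function returns 2; any result > 0
 * means some GPU is assigned to more than one rank or thread.
 */
static void count_shared_example(void)
{
    gmx_gpu_info_t info;
    int            ids[] = { 0, 0, 1, 1 };

    info.bUserSet      = TRUE;
    info.ncuda_dev_use = 4;
    info.cuda_dev_use  = ids;

    assert(gmx_count_gpu_dev_shared(&info) == 2);
}
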
/* Return the number of hardware threads supported by the current CPU.
 * We assume that this is equal to the number of CPUs reported to be
* online by the OS at the time of the call.
return ret;
}
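
/* A minimal sketch of such an OS query, assuming a POSIX-like system where
 * sysconf(_SC_NPROCESSORS_ONLN) is available; the real implementation above
 * also has to cover Windows and other fallbacks.
 */
#ifdef HAVE_UNISTD_H
static int nthreads_online_sketch(void)
{
    long n = sysconf(_SC_NPROCESSORS_ONLN);

    return (n > 0) ? (int)n : 1; /* assume at least one thread on failure */
}
#endif
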
-void gmx_detect_hardware(FILE *fplog, gmx_hw_info_t *hwinfo,
- const t_commrec *cr,
- gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
- const char *gpu_id)
+gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
+ gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
+ const char *gpu_id)
{
int i;
const char *env;
gmx_hw_info_t *hw;
gmx_gpu_info_t gpuinfo_auto, gpuinfo_user;
gmx_bool bGPUBin;
+ int ret;
- assert(hwinfo);
-
- /* detect CPUID info; no fuss, we don't detect system-wide
- * -- sloppy, but that's it for now */
- if (gmx_cpuid_init(&hwinfo->cpuid_info) != 0)
+ /* make sure no one else is doing the same thing */
+ ret = tMPI_Thread_mutex_lock(&hw_info_lock);
+ if (ret != 0)
{
- gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
+ gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
}
- /* detect number of hardware threads */
- hwinfo->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
+    /* only initialize the hwinfo structure if it is not already initialized */
+ if (n_hwinfo == 0)
+ {
+ snew(hwinfo_g, 1);
+ hwinfo_g->bConsistencyChecked = FALSE;
- /* detect GPUs */
- hwinfo->gpu_info.ncuda_dev_use = 0;
- hwinfo->gpu_info.cuda_dev_use = NULL;
- hwinfo->gpu_info.ncuda_dev = 0;
- hwinfo->gpu_info.cuda_dev = NULL;
+ /* detect CPUID info; no fuss, we don't detect system-wide
+ * -- sloppy, but that's it for now */
+ if (gmx_cpuid_init(&hwinfo_g->cpuid_info) != 0)
+ {
+ gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
+ }
+
+ /* detect number of hardware threads */
+ hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
+
+ /* detect GPUs */
+ hwinfo_g->gpu_info.ncuda_dev_use = 0;
+ hwinfo_g->gpu_info.cuda_dev_use = NULL;
+ hwinfo_g->gpu_info.ncuda_dev = 0;
+ hwinfo_g->gpu_info.cuda_dev = NULL;
#ifdef GMX_GPU
- bGPUBin = TRUE;
+ bGPUBin = TRUE;
#else
- bGPUBin = FALSE;
+ bGPUBin = FALSE;
#endif
- /* Bail if binary is not compiled with GPU acceleration, but this is either
- * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
- if (bForceUseGPU && !bGPUBin)
- {
- gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
- }
- if (gpu_id != NULL && !bGPUBin)
- {
- gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
- }
-
- /* run the detection if the binary was compiled with GPU support */
- if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION") == NULL)
- {
- char detection_error[STRLEN];
-
- if (detect_cuda_gpus(&hwinfo->gpu_info, detection_error) != 0)
+        /* Bail if the binary was not compiled with GPU acceleration, but
+         * its use was requested either explicitly (-nb gpu) or implicitly
+         * (a GPU ID was passed). */
+ if (bForceUseGPU && !bGPUBin)
{
- if (detection_error != NULL && detection_error[0] != '\0')
- {
- sprintf(sbuf, ":\n %s\n", detection_error);
- }
- else
- {
- sprintf(sbuf, ".");
- }
- md_print_warn(cr, fplog,
- "NOTE: Error occurred during GPU detection%s"
- " Can not use GPU acceleration, will fall back to CPU kernels.\n",
- sbuf);
+ gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
}
- }
-
- if (bForceUseGPU || bTryUseGPU)
- {
- env = getenv("GMX_GPU_ID");
- if (env != NULL && gpu_id != NULL)
+ if (gpu_id != NULL && !bGPUBin)
{
- gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
+ gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
}
- if (env == NULL)
+
+ /* run the detection if the binary was compiled with GPU support */
+ if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION") == NULL)
{
- env = gpu_id;
+ char detection_error[STRLEN];
+
+ if (detect_cuda_gpus(&hwinfo_g->gpu_info, detection_error) != 0)
+ {
+ if (detection_error != NULL && detection_error[0] != '\0')
+ {
+ sprintf(sbuf, ":\n %s\n", detection_error);
+ }
+ else
+ {
+ sprintf(sbuf, ".");
+ }
+ md_print_warn(cr, fplog,
+ "NOTE: Error occurred during GPU detection%s"
+ " Can not use GPU acceleration, will fall back to CPU kernels.\n",
+ sbuf);
+ }
}
- /* parse GPU IDs if the user passed any */
- if (env != NULL)
+ if (bForceUseGPU || bTryUseGPU)
{
- int *gpuid, *checkres;
- int nid, res;
+ env = getenv("GMX_GPU_ID");
+ if (env != NULL && gpu_id != NULL)
+ {
+ gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
+ }
+ if (env == NULL)
+ {
+ env = gpu_id;
+ }
- snew(gpuid, max_gpu_ids_user);
- snew(checkres, max_gpu_ids_user);
+ /* parse GPU IDs if the user passed any */
+ if (env != NULL)
+ {
+ int *gpuid, *checkres;
+ int nid, res;
- parse_gpu_id_plain_string(env, &nid, gpuid);
+ snew(gpuid, max_gpu_ids_user);
+ snew(checkres, max_gpu_ids_user);
- if (nid == 0)
- {
- gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n", invalid_gpuid_hint);
- }
+ parse_gpu_id_plain_string(env, &nid, gpuid);
- res = check_select_cuda_gpus(checkres, &hwinfo->gpu_info, gpuid, nid);
+ if (nid == 0)
+ {
+ gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
+ invalid_gpuid_hint);
+ }
- if (!res)
- {
- print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
+ res = check_select_cuda_gpus(checkres, &hwinfo_g->gpu_info,
+ gpuid, nid);
- sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
- for (i = 0; i < nid; i++)
+ if (!res)
{
- if (checkres[i] != egpuCompatible)
+ print_gpu_detection_stats(fplog, &hwinfo_g->gpu_info, cr);
+
+ sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
+ for (i = 0; i < nid; i++)
{
- sprintf(stmp, " GPU #%d: %s\n",
- gpuid[i], gpu_detect_res_str[checkres[i]]);
- strcat(sbuf, stmp);
+ if (checkres[i] != egpuCompatible)
+ {
+ sprintf(stmp, " GPU #%d: %s\n",
+ gpuid[i], gpu_detect_res_str[checkres[i]]);
+ strcat(sbuf, stmp);
+ }
}
+ gmx_fatal(FARGS, "%s", sbuf);
}
- gmx_fatal(FARGS, "%s", sbuf);
- }
- hwinfo->gpu_info.bUserSet = TRUE;
+ hwinfo_g->gpu_info.bUserSet = TRUE;
- sfree(gpuid);
- sfree(checkres);
- }
- else
- {
- pick_compatible_gpus(&hwinfo->gpu_info);
- hwinfo->gpu_info.bUserSet = FALSE;
- }
+ sfree(gpuid);
+ sfree(checkres);
+ }
+ else
+ {
+ pick_compatible_gpus(&hwinfo_g->gpu_info);
+ hwinfo_g->gpu_info.bUserSet = FALSE;
+ }
- /* decide whether we can use GPU */
- hwinfo->bCanUseGPU = (hwinfo->gpu_info.ncuda_dev_use > 0);
- if (!hwinfo->bCanUseGPU && bForceUseGPU)
- {
- gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
+ /* decide whether we can use GPU */
+ hwinfo_g->bCanUseGPU = (hwinfo_g->gpu_info.ncuda_dev_use > 0);
+ if (!hwinfo_g->bCanUseGPU && bForceUseGPU)
+ {
+ gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
+ }
}
}
+ /* increase the reference counter */
+ n_hwinfo++;
+
+ ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
+ if (ret != 0)
+ {
+ gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
+ }
+
+ return hwinfo_g;
}
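
/* A hedged usage sketch of the acquire/release pair implemented above:
 * every successful gmx_detect_hardware() call increments the reference
 * counter and must be matched by exactly one gmx_hardware_info_free(),
 * which releases the shared structure once the counter drops to zero.
 * The arguments below are placeholders.
 */
static void hwinfo_lifetime_sketch(FILE *fplog, const t_commrec *cr)
{
    gmx_hw_info_t *hwinfo;

    hwinfo = gmx_detect_hardware(fplog, cr, FALSE, TRUE, NULL);

    /* ... read hwinfo->nthreads_hw_avail, hwinfo->gpu_info, etc. ... */

    gmx_hardware_info_free(hwinfo);
}
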
-void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
+static void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
{
int ndev_use;
void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
{
- if (hwinfo)
+ int ret;
+
+ ret = tMPI_Thread_mutex_lock(&hw_info_lock);
+ if (ret != 0)
+ {
+ gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
+ }
+
+ /* decrease the reference counter */
+ n_hwinfo--;
+
+ if (hwinfo != hwinfo_g)
+ {
+        gmx_incons("hwinfo != hwinfo_g");
+ }
+
+ if (n_hwinfo < 0)
+ {
+ gmx_incons("n_hwinfo < 0");
+ }
+
+ if (n_hwinfo == 0)
+ {
+ gmx_cpuid_done(hwinfo_g->cpuid_info);
+ free_gpu_info(&hwinfo_g->gpu_info);
+ sfree(hwinfo_g);
+ }
+
+ ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
+ if (ret != 0)
{
- gmx_cpuid_done(hwinfo->cpuid_info);
- free_gpu_info(&hwinfo->gpu_info);
- sfree(hwinfo);
+ gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
}
}