#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/snprintf.h"
#if HAVE_NVML
#include <nvml.h>
#endif /* HAVE_NVML_APPLICATION_CLOCKS */
}
-gmx_bool init_gpu(const gmx::MDLogger &mdlog, int mygpu, char *result_str,
- const struct gmx_gpu_info_t *gpu_info,
- const struct gmx_gpu_opt_t *gpu_opt)
+void init_gpu(const gmx::MDLogger &mdlog, int rank, int mygpu,
+ const struct gmx_gpu_info_t *gpu_info,
+ const struct gmx_gpu_opt_t *gpu_opt)
{
cudaError_t stat;
char sbuf[STRLEN];
int gpuid;
assert(gpu_info);
- assert(result_str);
+ assert(gpu_opt);
+ /* mygpu indexes gpu_opt->dev_use, so it must lie in [0, n_dev_use). */
if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
{
- sprintf(sbuf, "Trying to initialize an non-existent GPU: "
- "there are %d selected GPU(s), but #%d was requested.",
- gpu_opt->n_dev_use, mygpu);
+ snprintf(sbuf, STRLEN, "On rank %d trying to initialize an non-existent GPU: "
+ "there are %d selected GPU(s), but #%d was requested.",
+ rank, gpu_opt->n_dev_use, mygpu);
gmx_incons(sbuf);
}
gpuid = gpu_info->gpu_dev[gpu_opt->dev_use[mygpu]].id;
stat = cudaSetDevice(gpuid);
- strncpy(result_str, cudaGetErrorString(stat), STRLEN);
+ if (stat != cudaSuccess)
+ {
+ /* Report the device ID that was handed to cudaSetDevice(), not the
+ * index into the selected-GPU list: the caller-side message this
+ * replaces passed get_gpu_device_id() for the same placeholder. */
+ snprintf(sbuf, STRLEN, "On rank %d failed to initialize GPU #%d",
+ rank, gpuid);
+ /* The CUDA status is passed along with the rank/device message. */
+ CU_RET_ERR(stat, sbuf);
+ }
if (debug)
{
}
//Ignoring return value as NVML errors should be treated not critical.
- if (stat == cudaSuccess)
- {
- init_gpu_application_clocks(mdlog, gpuid, gpu_info);
- }
- return (stat == cudaSuccess);
+ init_gpu_application_clocks(mdlog, gpuid, gpu_info);
}
gmx_bool free_cuda_gpu(
* gpu_info.gpu_dev array.
*
* \param mdlog log file to write to
+ * \param[in] rank MPI rank of this process (for error output)
* \param[in] mygpu index of the GPU to initialize
- * \param[out] result_str the message related to the error that occurred
- * during the initialization (if there was any).
* \param[in] gpu_info GPU info of all detected devices in the system.
* \param[in] gpu_opt options for using the GPUs in gpu_info
- * \returns true if no error occurs during initialization.
+ *
+ * Issues a fatal error for any critical errors that occur during
+ * initialization.
*/
GPU_FUNC_QUALIFIER
-gmx_bool init_gpu(const gmx::MDLogger &GPU_FUNC_ARGUMENT(mdlog),
- int GPU_FUNC_ARGUMENT(mygpu),
- char *GPU_FUNC_ARGUMENT(result_str),
- const struct gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info),
- const gmx_gpu_opt_t *GPU_FUNC_ARGUMENT(gpu_opt)) GPU_FUNC_TERM_WITH_RETURN(-1)
+void init_gpu(const gmx::MDLogger &GPU_FUNC_ARGUMENT(mdlog),
+ int GPU_FUNC_ARGUMENT(rank),
+ int GPU_FUNC_ARGUMENT(mygpu),
+ const struct gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info),
+ const gmx_gpu_opt_t *GPU_FUNC_ARGUMENT(gpu_opt)) GPU_FUNC_TERM
/*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
*
}
//! This function is documented in the header file
-gmx_bool init_gpu(const gmx::MDLogger & /*mdlog*/,
- int mygpu,
- char *result_str,
- const gmx_gpu_info_t gmx_unused *gpu_info,
- const gmx_gpu_opt_t *gpu_opt
- )
+void init_gpu(const gmx::MDLogger & /*mdlog*/,
+ int rank,
+ int mygpu,
+ const gmx_gpu_info_t *gpu_info,
+ const gmx_gpu_opt_t *gpu_opt
+ )
{
- assert(result_str);
-
- result_str[0] = 0;
+ assert(gpu_opt);
+ /* mygpu must lie in [0, n_dev_use); anything else is reported through
+ * gmx_incons() together with the calling rank. */
if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
{
char sbuf[STRLEN];
- sprintf(sbuf, "Trying to initialize an non-existent GPU: "
+ /* Bounded write into sbuf, matching the snprintf() call used for the
+ * same message in the CUDA implementation.
+ * NOTE(review): the CUDA file gained gromacs/utility/snprintf.h for
+ * this call; confirm that header is included in this file too. */
+ snprintf(sbuf, STRLEN, "On rank %d trying to initialize an non-existent GPU: "
"there are %d selected GPU(s), but #%d was requested.",
- gpu_opt->n_dev_use, mygpu);
+ rank, gpu_opt->n_dev_use, mygpu);
gmx_incons(sbuf);
}
setenv("CUDA_CACHE_DISABLE", "1", 0);
#endif
}
-
- return TRUE;
}
//! This function is documented in the header file
bool emulateGpu,
const gmx_gpu_opt_t *gpu_opt)
{
- char gpu_err_str[STRLEN];
-
*bUseGPU = FALSE;
/* Enable GPU mode when GPUs are available or no GPU emulation is requested.
if (gpu_opt->n_dev_use > 0 && !emulateGpu)
{
/* Each PP node will use the intra-node id-th device from the
- * list of detected/selected GPUs. */
- if (!init_gpu(mdlog, cr->rank_pp_intranode, gpu_err_str,
- &hwinfo->gpu_info, gpu_opt))
- {
- /* At this point the init should never fail as we made sure that
- * we have all the GPUs we need. If it still does, we'll bail. */
- /* TODO the decorating of gpu_err_str is nicer if it
- happens inside init_gpu. Out here, the decorating with
- the MPI rank makes sense. */
- gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
- cr->nodeid,
- get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
- cr->rank_pp_intranode),
- gpu_err_str);
- }
+ * list of detected/selected GPUs.
+ *
+ * At this point the init should never fail, as we made sure that
+ * we have all the GPUs we need. If it still does, init_gpu()
+ * issues a fatal error itself, so no check is needed here.
+ *
+ * TODO The error reporting will be nicer when the logger is
+ * aware of MPI ranks. */
+ init_gpu(mdlog, cr->nodeid, cr->rank_pp_intranode,
+ &hwinfo->gpu_info, gpu_opt);
/* Here we actually turn on hardware GPU acceleration */
*bUseGPU = TRUE;