#define GMX_GPU_UTILS_GMXOPENCL_H
/*! \brief Declare to OpenCL SDKs that we intend to use OpenCL API
- features that were deprecated in 2.0, so that they don't warn about
- it. */
+ features that were deprecated in 1.2 or 2.0, so that they don't
+ warn about it. */
+///@{
+# define CL_USE_DEPRECATED_OPENCL_1_1_APIS
+# define CL_USE_DEPRECATED_OPENCL_1_2_APIS
# define CL_USE_DEPRECATED_OPENCL_2_0_APIS
+///@}
# ifdef __APPLE__
# include <OpenCL/opencl.h>
# else
}
}
+bool canDetectGpus()
+{
+    // Ask the CUDA runtime for the installed driver version. The
+    // variable keeps its -1 initial value unless the call writes to it.
+    int               driverVersion = -1;
+    const cudaError_t stat          = cudaDriverGetVersion(&driverVersion);
+
+    // The pointer passed above is never null, so this error code
+    // would indicate a programming error.
+    GMX_ASSERT(stat != cudaErrorInvalidValue, "An impossible null pointer was passed to cudaDriverGetVersion");
+    GMX_RELEASE_ASSERT(stat == cudaSuccess,
+                       gmx::formatString("An unexpected value was returned from cudaDriverGetVersion %s: %s",
+                                         cudaGetErrorName(stat), cudaGetErrorString(stat)).c_str());
+
+    // A positive reported version is treated as "a driver was found".
+    return driverVersion > 0;
+}
int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
{
Check /*<< Only check whether the task has completed */
};
+/*! \brief Return whether GPUs can be detected
+ *
+ * Returns true when this is a build of \Gromacs configured to support
+ * GPU usage, and a valid device driver or ICD was detected by the GPU
+ * runtime.
+ *
+ * Does not throw.
+ *
+ * \returns Whether GPU detection can be attempted. Presumably the
+ *          stub generated by GPU_FUNC_TERM_WITH_RETURN(false) makes
+ *          this false in builds without GPU support (TODO confirm
+ *          against the macro definition). */
+GPU_FUNC_QUALIFIER
+bool canDetectGpus() GPU_FUNC_TERM_WITH_RETURN(false);
+
/*! \brief Detect all GPUs in the system.
*
- * Will detect every GPU supported by the device driver in use. Also
- * check for the compatibility of each and fill the gpu_info->gpu_dev array
- * with the required information on each the device: ID, device properties,
- * status.
+ * Will detect every GPU supported by the device driver in use. If
+ * the device driver is missing or unsuitable, returns the same error
+ * as for "no valid devices detected," so generally calling code
+ * should have checked the return value from canDetectGpus() first,
+ * in order to understand the behaviour of this routine. This routine
+ * also checks the compatibility of each device and fills the
+ * gpu_info->gpu_dev array with the required information on each
+ * device: ID, device properties, status.
*
* \param[in] gpu_info pointer to structure holding GPU information.
* \param[out] err_str The error message of any GPU API error that caused
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringutil.h"
/*! \brief Helper macro for error handling */
#define CALLOCLFUNC_LOGERROR(func, err_str, retval) { \
}
+//! This function is documented in the header file
+bool canDetectGpus()
+{
+    // Query only the number of available platforms (no ID buffer is
+    // passed). Start from zero so that a count the call never wrote
+    // cannot be mistaken for "platforms found".
+    cl_uint numPlatforms = 0;
+    cl_int  status       = clGetPlatformIDs(0, nullptr, &numPlatforms);
+    GMX_ASSERT(status != CL_INVALID_VALUE, "Incorrect call of clGetPlatformIDs detected");
+    if (status == CL_PLATFORM_NOT_FOUND_KHR)
+    {
+        // No valid ICDs found
+        return false;
+    }
+    // cl_int status codes are signed (OpenCL error codes are negative),
+    // so they are formatted with %d rather than %u.
+    GMX_RELEASE_ASSERT(status == CL_SUCCESS,
+                       gmx::formatString("An unexpected value was returned from clGetPlatformIDs %d: %s",
+                                         status, ocl_get_error_string(status).c_str()).c_str());
+    bool foundPlatform = (numPlatforms > 0);
+    return foundPlatform;
+}
+
//! This function is documented in the header file
int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
{
int rank_world;
MPI_Comm physicalnode_comm;
#endif
- int rank_local;
+ bool isMasterRankOfNode;
hwinfo_g->gpu_info.bDetectGPUs =
(bGPUBinary && getenv("GMX_DISABLE_GPU_DETECTION") == nullptr);
MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(),
rank_world, &physicalnode_comm);
- MPI_Comm_rank(physicalnode_comm, &rank_local);
+ {
+ int rankOnNode = -1;
+ MPI_Comm_rank(physicalnode_comm, &rankOnNode);
+ isMasterRankOfNode = (rankOnNode == 0);
+ }
GMX_UNUSED_VALUE(cr);
#else
- /* Here there should be only one process, check this */
+ // Here there should be only one process, because if we are using
+ // thread-MPI, only one thread is active so far. So we check this.
GMX_RELEASE_ASSERT(cr->nnodes == 1 && cr->sim_nodeid == 0, "Only a single (master) process should execute here");
-
- rank_local = 0;
+ isMasterRankOfNode = true;
#endif
/* With CUDA detect only on one rank per host, with OpenCL need do
* the detection on all PP ranks */
bool isOpenclPpRank = ((GMX_GPU == GMX_GPU_OPENCL) && thisRankHasDuty(cr, DUTY_PP));
- if (rank_local == 0 || isOpenclPpRank)
+ bool gpusCanBeDetected = false;
+ if (isMasterRankOfNode || isOpenclPpRank)
+ {
+ gpusCanBeDetected = canDetectGpus();
+ // No need to tell the user anything at this point, they get a
+ // hardware report later.
+ }
+
+ if (gpusCanBeDetected)
{
char detection_error[STRLEN] = "", sbuf[STRLEN];
dev_size = hwinfo_g->gpu_info.n_dev*sizeof_gpu_dev_info();
- if (rank_local > 0)
+ if (!isMasterRankOfNode)
{
hwinfo_g->gpu_info.gpu_dev =
(struct gmx_device_info_t *)malloc(dev_size);
}
}
- if (bGPUBinary && (hwinfo->ngpu_compatible_tot > 0 ||
- hwinfo->gpu_info.n_dev > 0))
+ if (bGPUBinary && hwinfo->gpu_info.n_dev > 0)
{
s += gmx::formatString(" GPU info:\n");
s += gmx::formatString(" Number of GPUs detected: %d\n",
hwinfo->gpu_info.n_dev);
- if (hwinfo->gpu_info.n_dev > 0)
- {
- s += sprint_gpus(hwinfo->gpu_info) + "\n";
- }
+ s += sprint_gpus(hwinfo->gpu_info) + "\n";
}
return s;
}