* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*
* In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
*/
-static int c_cudaMaxDeviceCount = 32;
+static const int c_cudaMaxDeviceCount = 32;
/** Dummy kernel used for sanity checking. */
-static __global__ void dummy_kernel(void) {}
+static __global__ void dummy_kernel() {}
static cudaError_t checkCompiledTargetCompatibility(int deviceId, const cudaDeviceProp& deviceProp)
{
"\nWARNING: The %s binary does not include support for the CUDA architecture of "
"the GPU ID #%d (compute capability %d.%d) detected during detection. "
"By default, GROMACS supports all architectures of compute "
- "capability >= 3.0, so your GPU "
+ "capability >= 3.5, so your GPU "
"might be rare, or some architectures were disabled in the build. \n"
"Consult the install guide for how to use the GMX_CUDA_TARGET_SM and "
"GMX_CUDA_TARGET_COMPUTE CMake variables to add this architecture. \n",
- gmx::getProgramContext().displayName(), deviceId, deviceProp.major, deviceProp.minor);
+ gmx::getProgramContext().displayName(),
+ deviceId,
+ deviceProp.major,
+ deviceProp.minor);
}
return stat;
cu_err = cudaSetDevice(deviceInfo.id);
if (cu_err != cudaSuccess)
{
- fprintf(stderr, "Error %d while switching to device #%d: %s\n", cu_err, deviceInfo.id,
- cudaGetErrorString(cu_err));
+ fprintf(stderr,
+ "Error while switching to device #%d. %s\n",
+ deviceInfo.id,
+ gmx::getDeviceErrorString(cu_err).c_str());
return DeviceStatus::NonFunctional;
}
// launchGpuKernel error is not fatal and should continue with marking the device bad
fprintf(stderr,
"Error occurred while running dummy kernel sanity check on device #%d:\n %s\n",
- deviceInfo.id, formatExceptionMessageToString(ex).c_str());
+ deviceInfo.id,
+ formatExceptionMessageToString(ex).c_str());
return DeviceStatus::NonFunctional;
}
stat = cudaDriverGetVersion(&driverVersion);
GMX_ASSERT(stat != cudaErrorInvalidValue,
"An impossible null pointer was passed to cudaDriverGetVersion");
- GMX_RELEASE_ASSERT(
- stat == cudaSuccess,
- gmx::formatString("An unexpected value was returned from cudaDriverGetVersion %s: %s",
- cudaGetErrorName(stat), cudaGetErrorString(stat))
- .c_str());
+ GMX_RELEASE_ASSERT(stat == cudaSuccess,
+ ("An unexpected value was returned from cudaDriverGetVersion. "
+ + gmx::getDeviceErrorString(stat))
+ .c_str());
bool foundDriver = (driverVersion > 0);
if (!foundDriver)
{
{
int numDevices;
cudaError_t stat = cudaGetDeviceCount(&numDevices);
- if (stat != cudaSuccess)
- {
- GMX_THROW(gmx::InternalError(
- "Invalid call of findDevices() when CUDA API returned an error, perhaps "
- "canPerformDeviceDetection() was not called appropriately beforehand."));
- }
+ gmx::checkDeviceError(stat,
+ "Invalid call of findDevices() when CUDA API returned an error, perhaps "
+ "canPerformDeviceDetection() was not called appropriately beforehand.");
/* things might go horribly wrong if cudart is not compatible with the driver */
numDevices = std::min(numDevices, c_cudaMaxDeviceCount);
// We expect to start device support/sanity checks with a clean runtime error state
- gmx::ensureNoPendingCudaError("");
+ gmx::ensureNoPendingDeviceError("Trying to find available CUDA devices.");
std::vector<std::unique_ptr<DeviceInformation>> deviceInfoList(numDevices);
for (int i = 0; i < numDevices; i++)
//
// Here we also clear the CUDA API error state so potential
// errors during sanity checks don't propagate.
- if ((stat = cudaGetLastError()) != cudaSuccess)
- {
- gmx_warning("An error occurred while sanity checking device #%d; %s: %s",
- deviceInfoList[i]->id, cudaGetErrorName(stat), cudaGetErrorString(stat));
- }
+ const std::string errorMessage = gmx::formatString(
+ "An error occurred while sanity checking device #%d.", deviceInfoList[i]->id);
+ gmx::ensureNoPendingDeviceError(errorMessage);
}
}
stat = cudaPeekAtLastError();
- GMX_RELEASE_ASSERT(stat == cudaSuccess,
- gmx::formatString("We promise to return with clean CUDA state, but "
- "non-success state encountered: %s: %s",
- cudaGetErrorName(stat), cudaGetErrorString(stat))
- .c_str());
+ GMX_RELEASE_ASSERT(
+ stat == cudaSuccess,
+ ("We promise to return with clean CUDA state, but non-success state encountered. "
+ + gmx::getDeviceErrorString(stat))
+ .c_str());
return deviceInfoList;
}
if (stat != cudaSuccess)
{
auto message = gmx::formatString("Failed to initialize GPU #%d", deviceId);
- CU_RET_ERR(stat, message.c_str());
+ CU_RET_ERR(stat, message);
}
if (debug)
{
if (debug)
{
- fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
+ fprintf(stderr, "Cleaning up context on GPU ID #%d.\n", gpuid);
}
stat = cudaDeviceReset();
if (stat != cudaSuccess)
{
- gmx_warning("Failed to free GPU #%d: %s", gpuid, cudaGetErrorString(stat));
+ gmx_warning("Failed to free GPU #%d. %s", gpuid, gmx::getDeviceErrorString(stat).c_str());
}
}
}
if (!gpuExists)
{
- return gmx::formatString("#%d: %s, stat: %s", deviceInfo.id, "N/A",
- c_deviceStateString[deviceInfo.status]);
+ return gmx::formatString(
+ "#%d: %s, stat: %s", deviceInfo.id, "N/A", c_deviceStateString[deviceInfo.status]);
}
else
{
return gmx::formatString("#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s",
- deviceInfo.id, deviceInfo.prop.name, deviceInfo.prop.major,
- deviceInfo.prop.minor, deviceInfo.prop.ECCEnabled ? "yes" : " no",
+ deviceInfo.id,
+ deviceInfo.prop.name,
+ deviceInfo.prop.major,
+ deviceInfo.prop.minor,
+ deviceInfo.prop.ECCEnabled ? "yes" : " no",
c_deviceStateString[deviceInfo.status]);
}
}