stat = cudaDriverGetVersion(&cuda_drv_ver);
CU_RET_ERR(stat, "cudaDriverGetVersion failed");
+
bOldDriver = (cuda_drv_ver < 5000);
- if (nb->dev_info->prop.ECCEnabled == 1)
+ if ((nb->dev_info->prop.ECCEnabled == 1) && bOldDriver)
{
+ /* Polling wait should be used instead of cudaStreamSynchronize only if:
+ * - ECC is ON & driver is old (checked above),
+ * - we're on x86/x86_64,
+ * - atomics are available, and
+ * - GPUs are not being shared.
+ */
+ bool bShouldUsePollSync = (bX86 && bTMPIAtomics && !gpu_info->bDevShare);
+
if (bStreamSync)
{
nb->bUseStreamSync = true;
/* only warn if polling should be used */
- if (bOldDriver && !gpu_info->bDevShare)
+ if (bShouldUsePollSync)
{
md_print_warn(fplog,
"NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, but\n"
}
else
{
- /* Can/should turn of cudaStreamSynchronize wait only if
- * - we're on x86/x86_64
- * - atomics are available
- * - GPUs are not being shared
- * - and driver is old. */
- nb->bUseStreamSync =
- (bX86 && bTMPIAtomics && !gpu_info->bDevShare && bOldDriver) ?
- true : false;
-
- if (nb->bUseStreamSync)
+ nb->bUseStreamSync = !bShouldUsePollSync;
+
+ if (bShouldUsePollSync)
{
md_print_warn(fplog,
"NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, known to\n"
- " cause performance loss. Switching to the alternative polling GPU waiting.\n"
+ " cause performance loss. Switching to the alternative polling GPU wait.\n"
" If you encounter issues, switch back to standard GPU waiting by setting\n"
" the GMX_CUDA_STREAMSYNC environment variable.\n");
}
- else if (bOldDriver)
+ else
{
/* Tell the user that the ECC+old driver combination can be bad */
sprintf(sbuf,
- "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0. A bug in this\n"
- " driver can cause performance loss.\n"
- " However, the polling waiting workaround can not be used because\n%s\n"
+ "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0.\n"
+ " A known bug in this driver version can cause performance loss.\n"
+ " However, the polling wait workaround can not be used because\n%s\n"
" Consider updating the driver or turning ECC off.",
- (!bX86 || !bTMPIAtomics) ?
- " atomic operations are not supported by the platform/CPU+compiler." :
- " GPU(s) are being oversubscribed.");
+ (bX86 && bTMPIAtomics) ?
+ " GPU(s) are being oversubscribed." :
+ " atomic operations are not supported by the platform/CPU+compiler.");
md_print_warn(fplog, sbuf);
}
}