Commit
84597ae introduced an incorrect condition for testing
whether mdrun should and can switch to polling GPU wait.
This minor bug results in swithching to polling wait even when this
is unnecessary (i.e. when the driver in use is >=v5.0), This is
rather harmless, but the polling wait is fragile, so we should use
it only when really necessary.
This commit fixes the switching as well as reorganizes the conditionals
to make it more clear that switching is needed when ECC is on and the
NVIDIA GPU driver is older than v5.0.
Change-Id: I9106cb34c9a8fde2f9b54045d9b03ee5f5d50d8b
stat = cudaDriverGetVersion(&cuda_drv_ver);
CU_RET_ERR(stat, "cudaDriverGetVersion failed");
stat = cudaDriverGetVersion(&cuda_drv_ver);
CU_RET_ERR(stat, "cudaDriverGetVersion failed");
bOldDriver = (cuda_drv_ver < 5000);
bOldDriver = (cuda_drv_ver < 5000);
- if (nb->dev_info->prop.ECCEnabled == 1)
+ if ((nb->dev_info->prop.ECCEnabled == 1) && bOldDriver)
+ /* Polling wait should be used instead of cudaStreamSynchronize only if:
+ * - ECC is ON & driver is old (checked above),
+ * - we're on x86/x86_64,
+ * - atomics are available, and
+ * - GPUs are not being shared.
+ */
+ bool bShouldUsePollSync = (bX86 && bTMPIAtomics && !gpu_info->bDevShare);
+
if (bStreamSync)
{
nb->bUseStreamSync = true;
/* only warn if polling should be used */
if (bStreamSync)
{
nb->bUseStreamSync = true;
/* only warn if polling should be used */
- if (bOldDriver && !gpu_info->bDevShare)
+ if (bShouldUsePollSync)
{
md_print_warn(fplog,
"NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, but\n"
{
md_print_warn(fplog,
"NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, but\n"
- /* Can/should turn of cudaStreamSynchronize wait only if
- * - we're on x86/x86_64
- * - atomics are available
- * - GPUs are not being shared
- * - and driver is old. */
- nb->bUseStreamSync =
- (bX86 && bTMPIAtomics && !gpu_info->bDevShare && bOldDriver) ?
- true : false;
-
- if (nb->bUseStreamSync)
+ nb->bUseStreamSync = !bShouldUsePollSync;
+
+ if (bShouldUsePollSync)
{
md_print_warn(fplog,
"NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, known to\n"
{
md_print_warn(fplog,
"NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, known to\n"
- " cause performance loss. Switching to the alternative polling GPU waiting.\n"
+ " cause performance loss. Switching to the alternative polling GPU wait.\n"
" If you encounter issues, switch back to standard GPU waiting by setting\n"
" the GMX_CUDA_STREAMSYNC environment variable.\n");
}
" If you encounter issues, switch back to standard GPU waiting by setting\n"
" the GMX_CUDA_STREAMSYNC environment variable.\n");
}
{
/* Tell the user that the ECC+old driver combination can be bad */
sprintf(sbuf,
{
/* Tell the user that the ECC+old driver combination can be bad */
sprintf(sbuf,
- "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0. A bug in this\n"
- " driver can cause performance loss.\n"
- " However, the polling waiting workaround can not be used because\n%s\n"
+ "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0.\n"
+ " A known bug in this driver version can cause performance loss.\n"
+ " However, the polling wait workaround can not be used because\n%s\n"
" Consider updating the driver or turning ECC off.",
" Consider updating the driver or turning ECC off.",
- (!bX86 || !bTMPIAtomics) ?
- " atomic operations are not supported by the platform/CPU+compiler." :
- " GPU(s) are being oversubscribed.");
+ (bX86 && bTMPIAtomics) ?
+ " GPU(s) are being oversubscribed." :
+ " atomic operations are not supported by the platform/CPU+compiler.");
md_print_warn(fplog, sbuf);
}
}
md_print_warn(fplog, sbuf);
}
}