fix unnecessary switch to polling GPU wait
[alexxy/gromacs.git] / src / mdlib / nbnxn_cuda / nbnxn_cuda_data_mgmt.cu
index a7ef47ce2e301396c1ed2086b9fdce22c12d7df6..882063167495fbd1775ad6658c0a52577df759d5 100644 (file)
@@ -549,16 +549,25 @@ void nbnxn_cuda_init(FILE *fplog,
 
     stat = cudaDriverGetVersion(&cuda_drv_ver);
     CU_RET_ERR(stat, "cudaDriverGetVersion failed");
+
     bOldDriver = (cuda_drv_ver < 5000);
 
-    if (nb->dev_info->prop.ECCEnabled == 1)
+    if ((nb->dev_info->prop.ECCEnabled == 1) && bOldDriver)
     {
+        /* Polling wait should be used instead of cudaStreamSynchronize only if:
+         *   - ECC is ON & driver is old (checked above),
+         *   - we're on x86/x86_64,
+         *   - atomics are available, and
+         *   - GPUs are not being shared.
+         */
+        bool bShouldUsePollSync = (bX86 && bTMPIAtomics && !gpu_info->bDevShare);
+
         if (bStreamSync)
         {
             nb->bUseStreamSync = true;
 
             /* only warn if polling should be used */
-            if (bOldDriver && !gpu_info->bDevShare)
+            if (bShouldUsePollSync)
             {
                 md_print_warn(fplog,
                               "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, but\n"
@@ -567,34 +576,27 @@ void nbnxn_cuda_init(FILE *fplog,
         }
         else
         {
-            /* Can/should turn of cudaStreamSynchronize wait only if
-             *   - we're on x86/x86_64
-             *   - atomics are available
-             *   - GPUs are not being shared
-             *   - and driver is old. */
-            nb->bUseStreamSync =
-                (bX86 && bTMPIAtomics && !gpu_info->bDevShare && bOldDriver) ?
-                true : false;
-
-            if (nb->bUseStreamSync)
+            nb->bUseStreamSync = !bShouldUsePollSync;
+
+            if (bShouldUsePollSync)
             {
                 md_print_warn(fplog,
                               "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, known to\n"
-                              "      cause performance loss. Switching to the alternative polling GPU waiting.\n"
+                              "      cause performance loss. Switching to the alternative polling GPU wait.\n"
                               "      If you encounter issues, switch back to standard GPU waiting by setting\n"
                               "      the GMX_CUDA_STREAMSYNC environment variable.\n");
             }
-            else if (bOldDriver)
+            else
             {
                 /* Tell the user that the ECC+old driver combination can be bad */
                 sprintf(sbuf,
-                        "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0. A bug in this\n"
-                        "      driver can cause performance loss.\n"
-                        "      However, the polling waiting workaround can not be used because\n%s\n"
+                        "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0.\n"
+                        "      A known bug in this driver version can cause performance loss.\n"
+                        "      However, the polling wait workaround can not be used because\n%s\n"
                         "      Consider updating the driver or turning ECC off.",
-                        (!bX86 || !bTMPIAtomics) ?
-                           "         atomic operations are not supported by the platform/CPU+compiler." :
-                           "         GPU(s) are being oversubscribed.");
+                        (bX86 && bTMPIAtomics) ?
+                            "      GPU(s) are being oversubscribed." :
+                            "      atomic operations are not supported by the platform/CPU+compiler.");
                 md_print_warn(fplog, sbuf);
             }
         }