fix unnecessary switch to polling GPU wait

author Szilárd Páll <pszilard@cbr.su.se>

Thu, 7 Mar 2013 02:50:47 +0000 (03:50 +0100)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Thu, 30 May 2013 09:26:28 +0000 (11:26 +0200)
author Szilárd Páll <pszilard@cbr.su.se>
Thu, 7 Mar 2013 02:50:47 +0000 (03:50 +0100)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Thu, 30 May 2013 09:26:28 +0000 (11:26 +0200)
diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

index a7ef47ce2e301396c1ed2086b9fdce22c12d7df6..882063167495fbd1775ad6658c0a52577df759d5 100644 (file)
--- a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -549,16 +549,25 @@ void nbnxn_cuda_init(FILE *fplog,
  
      stat = cudaDriverGetVersion(&cuda_drv_ver);
      CU_RET_ERR(stat, "cudaDriverGetVersion failed");
+
      bOldDriver = (cuda_drv_ver < 5000);
  
-    if (nb->dev_info->prop.ECCEnabled == 1)
+    if ((nb->dev_info->prop.ECCEnabled == 1) && bOldDriver)
      {
+        /* Polling wait should be used instead of cudaStreamSynchronize only if:
+         *   - ECC is ON & driver is old (checked above),
+         *   - we're on x86/x86_64,
+         *   - atomics are available, and
+         *   - GPUs are not being shared.
+         */
+        bool bShouldUsePollSync = (bX86 && bTMPIAtomics && !gpu_info->bDevShare);
+
          if (bStreamSync)
          {
              nb->bUseStreamSync = true;
  
              /* only warn if polling should be used */
-            if (bOldDriver && !gpu_info->bDevShare)
+            if (bShouldUsePollSync)
              {
                  md_print_warn(fplog,
                                "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, but\n"
@@ -567,34 +576,27 @@ void nbnxn_cuda_init(FILE *fplog,
          }
          else
          {
-            /* Can/should turn of cudaStreamSynchronize wait only if
-             *   - we're on x86/x86_64
-             *   - atomics are available
-             *   - GPUs are not being shared
-             *   - and driver is old. */
-            nb->bUseStreamSync =
-                (bX86 && bTMPIAtomics && !gpu_info->bDevShare && bOldDriver) ?
-                true : false;
-
-            if (nb->bUseStreamSync)
+            nb->bUseStreamSync = !bShouldUsePollSync;
+
+            if (bShouldUsePollSync)
              {
                  md_print_warn(fplog,
                                "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, known to\n"
-                              "      cause performance loss. Switching to the alternative polling GPU waiting.\n"
+                              "      cause performance loss. Switching to the alternative polling GPU wait.\n"
                                "      If you encounter issues, switch back to standard GPU waiting by setting\n"
                                "      the GMX_CUDA_STREAMSYNC environment variable.\n");
              }
-            else if (bOldDriver)
+            else
              {
                  /* Tell the user that the ECC+old driver combination can be bad */
                  sprintf(sbuf,
-                        "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0. A bug in this\n"
-                        "      driver can cause performance loss.\n"
-                        "      However, the polling waiting workaround can not be used because\n%s\n"
+                        "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0.\n"
+                        "      A known bug in this driver version can cause performance loss.\n"
+                        "      However, the polling wait workaround can not be used because\n%s\n"
                          "      Consider updating the driver or turning ECC off.",
-                        (!bX86 || !bTMPIAtomics) ?
-                           "         atomic operations are not supported by the platform/CPU+compiler." :
-                           "         GPU(s) are being oversubscribed.");
+                        (bX86 && bTMPIAtomics) ?
+                            "      GPU(s) are being oversubscribed." :
+                            "      atomic operations are not supported by the platform/CPU+compiler.");
                  md_print_warn(fplog, sbuf);
              }
          }
author	Szilárd Páll <pszilard@cbr.su.se>
	Thu, 7 Mar 2013 02:50:47 +0000 (03:50 +0100)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Thu, 30 May 2013 09:26:28 +0000 (11:26 +0200)