use legacy CUDA non-bonded kernels only on Fermi

author Szilárd Páll <pszilard@cbr.su.se>

Wed, 16 Jan 2013 20:05:28 +0000 (21:05 +0100)

committer Szilard Pall <pszilard@cbr.su.se>

Sun, 10 Feb 2013 16:45:36 +0000 (17:45 +0100)
author Szilárd Páll <pszilard@cbr.su.se>
Wed, 16 Jan 2013 20:05:28 +0000 (21:05 +0100)
committer Szilard Pall <pszilard@cbr.su.se>
Sun, 10 Feb 2013 16:45:36 +0000 (17:45 +0100)
diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

index d5074d2f38ae6120d2b337cd3c4f1f01876639c9..3107e25460782f5cc707867d1361dca285b8c80a 100644 (file)
--- a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -76,18 +76,31 @@ extern const struct texture<float, 1, cudaReadModeElementType>& nbnxn_cuda_get_c
  /* We should actually be using md_print_warn in md_logging.c,
   * but we can't include mpi.h in CUDA code.
   */
-static void md_print_warn(FILE *fplog, const char *buf)
+static void md_print_warn(FILE       *fplog,
+                          const char *fmt, ...)
  {
+    va_list ap;
+
      if (fplog != NULL)
      {
          /* We should only print to stderr on the master node,
           * in most cases fplog is only set on the master node, so this works.
           */
-        fprintf(stderr, "\n%s\n", buf);
-        fprintf(fplog,  "\n%s\n", buf);
+        va_start(ap, fmt);
+        fprintf(stderr, "\n");
+        vfprintf(stderr, fmt, ap);
+        fprintf(stderr, "\n");
+        va_end(ap);
+
+        va_start(ap, fmt);
+        fprintf(fplog, "\n");
+        vfprintf(fplog, fmt, ap);
+        fprintf(fplog, "\n");
+        va_end(ap);
      }
  }
  
+
  /* Fw. decl. */
  static void nbnxn_cuda_clear_e_fshift(nbnxn_cuda_ptr_t cu_nb);
  
@@ -336,23 +349,24 @@ static void init_timings(wallclock_gpu_t *t)
  }
  
  /* Decide which kernel version to use (default or legacy) based on:
- *  - CUDA version
+ *  - CUDA version used for compilation
   *  - non-bonded kernel selector environment variables
- *  - GPU SM version TODO ???
+ *  - GPU architecture version
   */
-static int pick_nbnxn_kernel_version()
+static int pick_nbnxn_kernel_version(FILE            *fplog,
+                                     cuda_dev_info_t *devinfo)
  {
-    bool bLegacyKernel, bDefaultKernel, bCUDA40, bCUDA32;
+    bool bForceLegacyKernel, bForceDefaultKernel, bCUDA40, bCUDA32;
      char sbuf[STRLEN];
      int  kver;
  
-    /* legacy kernel (former k2), kept for now for backward compatibility,
-       faster than the default with  CUDA 3.2/4.0 (TODO: on Kepler?). */
-    bLegacyKernel  = (getenv("GMX_CUDA_NB_LEGACY") != NULL);
+    /* Legacy kernel (former k2), kept for backward compatibility as it is
+       faster than the default with CUDA 3.2/4.0 on Fermi (not on Kepler). */
+    bForceLegacyKernel  = (getenv("GMX_CUDA_NB_LEGACY") != NULL);
      /* default kernel (former k3). */
-    bDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL);
+    bForceDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL);
  
-    if ((unsigned)(bLegacyKernel + bDefaultKernel) > 1)
+    if ((unsigned)(bForceLegacyKernel + bForceDefaultKernel) > 1)
      {
          gmx_fatal(FARGS, "Multiple CUDA non-bonded kernels requested; to manually pick a kernel set only one \n"
                    "of the following environment variables: \n"
@@ -371,17 +385,18 @@ static int pick_nbnxn_kernel_version()
      /* default is default ;) */
      kver = eNbnxnCuKDefault;
  
-    if (bCUDA32 || bCUDA40)
+    /* Consider switching to legacy kernels only on Fermi */
+    if (devinfo->prop.major < 3 && (bCUDA32 || bCUDA40))
      {
          /* use legacy kernel unless something else is forced by an env. var */
-        if (bDefaultKernel)
+        if (bForceDefaultKernel)
          {
-            fprintf(stderr,
-                    "\nNOTE: CUDA %s compilation detected; with this compiler version the legacy\n"
-                    "      non-bonded kernels perform best. However, the default kernels were\n"
-                    "      selected by the GMX_CUDA_NB_DEFAULT environment variable.\n"
-                    "      For best performance upgrade your CUDA toolkit.",
-                    sbuf);
+            md_print_warn(fplog,
+                          "NOTE: CUDA %s compilation detected; with this compiler version the legacy\n"
+                          "      non-bonded kernels perform best. However, the default kernels were\n"
+                          "      selected by the GMX_CUDA_NB_DEFAULT environment variable.\n"
+                          "      For best performance upgrade your CUDA toolkit.\n",
+                          sbuf);
          }
          else
          {
@@ -390,11 +405,11 @@ static int pick_nbnxn_kernel_version()
      }
      else
      {
-        /* issue not if the non-default kernel is forced by an env. var */
-        if (bLegacyKernel)
+        /* issue note if the non-default kernel is forced by an env. var */
+        if (bForceLegacyKernel)
          {
-            fprintf(stderr,
-                    "\nNOTE: Legacy non-bonded CUDA kernels were selected by the GMX_CUDA_NB_LEGACY\n"
+            md_print_warn(fplog,
+                    "NOTE: Legacy non-bonded CUDA kernels selected by the GMX_CUDA_NB_LEGACY\n"
                      "      env. var. Consider using using the default kernels which should be faster!\n");
  
              kver = eNbnxnCuKLegacy;
@@ -577,7 +592,7 @@ void nbnxn_cuda_init(FILE *fplog,
      }
  
      /* set the kernel type for the current GPU */
-    nb->kernel_ver = pick_nbnxn_kernel_version();
+    nb->kernel_ver = pick_nbnxn_kernel_version(fplog, nb->dev_info);
      /* pick L1 cache configuration */
      nbnxn_cuda_set_cacheconfig(nb->dev_info);
author	Szilárd Páll <pszilard@cbr.su.se>
	Wed, 16 Jan 2013 20:05:28 +0000 (21:05 +0100)
committer	Szilard Pall <pszilard@cbr.su.se>
	Sun, 10 Feb 2013 16:45:36 +0000 (17:45 +0100)