From: Szilárd Páll <pszilard@cbr.su.se>
Date: Wed, 16 Jan 2013 20:05:28 +0000 (+0100)
Subject: use legacy CUDA non-bonded kernels only on Fermi
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=e279fc25a95055119a8ed5272ee327d5452bedf4;p=alexxy%2Fgromacs.git

use legacy CUDA non-bonded kernels only on Fermi

This solves a remainig todo as well as clarifies some variable naming
and documentation.

Change-Id: Ifad80cb7e740d1329a9c84df992d899400efeca2
---

diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
index d5074d2f38..3107e25460 100644
--- a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -76,18 +76,31 @@ extern const struct texture<float, 1, cudaReadModeElementType>& nbnxn_cuda_get_c
 /* We should actually be using md_print_warn in md_logging.c,
  * but we can't include mpi.h in CUDA code.
  */
-static void md_print_warn(FILE *fplog, const char *buf)
+static void md_print_warn(FILE       *fplog,
+                          const char *fmt, ...)
 {
+    va_list ap;
+
     if (fplog != NULL)
     {
         /* We should only print to stderr on the master node,
          * in most cases fplog is only set on the master node, so this works.
          */
-        fprintf(stderr, "\n%s\n", buf);
-        fprintf(fplog,  "\n%s\n", buf);
+        va_start(ap, fmt);
+        fprintf(stderr, "\n");
+        vfprintf(stderr, fmt, ap);
+        fprintf(stderr, "\n");
+        va_end(ap);
+
+        va_start(ap, fmt);
+        fprintf(fplog, "\n");
+        vfprintf(fplog, fmt, ap);
+        fprintf(fplog, "\n");
+        va_end(ap);
     }
 }
 
+
 /* Fw. decl. */
 static void nbnxn_cuda_clear_e_fshift(nbnxn_cuda_ptr_t cu_nb);
 
@@ -336,23 +349,24 @@ static void init_timings(wallclock_gpu_t *t)
 }
 
 /* Decide which kernel version to use (default or legacy) based on:
- *  - CUDA version
+ *  - CUDA version used for compilation
  *  - non-bonded kernel selector environment variables
- *  - GPU SM version TODO ???
+ *  - GPU architecture version
  */
-static int pick_nbnxn_kernel_version()
+static int pick_nbnxn_kernel_version(FILE            *fplog,
+                                     cuda_dev_info_t *devinfo)
 {
-    bool bLegacyKernel, bDefaultKernel, bCUDA40, bCUDA32;
+    bool bForceLegacyKernel, bForceDefaultKernel, bCUDA40, bCUDA32;
     char sbuf[STRLEN];
     int  kver;
 
-    /* legacy kernel (former k2), kept for now for backward compatibility,
-       faster than the default with  CUDA 3.2/4.0 (TODO: on Kepler?). */
-    bLegacyKernel  = (getenv("GMX_CUDA_NB_LEGACY") != NULL);
+    /* Legacy kernel (former k2), kept for backward compatibility as it is
+       faster than the default with CUDA 3.2/4.0 on Fermi (not on Kepler). */
+    bForceLegacyKernel  = (getenv("GMX_CUDA_NB_LEGACY") != NULL);
     /* default kernel (former k3). */
-    bDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL);
+    bForceDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL);
 
-    if ((unsigned)(bLegacyKernel + bDefaultKernel) > 1)
+    if ((unsigned)(bForceLegacyKernel + bForceDefaultKernel) > 1)
     {
         gmx_fatal(FARGS, "Multiple CUDA non-bonded kernels requested; to manually pick a kernel set only one \n"
                   "of the following environment variables: \n"
@@ -371,17 +385,18 @@ static int pick_nbnxn_kernel_version()
     /* default is default ;) */
     kver = eNbnxnCuKDefault;
 
-    if (bCUDA32 || bCUDA40)
+    /* Consider switching to legacy kernels only on Fermi */
+    if (devinfo->prop.major < 3 && (bCUDA32 || bCUDA40))
     {
         /* use legacy kernel unless something else is forced by an env. var */
-        if (bDefaultKernel)
+        if (bForceDefaultKernel)
         {
-            fprintf(stderr,
-                    "\nNOTE: CUDA %s compilation detected; with this compiler version the legacy\n"
-                    "      non-bonded kernels perform best. However, the default kernels were\n"
-                    "      selected by the GMX_CUDA_NB_DEFAULT environment variable.\n"
-                    "      For best performance upgrade your CUDA toolkit.",
-                    sbuf);
+            md_print_warn(fplog,
+                          "NOTE: CUDA %s compilation detected; with this compiler version the legacy\n"
+                          "      non-bonded kernels perform best. However, the default kernels were\n"
+                          "      selected by the GMX_CUDA_NB_DEFAULT environment variable.\n"
+                          "      For best performance upgrade your CUDA toolkit.\n",
+                          sbuf);
         }
         else
         {
@@ -390,11 +405,11 @@ static int pick_nbnxn_kernel_version()
     }
     else
     {
-        /* issue not if the non-default kernel is forced by an env. var */
-        if (bLegacyKernel)
+        /* issue note if the non-default kernel is forced by an env. var */
+        if (bForceLegacyKernel)
         {
-            fprintf(stderr,
-                    "\nNOTE: Legacy non-bonded CUDA kernels were selected by the GMX_CUDA_NB_LEGACY\n"
+            md_print_warn(fplog,
+                    "NOTE: Legacy non-bonded CUDA kernels selected by the GMX_CUDA_NB_LEGACY\n"
                     "      env. var. Consider using using the default kernels which should be faster!\n");
 
             kver = eNbnxnCuKLegacy;
@@ -577,7 +592,7 @@ void nbnxn_cuda_init(FILE *fplog,
     }
 
     /* set the kernel type for the current GPU */
-    nb->kernel_ver = pick_nbnxn_kernel_version();
+    nb->kernel_ver = pick_nbnxn_kernel_version(fplog, nb->dev_info);
     /* pick L1 cache configuration */
     nbnxn_cuda_set_cacheconfig(nb->dev_info);