/* We should actually be using md_print_warn in md_logging.c,
* but we can't include mpi.h in CUDA code.
*/
-static void md_print_warn(FILE *fplog, const char *buf)
+static void md_print_warn(FILE *fplog,
+ const char *fmt, ...)
{
+ va_list ap;
+
if (fplog != NULL)
{
/* We should only print to stderr on the master node,
* in most cases fplog is only set on the master node, so this works.
*/
- fprintf(stderr, "\n%s\n", buf);
- fprintf(fplog, "\n%s\n", buf);
+ va_start(ap, fmt);
+ fprintf(stderr, "\n");
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+
+ va_start(ap, fmt);
+ fprintf(fplog, "\n");
+ vfprintf(fplog, fmt, ap);
+ fprintf(fplog, "\n");
+ va_end(ap);
}
}
+
/* Fw. decl. */
static void nbnxn_cuda_clear_e_fshift(nbnxn_cuda_ptr_t cu_nb);
}
/* Decide which kernel version to use (default or legacy) based on:
- * - CUDA version
+ * - CUDA version used for compilation
* - non-bonded kernel selector environment variables
- * - GPU SM version TODO ???
+ * - GPU architecture version
*/
-static int pick_nbnxn_kernel_version()
+static int pick_nbnxn_kernel_version(FILE *fplog,
+ cuda_dev_info_t *devinfo)
{
- bool bLegacyKernel, bDefaultKernel, bCUDA40, bCUDA32;
+ bool bForceLegacyKernel, bForceDefaultKernel, bCUDA40, bCUDA32;
char sbuf[STRLEN];
int kver;
- /* legacy kernel (former k2), kept for now for backward compatibility,
- faster than the default with CUDA 3.2/4.0 (TODO: on Kepler?). */
- bLegacyKernel = (getenv("GMX_CUDA_NB_LEGACY") != NULL);
+ /* Legacy kernel (former k2), kept for backward compatibility as it is
+ faster than the default with CUDA 3.2/4.0 on Fermi (not on Kepler). */
+ bForceLegacyKernel = (getenv("GMX_CUDA_NB_LEGACY") != NULL);
/* default kernel (former k3). */
- bDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL);
+ bForceDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL);
- if ((unsigned)(bLegacyKernel + bDefaultKernel) > 1)
+ if ((unsigned)(bForceLegacyKernel + bForceDefaultKernel) > 1)
{
gmx_fatal(FARGS, "Multiple CUDA non-bonded kernels requested; to manually pick a kernel set only one \n"
"of the following environment variables: \n"
/* default is default ;) */
kver = eNbnxnCuKDefault;
- if (bCUDA32 || bCUDA40)
+ /* Consider switching to legacy kernels only on Fermi */
+ if (devinfo->prop.major < 3 && (bCUDA32 || bCUDA40))
{
/* use legacy kernel unless something else is forced by an env. var */
- if (bDefaultKernel)
+ if (bForceDefaultKernel)
{
- fprintf(stderr,
- "\nNOTE: CUDA %s compilation detected; with this compiler version the legacy\n"
- " non-bonded kernels perform best. However, the default kernels were\n"
- " selected by the GMX_CUDA_NB_DEFAULT environment variable.\n"
- " For best performance upgrade your CUDA toolkit.",
- sbuf);
+ md_print_warn(fplog,
+ "NOTE: CUDA %s compilation detected; with this compiler version the legacy\n"
+ " non-bonded kernels perform best. However, the default kernels were\n"
+ " selected by the GMX_CUDA_NB_DEFAULT environment variable.\n"
+ " For best performance upgrade your CUDA toolkit.\n",
+ sbuf);
}
else
{
}
else
{
- /* issue not if the non-default kernel is forced by an env. var */
- if (bLegacyKernel)
+ /* issue note if the non-default kernel is forced by an env. var */
+ if (bForceLegacyKernel)
{
- fprintf(stderr,
- "\nNOTE: Legacy non-bonded CUDA kernels were selected by the GMX_CUDA_NB_LEGACY\n"
+ md_print_warn(fplog,
+ "NOTE: Legacy non-bonded CUDA kernels selected by the GMX_CUDA_NB_LEGACY\n"
" env. var. Consider using using the default kernels which should be faster!\n");
kver = eNbnxnCuKLegacy;
}
/* set the kernel type for the current GPU */
- nb->kernel_ver = pick_nbnxn_kernel_version();
+ nb->kernel_ver = pick_nbnxn_kernel_version(fplog, nb->dev_info);
/* pick L1 cache configuration */
nbnxn_cuda_set_cacheconfig(nb->dev_info);