to performance loss due to a known CUDA driver bug present in API v5.0 NVIDIA drivers (pre-30x.xx).
Cannot be set simultaneously with ``GMX_NO_CUDA_STREAMSYNC``.
+``GMX_DISABLE_CUDALAUNCH``
+ disable the use of the lower-latency cudaLaunchKernel API even when supported (CUDA >=v7.0).
+ Should only be used for benchmarking purposes.
+
``GMX_CYCLE_ALL``
times all code during runs. Incompatible with threads.
/*********************************/
+/* XXX switch between chevron (<<< >>>) and cudaLaunchKernel kernel launches
+ (cudaLaunchKernel is supported only in CUDA >=7.0) -- only for benchmarking purposes */
+static const bool bUseCudaLaunchKernel =
+ (GMX_CUDA_VERSION >= 7000) && (getenv("GMX_DISABLE_CUDALAUNCH") == NULL);
/* XXX always/never run the energy/pruning kernels -- only for benchmarking purposes */
static bool always_ener = (getenv("GMX_GPU_ALWAYS_ENER") != NULL);
shmem);
}
- nb_kernel<<< dim_grid, dim_block, shmem, stream>>> (*adat, *nbp, *plist, bCalcFshift);
+ if (bUseCudaLaunchKernel)
+ {
+ gmx_unused void* kernel_args[4];
+ kernel_args[0] = adat;
+ kernel_args[1] = nbp;
+ kernel_args[2] = plist;
+ kernel_args[3] = &bCalcFshift;
+
+#if GMX_CUDA_VERSION >= 7000
+ cudaLaunchKernel((void *)nb_kernel, dim_grid, dim_block, kernel_args, shmem, stream);
+#endif
+ }
+ else
+ {
+ nb_kernel<<< dim_grid, dim_block, shmem, stream>>> (*adat, *nbp, *plist, bCalcFshift);
+ }
CU_LAUNCH_ERR("k_calc_nb");
if (bDoTime)