Disabled CUDA timings

author Berk Hess <hess@kth.se>

Fri, 24 Nov 2017 15:09:22 +0000 (16:09 +0100)

committer Aleksei Iupinov <a.yupinov@gmail.com>

Fri, 24 Nov 2017 17:03:47 +0000 (18:03 +0100)
author Berk Hess <hess@kth.se>
Fri, 24 Nov 2017 15:09:22 +0000 (16:09 +0100)
committer Aleksei Iupinov <a.yupinov@gmail.com>
Fri, 24 Nov 2017 17:03:47 +0000 (18:03 +0100)
diff --git a/docs/user-guide/environment-variables.rst b/docs/user-guide/environment-variables.rst

index 4d6d780bf2175668d58df384850e3c153c3b79fc..4cbd001f1e9cf73a2f64147c2eb497d0a37e2585 100644 (file)
--- a/docs/user-guide/environment-variables.rst
+++ b/docs/user-guide/environment-variables.rst
@@ -78,6 +78,15 @@ Output Control
          Defaults to 1, which prints frame count e.g. when reading trajectory
          files. Set to 0 for quiet operation.
  
+``GMX_ENABLE_GPU_TIMING``
+        Enables GPU timings in the log file for CUDA. Note that CUDA timings
+        are incorrect when multiple streams are in use, which happens with
+        domain decomposition or when both non-bonded interactions and PME run
+        on the GPU (this is the main reason they are off by default).
+
+``GMX_DISABLE_GPU_TIMING``
+        Disables GPU timings in the log file for OpenCL (enabled by default).
+
  Debugging
  ---------
  ``GMX_PRINT_DEBUG_LINES``
@@ -460,9 +469,6 @@ compilation of OpenCL kernels, but they are also used in device selection.
          Forces the use of twin-range cutoff kernel. Equivalent of
          CUDA environment variable ``GMX_CUDA_NB_EWALD_TWINCUT``
  
-``GMX_DISABLE_OCL_TIMING``
-        Disables timing for OpenCL operations
-
  ``GMX_OCL_FILE_PATH``
          Use this parameter to force |Gromacs| to load the OpenCL
          kernels from a custom location. Use it only if you want to
diff --git a/src/gromacs/ewald/pme.cu b/src/gromacs/ewald/pme.cu

index c7e3bc71a41a98647e456a9d67b784a217555566..63f3fc40c97a7e23e5831f5029cf1057905f71e7 100644 (file)
--- a/src/gromacs/ewald/pme.cu
+++ b/src/gromacs/ewald/pme.cu
@@ -441,11 +441,11 @@ void pme_gpu_init_internal(PmeGpu *pmeGPU)
       * TODO: PME could also try to pick up nice grid sizes (with factors of 2, 3, 5, 7).
       */
  
-    pmeGPU->archSpecific->useTiming = (getenv("GMX_DISABLE_CUDA_TIMING") == nullptr) &&
-        (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
-    /* TODO: multiple CUDA streams on same GPU cause nonsense cudaEvent_t timings.
-     * This should probably also check for gpuId exclusivity?
+    /* WARNING: CUDA timings are incorrect with multiple streams.
+     *          This is the main reason why they are disabled by default.
       */
+    // TODO: Consider turning on by default when we can detect nr of streams.
+    pmeGPU->archSpecific->useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
  
      /* Creating a PME CUDA stream */
      cudaError_t stat;
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

index cd2f00b5e975b1b8ef29839b53642b0158a5fb37..fe86c52557037f991e11667d988d9dc6d6f04d4d 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -500,13 +500,11 @@ void nbnxn_gpu_init(gmx_nbnxn_cuda_t         **p_nb,
      stat = cudaEventCreateWithFlags(&nb->misc_ops_and_local_H2D_done, cudaEventDisableTiming);
      CU_RET_ERR(stat, "cudaEventCreate on misc_ops_and_local_H2D_done failed");
  
-    /* CUDA timing disabled as event timers don't work:
-       - with multiple streams = domain-decomposition;
-       - when turned off by GMX_DISABLE_CUDA_TIMING/GMX_DISABLE_GPU_TIMING.
+    /* WARNING: CUDA timings are incorrect with multiple streams.
+     *          This is the main reason why they are disabled by default.
       */
-    nb->bDoTime = (!nb->bUseTwoStreams &&
-                   (getenv("GMX_DISABLE_CUDA_TIMING") == NULL) &&
-                   (getenv("GMX_DISABLE_GPU_TIMING") == NULL));
+    // TODO: Consider turning on by default when we can detect nr of streams.
+    nb->bDoTime = (getenv("GMX_ENABLE_GPU_TIMING") != NULL);
  
      if (nb->bDoTime)
      {
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp

index db5a61062582bef6806729f37acb5a84dd485fc7..559fcfd461595c555d6b5c7eca7993ac30f56987 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
@@ -764,10 +764,8 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
  
      init_plist(nb->plist[eintLocal]);
  
-    /* OpenCL timing disabled if GMX_DISABLE_OCL_TIMING is defined. */
-    /* TODO deprecate the first env var in the 2017 release. */
-    nb->bDoTime = (getenv("GMX_DISABLE_OCL_TIMING") == NULL &&
-                   getenv("GMX_DISABLE_GPU_TIMING") == NULL);
+    /* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */
+    nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == NULL);
  
      /* Create queues only after bDoTime has been initialized */
      if (nb->bDoTime)
author	Berk Hess <hess@kth.se>
	Fri, 24 Nov 2017 15:09:22 +0000 (16:09 +0100)
committer	Aleksei Iupinov <a.yupinov@gmail.com>
	Fri, 24 Nov 2017 17:03:47 +0000 (18:03 +0100)
docs/user-guide/environment-variables.rst		patch \| blob \| history
src/gromacs/ewald/pme.cu		patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp		patch \| blob \| history