From c04377fde97785e4fa02bf229b56a67345271be7 Mon Sep 17 00:00:00 2001
From: Andrey Alekseenko <al42and@gmail.com>
Date: Wed, 3 Nov 2021 20:08:15 +0000
Subject: [PATCH] Unify handling of GMX_ENABLE_GPU_TIMING and
 GMX_DISABLE_GPU_TIMING

---
 docs/user-guide/environment-variables.rst |  5 +++--
 src/gromacs/gpu_utils/gpu_utils.cpp       | 23 +++++++++++++++++++++++
 src/gromacs/gpu_utils/gpu_utils.h         |  8 ++++++++
 src/gromacs/mdrun/runner.cpp              | 21 +++------------------
 src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp | 13 +------------
 5 files changed, 38 insertions(+), 32 deletions(-)
diff --git a/docs/user-guide/environment-variables.rst b/docs/user-guide/environment-variables.rst
index 1698fcedb2..1f323174e1 100644
--- a/docs/user-guide/environment-variables.rst
+++ b/docs/user-guide/environment-variables.rst
@@ -82,8 +82,8 @@ Output Control
         files. Set to 0 for quiet operation.
 
 ``GMX_ENABLE_GPU_TIMING``
-        Enables GPU timings in the log file for CUDA. Note that CUDA timings
-        are incorrect with multiple streams, as happens with domain
+        Enables GPU timings in the log file for CUDA and SYCL. Note that CUDA
+        timings are incorrect with multiple streams, as happens with domain
         decomposition or with both non-bondeds and PME on the GPU (this is
         also the main reason why they are not turned on by default).
 
@@ -211,6 +211,7 @@ Performance and Run Control
 ``GMX_DISABLE_GPU_TIMING``
         timing of asynchronously executed GPU operations can have a
         non-negligible overhead with short step times. Disabling timing can improve performance in these cases.
+        Timings are already disabled by default with CUDA and SYCL, so this variable only has an effect with OpenCL.
 
 ``GMX_DISABLE_GPU_DETECTION``
         when set, disables GPU detection even if :ref:`gmx mdrun` was compiled
diff --git a/src/gromacs/gpu_utils/gpu_utils.cpp b/src/gromacs/gpu_utils/gpu_utils.cpp
index 501d6b625f..2d8846caa3 100644
--- a/src/gromacs/gpu_utils/gpu_utils.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils.cpp
@@ -59,3 +59,26 @@ const char* enumValueToString(GpuApiCallBehavior enumValue)
     };
     return s_gpuApiCallBehaviorNames[enumValue];
 }
+
+bool decideGpuTimingsUsage()
+{
+    if (GMX_GPU_CUDA || GMX_GPU_SYCL)
+    {
+        /* Opt-in: timings are used only if GMX_ENABLE_GPU_TIMING is present in the environment.
+         * CUDA: timings are incorrect with multiple streams; this is mainly why they are off by default.
+         * TODO: Consider turning on by default when we can detect the number of streams.
+         *
+         * SYCL: compilers and runtimes change rapidly, so we disable timings by default
+         * to avoid any possible overhead. */
+        return (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
+    }
+    else if (GMX_GPU_OPENCL)
+    {
+        return (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); // on unless the variable is set
+    }
+    else
+    {
+        // CPU-only build: neither environment variable is consulted, timings are never used
+        return false;
+    }
+}
diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h
index f9c5353b74..c5651b6b60 100644
--- a/src/gromacs/gpu_utils/gpu_utils.h
+++ b/src/gromacs/gpu_utils/gpu_utils.h
@@ -129,4 +129,12 @@ CUDA_FUNC_QUALIFIER
 void setupGpuDevicePeerAccess(const std::vector<int>& CUDA_FUNC_ARGUMENT(gpuIdsToUse),
                               const gmx::MDLogger&    CUDA_FUNC_ARGUMENT(mdlog)) CUDA_FUNC_TERM;
 
+/*! \brief Decide from platform defaults and environment variables whether to enable GPU timings.
+ *
+ * Timings default to on for OpenCL (opt out with \c GMX_DISABLE_GPU_TIMING) and to off for
+ * CUDA and SYCL (opt in with \c GMX_ENABLE_GPU_TIMING); only the presence of a variable matters.
+ * \returns Whether GPU timings should be used; always \c false when built without CUDA, SYCL or OpenCL.
+ */
+bool decideGpuTimingsUsage();
+
 #endif
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp
index 313b4ebb34..8fc6d89a72 100644
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -1403,22 +1403,6 @@ int Mdrunner::mdrunner()
     int                deviceId   = -1;
     DeviceInformation* deviceInfo = gpuTaskAssignments.initDevice(&deviceId);
 
-    // timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?)
-    bool useTiming = true;
-
-    if (GMX_GPU_CUDA)
-    {
-        /* WARNING: CUDA timings are incorrect with multiple streams.
-         *          This is the main reason why they are disabled by default.
-         */
-        // TODO: Consider turning on by default when we can detect nr of streams.
-        useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
-    }
-    else if (GMX_GPU_OPENCL)
-    {
-        useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
-    }
-
     // TODO Currently this is always built, yet DD partition code
     // checks if it is built before using it. Probably it should
     // become an MDModule that is made only when another module
@@ -1506,8 +1490,9 @@ int Mdrunner::mdrunner()
         {
             dd_setup_dlb_resource_sharing(cr, deviceId);
         }
-        deviceStreamManager = std::make_unique<DeviceStreamManager>(
-                *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming);
+        const bool useGpuTiming = decideGpuTimingsUsage();
+        deviceStreamManager     = std::make_unique<DeviceStreamManager>(
+                *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useGpuTiming);
     }
 
     // If the user chose a task assignment, give them some hints
diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp
index 874f9a614f..2263593cb8 100644
--- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp
@@ -431,18 +431,7 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
     nb->timers = new Nbnxm::GpuTimers();
     snew(nb->timings, 1);
 
-    /* WARNING: CUDA timings are incorrect with multiple streams.
-     * This is the main reason why they are disabled by default.
-     * Can be enabled by setting GMX_ENABLE_GPU_TIMING environment variable.
-     * TODO: Consider turning on by default when we can detect nr of streams.
-     *
-     * OpenCL timing is enabled by default and can be disabled by
-     * GMX_DISABLE_GPU_TIMING environment variable.
-     *
-     * Timing is disabled in SYCL.
-     */
-    nb->bDoTime = (GMX_GPU_CUDA && (getenv("GMX_ENABLE_GPU_TIMING") != nullptr))
-                  || (GMX_GPU_OPENCL && (getenv("GMX_DISABLE_GPU_TIMING") == nullptr));
+    nb->bDoTime = decideGpuTimingsUsage();
 
     if (nb->bDoTime)
     {
-- 
2.22.0