From c04377fde97785e4fa02bf229b56a67345271be7 Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Wed, 3 Nov 2021 20:08:15 +0000 Subject: [PATCH] Unify handling of GMX_ENABLE_GPU_TIMING and GMX_DISABLE_GPU_TIMING --- docs/user-guide/environment-variables.rst | 5 +++-- src/gromacs/gpu_utils/gpu_utils.cpp | 23 +++++++++++++++++++++++ src/gromacs/gpu_utils/gpu_utils.h | 8 ++++++++ src/gromacs/mdrun/runner.cpp | 21 +++------------------ src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp | 13 +------------ 5 files changed, 38 insertions(+), 32 deletions(-) diff --git a/docs/user-guide/environment-variables.rst b/docs/user-guide/environment-variables.rst index 1698fcedb2..1f323174e1 100644 --- a/docs/user-guide/environment-variables.rst +++ b/docs/user-guide/environment-variables.rst @@ -82,8 +82,8 @@ Output Control files. Set to 0 for quiet operation. ``GMX_ENABLE_GPU_TIMING`` - Enables GPU timings in the log file for CUDA. Note that CUDA timings - are incorrect with multiple streams, as happens with domain + Enables GPU timings in the log file for CUDA and SYCL. Note that CUDA + timings are incorrect with multiple streams, as happens with domain decomposition or with both non-bondeds and PME on the GPU (this is also the main reason why they are not turned on by default). @@ -211,6 +211,7 @@ Performance and Run Control ``GMX_DISABLE_GPU_TIMING`` timing of asynchronously executed GPU operations can have a non-negligible overhead with short step times. Disabling timing can improve performance in these cases. + Timings are disabled by default with CUDA and SYCL. 
``GMX_DISABLE_GPU_DETECTION`` when set, disables GPU detection even if :ref:`gmx mdrun` was compiled diff --git a/src/gromacs/gpu_utils/gpu_utils.cpp b/src/gromacs/gpu_utils/gpu_utils.cpp index 501d6b625f..2d8846caa3 100644 --- a/src/gromacs/gpu_utils/gpu_utils.cpp +++ b/src/gromacs/gpu_utils/gpu_utils.cpp @@ -59,3 +59,26 @@ const char* enumValueToString(GpuApiCallBehavior enumValue) }; return s_gpuApiCallBehaviorNames[enumValue]; } + +bool decideGpuTimingsUsage() +{ + if (GMX_GPU_CUDA || GMX_GPU_SYCL) + { + /* CUDA: timings are incorrect with multiple streams. + * This is the main reason why they are disabled by default. + * TODO: Consider turning on by default when we can detect nr of streams. + * + * SYCL: compilers and runtimes change rapidly, so we disable timings by default + * to avoid any possible overhead. */ + return (getenv("GMX_ENABLE_GPU_TIMING") != nullptr); + } + else if (GMX_GPU_OPENCL) + { + return (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); + } + else + { + // CPU-only build + return false; + } +} diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h index f9c5353b74..c5651b6b60 100644 --- a/src/gromacs/gpu_utils/gpu_utils.h +++ b/src/gromacs/gpu_utils/gpu_utils.h @@ -129,4 +129,12 @@ CUDA_FUNC_QUALIFIER void setupGpuDevicePeerAccess(const std::vector<int>& CUDA_FUNC_ARGUMENT(gpuIdsToUse), const gmx::MDLogger& CUDA_FUNC_ARGUMENT(mdlog)) CUDA_FUNC_TERM; +/*! \brief Check the platform-defaults and environment variable to decide whether GPU timings + * should be enabled. + * + * Currently, timings are enabled for OpenCL, but disabled for CUDA and SYCL. This can be overridden + * by \c GMX_ENABLE_GPU_TIMING and \c GMX_DISABLE_GPU_TIMING environment variables. 
+ */ +bool decideGpuTimingsUsage(); + #endif diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 313b4ebb34..8fc6d89a72 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1403,22 +1403,6 @@ int Mdrunner::mdrunner() int deviceId = -1; DeviceInformation* deviceInfo = gpuTaskAssignments.initDevice(&deviceId); - // timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?) - bool useTiming = true; - - if (GMX_GPU_CUDA) - { - /* WARNING: CUDA timings are incorrect with multiple streams. - * This is the main reason why they are disabled by default. - */ - // TODO: Consider turning on by default when we can detect nr of streams. - useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr); - } - else if (GMX_GPU_OPENCL) - { - useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); - } - // TODO Currently this is always built, yet DD partition code // checks if it is built before using it. Probably it should // become an MDModule that is made only when another module @@ -1506,8 +1490,9 @@ int Mdrunner::mdrunner() { dd_setup_dlb_resource_sharing(cr, deviceId); } - deviceStreamManager = std::make_unique<DeviceStreamManager>( - *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming); + const bool useGpuTiming = decideGpuTimingsUsage(); + deviceStreamManager = std::make_unique<DeviceStreamManager>( + *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useGpuTiming); } // If the user chose a task assignment, give them some hints diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp index 874f9a614f..2263593cb8 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp @@ -431,18 +431,7 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager, nb->timers = new Nbnxm::GpuTimers(); snew(nb->timings, 1); - /* WARNING: CUDA timings are incorrect with multiple streams. 
- * This is the main reason why they are disabled by default. - * Can be enabled by setting GMX_ENABLE_GPU_TIMING environment variable. - * TODO: Consider turning on by default when we can detect nr of streams. - * - * OpenCL timing is enabled by default and can be disabled by - * GMX_DISABLE_GPU_TIMING environment variable. - * - * Timing is disabled in SYCL. - */ - nb->bDoTime = (GMX_GPU_CUDA && (getenv("GMX_ENABLE_GPU_TIMING") != nullptr)) - || (GMX_GPU_OPENCL && (getenv("GMX_DISABLE_GPU_TIMING") == nullptr)); + nb->bDoTime = decideGpuTimingsUsage(); if (nb->bDoTime) { -- 2.22.0