From bd2b668ee0a42d1e328ac86400df7793132f8ac3 Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Fri, 24 Nov 2017 16:09:22 +0100 Subject: [PATCH] Disabled CUDA timings CUDA timings are incorrect with multiple streams and currently we can not query for other streams in the non-bonded or PME module, so disabled them by default. Added a GMX_ENABLE_GPU_TIMING env.var. Removed deprecated env.vars. Change-Id: I55ab98d7fea8fa90782e8346ad73b2d2a2171a1d --- docs/user-guide/environment-variables.rst | 12 +++++++++--- src/gromacs/ewald/pme.cu | 8 ++++---- src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu | 10 ++++------ src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp | 6 ++---- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/user-guide/environment-variables.rst b/docs/user-guide/environment-variables.rst index 4d6d780bf2..4cbd001f1e 100644 --- a/docs/user-guide/environment-variables.rst +++ b/docs/user-guide/environment-variables.rst @@ -78,6 +78,15 @@ Output Control Defaults to 1, which prints frame count e.g. when reading trajectory files. Set to 0 for quiet operation. +``GMX_ENABLE_GPU_TIMING`` + Enables GPU timings in the log file for CUDA. Note that CUDA timings + are incorrect with multiple streams, as happens with domain + decomposition or with both non-bondeds and PME on the GPU (this is + also the main reason why they are not turned on by default). + +``GMX_DISABLE_GPU_TIMING`` + Disables GPU timings in the log file for OpenCL. + Debugging --------- ``GMX_PRINT_DEBUG_LINES`` @@ -460,9 +469,6 @@ compilation of OpenCL kernels, but they are also used in device selection. Forces the use of twin-range cutoff kernel. Equivalent of CUDA environment variable ``GMX_CUDA_NB_EWALD_TWINCUT`` -``GMX_DISABLE_OCL_TIMING`` - Disables timing for OpenCL operations - ``GMX_OCL_FILE_PATH`` Use this parameter to force |Gromacs| to load the OpenCL kernels from a custom location. Use it only if you want to diff --git a/src/gromacs/ewald/pme.cu b/src/gromacs/ewald/pme.cu index c7e3bc71a4..63f3fc40c9 100644 --- a/src/gromacs/ewald/pme.cu +++ b/src/gromacs/ewald/pme.cu @@ -441,11 +441,11 @@ void pme_gpu_init_internal(PmeGpu *pmeGPU) * TODO: PME could also try to pick up nice grid sizes (with factors of 2, 3, 5, 7). */ - pmeGPU->archSpecific->useTiming = (getenv("GMX_DISABLE_CUDA_TIMING") == nullptr) && - (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); - /* TODO: multiple CUDA streams on same GPU cause nonsense cudaEvent_t timings. - * This should probably also check for gpuId exclusivity? + /* WARNING: CUDA timings are incorrect with multiple streams. + * This is the main reason why they are disabled by default. */ + // TODO: Consider turning on by default when we can detect nr of streams. + pmeGPU->archSpecific->useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr); /* Creating a PME CUDA stream */ cudaError_t stat; diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu index cd2f00b5e9..fe86c52557 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu @@ -500,13 +500,11 @@ void nbnxn_gpu_init(gmx_nbnxn_cuda_t **p_nb, stat = cudaEventCreateWithFlags(&nb->misc_ops_and_local_H2D_done, cudaEventDisableTiming); CU_RET_ERR(stat, "cudaEventCreate on misc_ops_and_local_H2D_done failed"); - /* CUDA timing disabled as event timers don't work: - - with multiple streams = domain-decomposition; - - when turned off by GMX_DISABLE_CUDA_TIMING/GMX_DISABLE_GPU_TIMING. + /* WARNING: CUDA timings are incorrect with multiple streams. + * This is the main reason why they are disabled by default. */ - nb->bDoTime = (!nb->bUseTwoStreams && - (getenv("GMX_DISABLE_CUDA_TIMING") == NULL) && - (getenv("GMX_DISABLE_GPU_TIMING") == NULL)); + // TODO: Consider turning on by default when we can detect nr of streams. + nb->bDoTime = (getenv("GMX_ENABLE_GPU_TIMING") != NULL); if (nb->bDoTime) { diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp index db5a610625..559fcfd461 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp @@ -764,10 +764,8 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t **p_nb, init_plist(nb->plist[eintLocal]); - /* OpenCL timing disabled if GMX_DISABLE_OCL_TIMING is defined. */ - /* TODO deprecate the first env var in the 2017 release. */ - nb->bDoTime = (getenv("GMX_DISABLE_OCL_TIMING") == NULL && - getenv("GMX_DISABLE_GPU_TIMING") == NULL); + /* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */ + nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == NULL); /* Create queues only after bDoTime has been initialized */ if (nb->bDoTime) -- 2.22.0