From 8437de319b8706b5651f774df32f74ff461ababf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Tue, 26 Jan 2021 16:33:16 +0000 Subject: [PATCH] Change default Ewald kernel to tabulated on NVIDIA CC 7.0 Partially addressed #3845 --- docs/release-notes/2021/major/performance.rst | 7 ++++++ .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu | 4 ++-- src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp | 22 +++++++++++++------ src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h | 5 +++-- .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 10 +++++---- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/docs/release-notes/2021/major/performance.rst b/docs/release-notes/2021/major/performance.rst index 9c499fde53..06888c99c9 100644 --- a/docs/release-notes/2021/major/performance.rst +++ b/docs/release-notes/2021/major/performance.rst @@ -46,3 +46,10 @@ Allow offloading GPU update and constraints without direct GPU communication Allow domain-decomposition and separate PME rank parallel runs to offload update and constraints to a GPU with CUDA without requiring the (experimental) direct GPU communication features to be also enabled. + +Tune CUDA short-range nonbonded kernel parameters on NVIDIA Volta and Ampere A100 +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Recent compilers allowed re-tuning the nonbonded kernel defaults on NVIDIA Volta and +Ampere A100 GPUs, which improves performance of the Ewald kernels, especially those that +also compute energies. diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index b1d6774a26..bedabd85c8 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -2,7 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team. 
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -193,7 +193,7 @@ static void init_nbparam(NBParamGpu* nbp, } else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD)) { - nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); + nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic, deviceContext.deviceInfo()); } else { diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp index 0c3586f7d6..0af6e94e61 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp @@ -2,7 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team. - * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -58,6 +58,7 @@ #include "nbnxm_gpu_data_mgmt.h" +#include "gromacs/hardware/device_information.h" #include "gromacs/nbnxm/gpu_data_mgmt.h" #include "gromacs/timing/gpu_timing.h" #include "gromacs/utility/cstringutil.h" @@ -95,7 +96,8 @@ void inline printEnvironmentVariableDeprecationMessage(bool isEnvi } } -int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic) +int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic, + const DeviceInformation gmx_unused& deviceInfo) { bool bTwinCut = (ic.rcoulomb != ic.rvdw); int kernel_type; @@ -129,13 +131,19 @@ int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic) "requested through environment variables."); } - /* By default, use analytical Ewald - * TODO: tabulated does not work in OpenCL, it needs fixing, see init_nbparam() in nbnxn_ocl_data_mgmt.cpp - * + /* By default, use analytical Ewald except with CUDA on NVIDIA CC 7.0 and 8.0. */ - bool bUseAnalyticalEwald = true; + const bool c_useTabulatedEwaldDefault = +#if GMX_GPU_CUDA + (deviceInfo.prop.major == 7 && deviceInfo.prop.minor == 0) + || (deviceInfo.prop.major == 8 && deviceInfo.prop.minor == 0); +#else + false; +#endif + bool bUseAnalyticalEwald = !c_useTabulatedEwaldDefault; if (forceAnalyticalEwald) { + bUseAnalyticalEwald = true; if (debug) { fprintf(debug, "Using analytical Ewald GPU kernels\n"); @@ -198,7 +206,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params()); - nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); + nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic, nb->deviceContext_->deviceInfo()); GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_); diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h index 761737ddf0..b417566db4 100644 --- 
a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h @@ -2,7 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team. - * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -68,7 +68,8 @@ void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables, const DeviceContext& deviceContext); /*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */ -int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic); +int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic, + const DeviceInformation& deviceInfo); /*! \brief Copies all parameters related to the cut-off from ic to nbp */ diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index 29989c8095..ac99cf926d 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -2,7 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team. - * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -132,7 +132,8 @@ static void init_atomdata_first(cl_atomdata_t* ad, int ntypes, const DeviceConte static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_t* ic, int combRule, int* gpu_eeltype, - int* gpu_vdwtype) + int* gpu_vdwtype, + const DeviceContext& deviceContext) { if (ic->vdwtype == evdwCUT) { @@ -185,7 +186,7 @@ static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_ } else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD)) { - *gpu_eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); + *gpu_eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic, deviceContext.deviceInfo()); } else { @@ -206,7 +207,8 @@ static void init_nbparam(NBParamGpu* nbp, { set_cutoff_parameters(nbp, ic, listParams); - map_interaction_types_to_gpu_kernel_flavors(ic, nbatParams.comb_rule, &(nbp->eeltype), &(nbp->vdwtype)); + map_interaction_types_to_gpu_kernel_flavors(ic, nbatParams.comb_rule, &(nbp->eeltype), + &(nbp->vdwtype), deviceContext); if (ic->vdwtype == evdwPME) { -- 2.22.0