Allow domain-decomposition and separate PME rank parallel runs to offload update and
constraints to a GPU with CUDA without requiring the (experimental) direct GPU
communication features to be also enabled.
+
+Tune CUDA short-range nonbonded kernel parameters on NVIDIA Volta and Ampere A100
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+Recent compilers allowed re-tuning the nonbonded kernel defaults on NVIDIA Volta and
+Ampere A100 GPUs, which improves performance of the Ewald kernels, especially those that
+also compute energies.
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD))
{
- nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
+ nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic, deviceContext.deviceInfo());
}
else
{
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "nbnxm_gpu_data_mgmt.h"
+#include "gromacs/hardware/device_information.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/timing/gpu_timing.h"
#include "gromacs/utility/cstringutil.h"
}
}
-int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic)
+int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic,
+ const DeviceInformation gmx_unused& deviceInfo)
{
bool bTwinCut = (ic.rcoulomb != ic.rvdw);
int kernel_type;
"requested through environment variables.");
}
- /* By default, use analytical Ewald
- * TODO: tabulated does not work in OpenCL, it needs fixing, see init_nbparam() in nbnxn_ocl_data_mgmt.cpp
- *
+ /* By default, use analytical Ewald except with CUDA on NVIDIA CC 7.0 and 8.0.
*/
- bool bUseAnalyticalEwald = true;
+ const bool c_useTabulatedEwaldDefault =
+#if GMX_GPU_CUDA
+ (deviceInfo.prop.major == 7 && deviceInfo.prop.minor == 0)
+ || (deviceInfo.prop.major == 8 && deviceInfo.prop.minor == 0);
+#else
+ false;
+#endif
+ bool bUseAnalyticalEwald = !c_useTabulatedEwaldDefault;
if (forceAnalyticalEwald)
{
+ bUseAnalyticalEwald = true;
if (debug)
{
fprintf(debug, "Using analytical Ewald GPU kernels\n");
set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
- nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
+ nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic, nb->deviceContext_->deviceInfo());
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
const DeviceContext& deviceContext);
/*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */
-int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic);
+int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic,
+ const DeviceInformation& deviceInfo);
/*! \brief Copies all parameters related to the cut-off from ic to nbp
*/
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_t* ic,
int combRule,
int* gpu_eeltype,
- int* gpu_vdwtype)
+ int* gpu_vdwtype,
+ const DeviceContext& deviceContext)
{
if (ic->vdwtype == evdwCUT)
{
}
else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD))
{
- *gpu_eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
+ *gpu_eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic, deviceContext.deviceInfo());
}
else
{
{
set_cutoff_parameters(nbp, ic, listParams);
- map_interaction_types_to_gpu_kernel_flavors(ic, nbatParams.comb_rule, &(nbp->eeltype), &(nbp->vdwtype));
+ map_interaction_types_to_gpu_kernel_flavors(ic, nbatParams.comb_rule, &(nbp->eeltype),
+ &(nbp->vdwtype), deviceContext);
if (ic->vdwtype == evdwPME)
{