automatically setting booleans. GMX_BUILD_HELP and GMX_HWLOC are now
disabled by default, while GMX_LOAD_PLUGINS is enabled by default.
+Unification of several CUDA and OpenCL environment variables
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+The environment variables that had exactly the same meaning in OpenCL and CUDA were unified:
+
+* GMX_CUDA_NB_ANA_EWALD and GMX_OCL_NB_ANA_EWALD into GMX_GPU_NB_ANA_EWALD
+* GMX_CUDA_NB_TAB_EWALD and GMX_OCL_NB_TAB_EWALD into GMX_GPU_NB_TAB_EWALD
+* GMX_CUDA_NB_EWALD_TWINCUT and GMX_OCL_NB_EWALD_TWINCUT into GMX_GPU_NB_EWALD_TWINCUT
to localized bonded interaction distribution; optimal value dependent on
system and hardware, default value is 4.
-``GMX_CUDA_NB_EWALD_TWINCUT``
+``GMX_GPU_NB_EWALD_TWINCUT``
force the use of twin-range cutoff kernel even if :mdp:`rvdw` equals
:mdp:`rcoulomb` after PP-PME load balancing. The switch to twin-range kernels is automated,
so this variable should be used only for benchmarking.
-``GMX_CUDA_NB_ANA_EWALD``
+``GMX_GPU_NB_ANA_EWALD``
force the use of analytical Ewald kernels. Should be used only for benchmarking.
-``GMX_CUDA_NB_TAB_EWALD``
+``GMX_GPU_NB_TAB_EWALD``
force the use of tabulated Ewald kernels. Should be used only for benchmarking.
``GMX_DISABLE_CUDA_TIMING``
Enables i-atom data (type or LJ parameter) prefetch allowing
testing on platforms where this behavior is not default.
-``GMX_OCL_NB_ANA_EWALD``
- Forces the use of analytical Ewald kernels. Equivalent of
- CUDA environment variable ``GMX_CUDA_NB_ANA_EWALD``
-
-``GMX_OCL_NB_TAB_EWALD``
- Forces the use of tabulated Ewald kernel. Equivalent
- of CUDA environment variable ``GMX_OCL_NB_TAB_EWALD``
-
-``GMX_OCL_NB_EWALD_TWINCUT``
- Forces the use of twin-range cutoff kernel. Equivalent of
- CUDA environment variable ``GMX_CUDA_NB_EWALD_TWINCUT``
-
``GMX_OCL_FILE_PATH``
Use this parameter to force |Gromacs| to load the OpenCL
kernels from a custom location. Use it only if you want to
if(GMX_USE_CUDA)
add_subdirectory(cuda)
+ gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp)
endif()
if(GMX_USE_OPENCL)
add_subdirectory(opencl)
set(NBNXM_OPENCL_KERNELS ${NBNXM_OPENCL_KERNELS} PARENT_SCOPE)
+ gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp)
endif()
set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${NBNXM_SOURCES} PARENT_SCOPE)
constexpr static int c_bufOpsThreadsPerBlock = 128;
/*! Nonbonded kernel function pointer type */
-typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, bool);
+typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const NBParamGpu, const gpu_plist, bool);
/*********************************/
int adat_begin, adat_len; /* local/nonlocal offset and length used for xq and f */
cu_atomdata_t* adat = nb->atdat;
- cu_plist_t* plist = nb->plist[iloc];
+ gpu_plist* plist = nb->plist[iloc];
cu_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
{
cu_atomdata_t* adat = nb->atdat;
NBParamGpu* nbp = nb->nbparam;
- cu_plist_t* plist = nb->plist[iloc];
+ gpu_plist* plist = nb->plist[iloc];
cu_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
{
cu_atomdata_t* adat = nb->atdat;
NBParamGpu* nbp = nb->nbparam;
- cu_plist_t* plist = nb->plist[iloc];
+ gpu_plist* plist = nb->plist[iloc];
cu_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
#include "gromacs/nbnxm/gridset.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/nbnxm_gpu.h"
+#include "gromacs/nbnxm/nbnxm_gpu_data_mgmt.h"
#include "gromacs/nbnxm/pairlistsets.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/timing/gpu_timing.h"
/* Fw. decl. */
static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb);
-/* Fw. decl, */
-static void nbnxn_cuda_free_nbparam_table(NBParamGpu* nbparam);
-
-/*! \brief Initialized the Ewald Coulomb correction GPU table.
-
- Tabulates the Ewald Coulomb force and initializes the size/scale
- and the table GPU array. If called with an already allocated table,
- it just re-uploads the table.
- */
-static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
- NBParamGpu* nbp,
- const DeviceContext& deviceContext)
-{
- if (nbp->coulomb_tab != nullptr)
- {
- nbnxn_cuda_free_nbparam_table(nbp);
- }
-
- nbp->coulomb_tab_scale = tables.scale;
- initParamLookupTable(&nbp->coulomb_tab, &nbp->coulomb_tab_texobj, tables.tableF.data(),
- tables.tableF.size(), deviceContext);
-}
-
-
/*! Initializes the atomdata structure first time, it only gets filled at
pair-search. */
static void init_atomdata_first(cu_atomdata_t* ad, int ntypes, const DeviceContext& deviceContext)
ad->nalloc = -1;
}
-/*! Selects the Ewald kernel type, analytical on SM 3.0 and later, tabulated on
- earlier GPUs, single or twin cut-off. */
-static int pick_ewald_kernel_type(const interaction_const_t& ic)
-{
- bool bTwinCut = (ic.rcoulomb != ic.rvdw);
- bool bUseAnalyticalEwald, bForceAnalyticalEwald, bForceTabulatedEwald;
- int kernel_type;
-
- /* Benchmarking/development environment variables to force the use of
- analytical or tabulated Ewald kernel. */
- bForceAnalyticalEwald = (getenv("GMX_CUDA_NB_ANA_EWALD") != nullptr);
- bForceTabulatedEwald = (getenv("GMX_CUDA_NB_TAB_EWALD") != nullptr);
-
- if (bForceAnalyticalEwald && bForceTabulatedEwald)
- {
- gmx_incons(
- "Both analytical and tabulated Ewald CUDA non-bonded kernels "
- "requested through environment variables.");
- }
-
- /* By default use analytical Ewald. */
- bUseAnalyticalEwald = true;
- if (bForceAnalyticalEwald)
- {
- if (debug)
- {
- fprintf(debug, "Using analytical Ewald CUDA kernels\n");
- }
- }
- else if (bForceTabulatedEwald)
- {
- bUseAnalyticalEwald = false;
-
- if (debug)
- {
- fprintf(debug, "Using tabulated Ewald CUDA kernels\n");
- }
- }
-
- /* Use twin cut-off kernels if requested by bTwinCut or the env. var.
- forces it (use it for debugging/benchmarking only). */
- if (!bTwinCut && (getenv("GMX_CUDA_NB_EWALD_TWINCUT") == nullptr))
- {
- kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA : eelTypeEWALD_TAB;
- }
- else
- {
- kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA_TWIN : eelTypeEWALD_TAB_TWIN;
- }
-
- return kernel_type;
-}
-
-/*! Copies all parameters related to the cut-off from ic to nbp */
-static void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
-{
- nbp->ewald_beta = ic->ewaldcoeff_q;
- nbp->sh_ewald = ic->sh_ewald;
- nbp->epsfac = ic->epsfac;
- nbp->two_k_rf = 2.0 * ic->k_rf;
- nbp->c_rf = ic->c_rf;
- nbp->rvdw_sq = ic->rvdw * ic->rvdw;
- nbp->rcoulomb_sq = ic->rcoulomb * ic->rcoulomb;
- nbp->rlistOuter_sq = listParams.rlistOuter * listParams.rlistOuter;
- nbp->rlistInner_sq = listParams.rlistInner * listParams.rlistInner;
- nbp->useDynamicPruning = listParams.useDynamicPruning;
-
- nbp->sh_lj_ewald = ic->sh_lj_ewald;
- nbp->ewaldcoeff_lj = ic->ewaldcoeff_lj;
-
- nbp->rvdw_switch = ic->rvdw_switch;
- nbp->dispersion_shift = ic->dispersion_shift;
- nbp->repulsion_shift = ic->repulsion_shift;
- nbp->vdw_switch = ic->vdw_switch;
-}
-
/*! Initializes the nonbonded parameter data structure. */
static void init_nbparam(NBParamGpu* nbp,
const interaction_const_t* ic,
}
else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD))
{
- nbp->eeltype = pick_ewald_kernel_type(*ic);
+ nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
}
else
{
set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
- nbp->eeltype = pick_ewald_kernel_type(*ic);
+ nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
}
-/*! Initializes the pair list data structure. */
-static void init_plist(cu_plist_t* pl)
-{
- /* initialize to nullptr pointers to data that is not allocated here and will
- need reallocation in nbnxn_gpu_init_pairlist */
- pl->sci = nullptr;
- pl->cj4 = nullptr;
- pl->imask = nullptr;
- pl->excl = nullptr;
-
- /* size -1 indicates that the respective array hasn't been initialized yet */
- pl->na_c = -1;
- pl->nsci = -1;
- pl->sci_nalloc = -1;
- pl->ncj4 = -1;
- pl->cj4_nalloc = -1;
- pl->nimask = -1;
- pl->imask_nalloc = -1;
- pl->nexcl = -1;
- pl->excl_nalloc = -1;
- pl->haveFreshList = false;
-}
-
-/*! Initializes the timings data structure. */
-static void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
-{
- int i, j;
-
- t->nb_h2d_t = 0.0;
- t->nb_d2h_t = 0.0;
- t->nb_c = 0;
- t->pl_h2d_t = 0.0;
- t->pl_h2d_c = 0;
- for (i = 0; i < 2; i++)
- {
- for (j = 0; j < 2; j++)
- {
- t->ktime[i][j].t = 0.0;
- t->ktime[i][j].c = 0;
- }
- }
- t->pruneTime.c = 0;
- t->pruneTime.t = 0.0;
- t->dynamicPruneTime.c = 0;
- t->dynamicPruneTime.t = 0.0;
-}
-
/*! Initializes simulation constant data. */
static void cuda_init_const(NbnxmGpu* nb,
const interaction_const_t* ic,
char sbuf[STRLEN];
bool bDoTime = (nb->bDoTime && !h_plist->sci.empty());
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
- cu_plist_t* d_plist = nb->plist[iloc];
+ gpu_plist* d_plist = nb->plist[iloc];
if (d_plist->na_c < 0)
{
{
if (d_plist->na_c != h_plist->na_ci)
{
- sprintf(sbuf, "In cu_init_plist: the #atoms per cell has changed (from %d to %d)",
+ sprintf(sbuf, "In init_plist: the #atoms per cell has changed (from %d to %d)",
d_plist->na_c, h_plist->na_ci);
gmx_incons(sbuf);
}
}
}
-static void nbnxn_cuda_free_nbparam_table(NBParamGpu* nbparam)
-{
- if (nbparam->eeltype == eelTypeEWALD_TAB || nbparam->eeltype == eelTypeEWALD_TAB_TWIN)
- {
- destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj);
- }
-}
-
void gpu_free(NbnxmGpu* nb)
{
cudaError_t stat;
atdat = nb->atdat;
nbparam = nb->nbparam;
- nbnxn_cuda_free_nbparam_table(nbparam);
+ if ((!nbparam->coulomb_tab)
+ && (nbparam->eeltype == eelTypeEWALD_TAB || nbparam->eeltype == eelTypeEWALD_TAB_TWIN))
+ {
+ destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj);
+ }
stat = cudaEventDestroy(nb->nonlocal_done);
CU_RET_ERR(stat, "cudaEventDestroy failed on timers->nonlocal_done");
__global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
# endif /* CALC_ENERGIES */
#endif /* PRUNE_NBL */
- (const cu_atomdata_t atdat, const NBParamGpu nbparam, const cu_plist_t plist, bool bCalcFshift)
+ (const cu_atomdata_t atdat, const NBParamGpu nbparam, const Nbnxm::gpu_plist plist, bool bCalcFshift)
#ifdef FUNCTION_DECLARATION_ONLY
; /* Only do function declaration, omit the function body. */
#else
#ifndef FUNCTION_DECLARATION_ONLY
/* Instantiate external template functions */
template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
#endif
*/
template<bool haveFreshList>
__launch_bounds__(THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) __global__
- void nbnxn_kernel_prune_cuda(const cu_atomdata_t atdat,
- const NBParamGpu nbparam,
- const cu_plist_t plist,
- int numParts,
- int part)
+ void nbnxn_kernel_prune_cuda(const cu_atomdata_t atdat,
+ const NBParamGpu nbparam,
+ const Nbnxm::gpu_plist plist,
+ int numParts,
+ int part)
#ifdef FUNCTION_DECLARATION_ONLY
; /* Only do function declaration, omit the function body. */
// Add extern declarations so each translation unit understands that
// there will be a definition provided.
extern template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
extern template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
#else
{
bool bShiftVecUploaded;
};
-/** \internal
- * \brief Pair list data.
- */
-using cu_plist_t = Nbnxm::gpu_plist;
-
/** \internal
* \brief Typedef of actual timer type.
*/
/*! \brief parameters required for the non-bonded calc. */
NBParamGpu* nbparam = nullptr;
/*! \brief pair-list data structures (local and non-local) */
- gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist = { { nullptr } };
+ gmx::EnumerationArray<Nbnxm::InteractionLocality, Nbnxm::gpu_plist*> plist = { { nullptr } };
/*! \brief staging area where fshift/energies get downloaded */
nb_staging_t nbst;
/*! \brief local and non-local GPU streams */
gmx::ArrayRef<gmx::RVec> gmx_unused shiftForces,
gmx_wallcycle gmx_unused* wcycle) GPU_FUNC_TERM_WITH_RETURN(0.0);
-/*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */
-GPU_FUNC_QUALIFIER
-int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic)
- GPU_FUNC_TERM_WITH_RETURN(-1);
-
/*! \brief Initialization for X buffer operations on GPU.
* Called on the NS step and performs (re-)allocations and memory copies. !*/
CUDA_FUNC_QUALIFIER
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief Define common implementation of nbnxm_gpu_data_mgmt.h
+ *
+ * \author Anca Hamuraru <anca@streamcomputing.eu>
+ * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
+ * \author Teemu Virolainen <teemu@streamcomputing.eu>
+ * \author Szilárd Páll <pall.szilard@gmail.com>
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_nbnxm
+ */
+#include "gmxpre.h"
+
+#include "config.h"
+
+#if GMX_GPU == GMX_GPU_CUDA
+# include "cuda/nbnxm_cuda_types.h"
+#endif
+
+#if GMX_GPU == GMX_GPU_OPENCL
+# include "opencl/nbnxm_ocl_types.h"
+#endif
+
+#include "nbnxm_gpu_data_mgmt.h"
+
+#include "gromacs/timing/gpu_timing.h"
+
+#include "nbnxm_gpu.h"
+
+namespace Nbnxm
+{
+
+void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
+ NBParamGpu* nbp,
+ const DeviceContext& deviceContext)
+{
+ if (!nbp->coulomb_tab)
+ {
+ destroyParamLookupTable(&nbp->coulomb_tab, nbp->coulomb_tab_texobj);
+ }
+
+ nbp->coulomb_tab_scale = tables.scale;
+ initParamLookupTable(&nbp->coulomb_tab, &nbp->coulomb_tab_texobj, tables.tableF.data(),
+ tables.tableF.size(), deviceContext);
+}
+
+int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic)
+{
+ bool bTwinCut = (ic.rcoulomb != ic.rvdw);
+ bool bUseAnalyticalEwald, bForceAnalyticalEwald, bForceTabulatedEwald;
+ int kernel_type;
+
+ /* Benchmarking/development environment variables to force the use of
+ analytical or tabulated Ewald kernel. */
+ bForceAnalyticalEwald = (getenv("GMX_GPU_NB_ANA_EWALD") != nullptr);
+ bForceTabulatedEwald = (getenv("GMX_GPU_NB_TAB_EWALD") != nullptr);
+
+ if (bForceAnalyticalEwald && bForceTabulatedEwald)
+ {
+ gmx_incons(
+ "Both analytical and tabulated Ewald GPU non-bonded kernels "
+ "requested through environment variables.");
+ }
+
+ /* By default, use analytical Ewald
+ * TODO: tabulated does not work in OpenCL, it needs fixing, see init_nbparam() in nbnxn_ocl_data_mgmt.cpp
+ *
+ */
+ bUseAnalyticalEwald = true;
+ if (bForceAnalyticalEwald)
+ {
+ if (debug)
+ {
+ fprintf(debug, "Using analytical Ewald GPU kernels\n");
+ }
+ }
+ else if (bForceTabulatedEwald)
+ {
+ bUseAnalyticalEwald = false;
+
+ if (debug)
+ {
+ fprintf(debug, "Using tabulated Ewald GPU kernels\n");
+ }
+ }
+
+ /* Use twin cut-off kernels if requested by bTwinCut or the env. var.
+ forces it (use it for debugging/benchmarking only). */
+ if (!bTwinCut && (getenv("GMX_GPU_NB_EWALD_TWINCUT") == nullptr))
+ {
+ kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA : eelTypeEWALD_TAB;
+ }
+ else
+ {
+ kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA_TWIN : eelTypeEWALD_TAB_TWIN;
+ }
+
+ return kernel_type;
+}
+
+void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
+{
+ nbp->ewald_beta = ic->ewaldcoeff_q;
+ nbp->sh_ewald = ic->sh_ewald;
+ nbp->epsfac = ic->epsfac;
+ nbp->two_k_rf = 2.0 * ic->k_rf;
+ nbp->c_rf = ic->c_rf;
+ nbp->rvdw_sq = ic->rvdw * ic->rvdw;
+ nbp->rcoulomb_sq = ic->rcoulomb * ic->rcoulomb;
+ nbp->rlistOuter_sq = listParams.rlistOuter * listParams.rlistOuter;
+ nbp->rlistInner_sq = listParams.rlistInner * listParams.rlistInner;
+ nbp->useDynamicPruning = listParams.useDynamicPruning;
+
+ nbp->sh_lj_ewald = ic->sh_lj_ewald;
+ nbp->ewaldcoeff_lj = ic->ewaldcoeff_lj;
+
+ nbp->rvdw_switch = ic->rvdw_switch;
+ nbp->dispersion_shift = ic->dispersion_shift;
+ nbp->repulsion_shift = ic->repulsion_shift;
+ nbp->vdw_switch = ic->vdw_switch;
+}
+
+void init_plist(gpu_plist* pl)
+{
+ /* initialize to nullptr pointers to data that is not allocated here and will
+ need reallocation in nbnxn_gpu_init_pairlist */
+ pl->sci = nullptr;
+ pl->cj4 = nullptr;
+ pl->imask = nullptr;
+ pl->excl = nullptr;
+
+ /* size -1 indicates that the respective array hasn't been initialized yet */
+ pl->na_c = -1;
+ pl->nsci = -1;
+ pl->sci_nalloc = -1;
+ pl->ncj4 = -1;
+ pl->cj4_nalloc = -1;
+ pl->nimask = -1;
+ pl->imask_nalloc = -1;
+ pl->nexcl = -1;
+ pl->excl_nalloc = -1;
+ pl->haveFreshList = false;
+}
+
+void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
+{
+ int i, j;
+
+ t->nb_h2d_t = 0.0;
+ t->nb_d2h_t = 0.0;
+ t->nb_c = 0;
+ t->pl_h2d_t = 0.0;
+ t->pl_h2d_c = 0;
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < 2; j++)
+ {
+ t->ktime[i][j].t = 0.0;
+ t->ktime[i][j].c = 0;
+ }
+ }
+ t->pruneTime.c = 0;
+ t->pruneTime.t = 0.0;
+ t->dynamicPruneTime.c = 0;
+ t->dynamicPruneTime.t = 0.0;
+}
+
+} // namespace Nbnxm
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief Declare common functions for NBNXM GPU data management.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_nbnxm
+ */
+
+#ifndef GMX_NBNXM_NBNXM_GPU_DATA_MGMT_H
+#define GMX_NBNXM_NBNXM_GPU_DATA_MGMT_H
+
+struct interaction_const_t;
+struct NBParamGpu;
+struct PairlistParams;
+
+namespace Nbnxm
+{
+
+struct gpu_plist;
+
+/*! \brief Tabulates the Ewald Coulomb force and initializes the size/scale and the table GPU array.
+ *
+ * If called with an already allocated table, it just re-uploads the
+ * table.
+ */
+void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
+ NBParamGpu* nbp,
+ const DeviceContext& deviceContext);
+
+/*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */
+int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic);
+
+/*! \brief Copies all parameters related to the cut-off from ic to nbp
+ */
+void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams);
+
+/*! \brief Initializes the pair list data structure.
+ */
+void init_plist(gpu_plist* pl);
+
+/*! \brief Initializes the timings data structure. */
+void init_timings(gmx_wallclock_gpu_nbnxn_t* t);
+
+} // namespace Nbnxm
+
+#endif // GMX_NBNXM_NBNXM_GPU_DATA_MGMT_H
int adat_begin, adat_len;
cl_atomdata_t* adat = nb->atdat;
- cl_plist_t* plist = nb->plist[iloc];
+ gpu_plist* plist = nb->plist[iloc];
cl_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
{
cl_atomdata_t* adat = nb->atdat;
NBParamGpu* nbp = nb->nbparam;
- cl_plist_t* plist = nb->plist[iloc];
+ gpu_plist* plist = nb->plist[iloc];
cl_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
{
cl_atomdata_t* adat = nb->atdat;
NBParamGpu* nbp = nb->nbparam;
- cl_plist_t* plist = nb->plist[iloc];
+ gpu_plist* plist = nb->plist[iloc];
cl_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
bool bDoTime = nb->bDoTime;
}
}
-
-/*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */
-int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic)
-{
- bool bTwinCut = (ic.rcoulomb != ic.rvdw);
- bool bUseAnalyticalEwald, bForceAnalyticalEwald, bForceTabulatedEwald;
- int kernel_type;
-
- /* Benchmarking/development environment variables to force the use of
- analytical or tabulated Ewald kernel. */
- bForceAnalyticalEwald = (getenv("GMX_OCL_NB_ANA_EWALD") != nullptr);
- bForceTabulatedEwald = (getenv("GMX_OCL_NB_TAB_EWALD") != nullptr);
-
- if (bForceAnalyticalEwald && bForceTabulatedEwald)
- {
- gmx_incons(
- "Both analytical and tabulated Ewald OpenCL non-bonded kernels "
- "requested through environment variables.");
- }
-
- /* OpenCL: By default, use analytical Ewald
- * TODO: tabulated does not work, it needs fixing, see init_nbparam() in nbnxn_ocl_data_mgmt.cpp
- *
- */
- /* By default use analytical Ewald. */
- bUseAnalyticalEwald = true;
- if (bForceAnalyticalEwald)
- {
- if (debug)
- {
- fprintf(debug, "Using analytical Ewald OpenCL kernels\n");
- }
- }
- else if (bForceTabulatedEwald)
- {
- bUseAnalyticalEwald = false;
-
- if (debug)
- {
- fprintf(debug, "Using tabulated Ewald OpenCL kernels\n");
- }
- }
-
- /* Use twin cut-off kernels if requested by bTwinCut or the env. var.
- forces it (use it for debugging/benchmarking only). */
- if (!bTwinCut && (getenv("GMX_OCL_NB_EWALD_TWINCUT") == nullptr))
- {
- kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA : eelTypeEWALD_TAB;
- }
- else
- {
- kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA_TWIN : eelTypeEWALD_TAB_TWIN;
- }
-
- return kernel_type;
-}
-
} // namespace Nbnxm
#include "gromacs/nbnxm/gpu_jit_support.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/nbnxm_gpu.h"
+#include "gromacs/nbnxm/nbnxm_gpu_data_mgmt.h"
#include "gromacs/nbnxm/pairlistsets.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/timing/gpu_timing.h"
*/
static unsigned int gpu_min_ci_balanced_factor = 50;
-/*! \brief Tabulates the Ewald Coulomb force and initializes the size/scale
- * and the table GPU array.
- *
- * If called with an already allocated table, it just re-uploads the
- * table.
- */
-static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
- NBParamGpu* nbp,
- const DeviceContext& deviceContext)
-{
- if (nbp->coulomb_tab != nullptr)
- {
- freeDeviceBuffer(&(nbp->coulomb_tab));
- }
-
- DeviceBuffer<real> coulomb_tab;
-
- initParamLookupTable(&coulomb_tab, nullptr, tables.tableF.data(), tables.tableF.size(), deviceContext);
-
- nbp->coulomb_tab = coulomb_tab;
- nbp->coulomb_tab_scale = tables.scale;
-}
-
/*! \brief Initializes the atomdata structure first time, it only gets filled at
pair-search.
ad->nalloc = -1;
}
-/*! \brief Copies all parameters related to the cut-off from ic to nbp
- */
-static void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
-{
- nbp->ewald_beta = ic->ewaldcoeff_q;
- nbp->sh_ewald = ic->sh_ewald;
- nbp->epsfac = ic->epsfac;
- nbp->two_k_rf = 2.0 * ic->k_rf;
- nbp->c_rf = ic->c_rf;
- nbp->rvdw_sq = ic->rvdw * ic->rvdw;
- nbp->rcoulomb_sq = ic->rcoulomb * ic->rcoulomb;
- nbp->rlistOuter_sq = listParams.rlistOuter * listParams.rlistOuter;
- nbp->rlistInner_sq = listParams.rlistInner * listParams.rlistInner;
- nbp->useDynamicPruning = listParams.useDynamicPruning;
-
- nbp->sh_lj_ewald = ic->sh_lj_ewald;
- nbp->ewaldcoeff_lj = ic->ewaldcoeff_lj;
-
- nbp->rvdw_switch = ic->rvdw_switch;
- nbp->dispersion_shift = ic->dispersion_shift;
- nbp->repulsion_shift = ic->repulsion_shift;
- nbp->vdw_switch = ic->vdw_switch;
-}
-
/*! \brief Returns the kinds of electrostatics and Vdw OpenCL
* kernels that will be used.
*
init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
}
-/*! \brief Initializes the pair list data structure.
- */
-static void init_plist(cl_plist_t* pl)
-{
- /* initialize to nullptr pointers to data that is not allocated here and will
- need reallocation in nbnxn_gpu_init_pairlist */
- pl->sci = nullptr;
- pl->cj4 = nullptr;
- pl->imask = nullptr;
- pl->excl = nullptr;
-
- /* size -1 indicates that the respective array hasn't been initialized yet */
- pl->na_c = -1;
- pl->nsci = -1;
- pl->sci_nalloc = -1;
- pl->ncj4 = -1;
- pl->cj4_nalloc = -1;
- pl->nimask = -1;
- pl->imask_nalloc = -1;
- pl->nexcl = -1;
- pl->excl_nalloc = -1;
- pl->haveFreshList = false;
-}
-
-/*! \brief Initializes the timings data structure.
- */
-static void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
-{
- int i, j;
-
- t->nb_h2d_t = 0.0;
- t->nb_d2h_t = 0.0;
- t->nb_c = 0;
- t->pl_h2d_t = 0.0;
- t->pl_h2d_c = 0;
- for (i = 0; i < 2; i++)
- {
- for (j = 0; j < 2; j++)
- {
- t->ktime[i][j].t = 0.0;
- t->ktime[i][j].c = 0;
- }
- }
-
- t->pruneTime.c = 0;
- t->pruneTime.t = 0.0;
- t->dynamicPruneTime.c = 0;
- t->dynamicPruneTime.t = 0.0;
-}
-
/*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */
static cl_kernel nbnxn_gpu_create_kernel(NbnxmGpu* nb, const char* kernel_name)
{
// which leads to the counter not being reset.
bool bDoTime = (nb->bDoTime && !h_plist->sci.empty());
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
- cl_plist_t* d_plist = nb->plist[iloc];
+ gpu_plist* d_plist = nb->plist[iloc];
if (d_plist->na_c < 0)
{
{
if (d_plist->na_c != h_plist->na_ci)
{
- sprintf(sbuf, "In cu_init_plist: the #atoms per cell has changed (from %d to %d)",
+ sprintf(sbuf, "In init_plist: the #atoms per cell has changed (from %d to %d)",
d_plist->na_c, h_plist->na_ci);
gmx_incons(sbuf);
}
} cl_nbparam_params_t;
-/*! \internal
- * \brief Pair list data.
- */
-using cl_plist_t = Nbnxm::gpu_plist;
-
/** \internal
* \brief Typedef of actual timer type.
*/
//! parameters required for the non-bonded calc.
NBParamGpu* nbparam = nullptr;
//! pair-list data structures (local and non-local)
- gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_plist_t*> plist = { nullptr };
+ gmx::EnumerationArray<Nbnxm::InteractionLocality, Nbnxm::gpu_plist*> plist = { nullptr };
//! staging area where fshift/energies get downloaded
nb_staging_t nbst;