From 76ba2b3fbf18167a7ce014bd965d553b2e615ccc Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Thu, 10 Sep 2020 05:39:44 +0000 Subject: [PATCH] Unify more functions in CUDA and OpenCL implementations of NBNXM --- .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu | 96 ---------------- src/gromacs/nbnxm/gpu_data_mgmt.h | 2 +- src/gromacs/nbnxm/kerneldispatch.cpp | 1 + src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp | 103 ++++++++++++++++++ src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h | 5 + .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 102 ----------------- 6 files changed, 110 insertions(+), 199 deletions(-) diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index ea1261ee2f..f754cc4795 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -226,25 +226,6 @@ static void init_nbparam(NBParamGpu* nbp, } } -/*! Re-generate the GPU Ewald force table, resets rlist, and update the - * electrostatic type switching to twin cut-off (or back) if needed. */ -void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t* ic) -{ - if (!nbv || !nbv->useGpu()) - { - return; - } - NbnxmGpu* nb = nbv->gpu_nbv; - NBParamGpu* nbp = nbv->gpu_nbv->nbparam; - - set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params()); - - nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); - - GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); - init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_); -} - /*! Initializes simulation constant data. 
*/ static void cuda_init_const(NbnxmGpu* nb, const interaction_const_t* ic, @@ -351,64 +332,6 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager, return nb; } -void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc) -{ - char sbuf[STRLEN]; - bool bDoTime = (nb->bDoTime && !h_plist->sci.empty()); - const DeviceStream& deviceStream = *nb->deviceStreams[iloc]; - gpu_plist* d_plist = nb->plist[iloc]; - - if (d_plist->na_c < 0) - { - d_plist->na_c = h_plist->na_ci; - } - else - { - if (d_plist->na_c != h_plist->na_ci) - { - sprintf(sbuf, "In init_plist: the #atoms per cell has changed (from %d to %d)", - d_plist->na_c, h_plist->na_ci); - gmx_incons(sbuf); - } - } - - gpu_timers_t::Interaction& iTimers = nb->timers->interaction[iloc]; - - if (bDoTime) - { - iTimers.pl_h2d.openTimingRegion(deviceStream); - iTimers.didPairlistH2D = true; - } - - const DeviceContext& deviceContext = *nb->deviceContext_; - - reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc, - deviceContext); - copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream, - GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr); - - reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc, - deviceContext); - copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream, - GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr); - - reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit, - &d_plist->nimask, &d_plist->imask_nalloc, deviceContext); - - reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl, - &d_plist->excl_nalloc, deviceContext); - copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream, - GpuApiCallBehavior::Async, bDoTime ? 
iTimers.pl_h2d.fetchNextEvent() : nullptr); - - if (bDoTime) - { - iTimers.pl_h2d.closeTimingRegion(deviceStream); - } - - /* the next use of thist list we be the first one, so we need to prune */ - d_plist->haveFreshList = true; -} - void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom) { cu_atomdata_t* adat = nb->atdat; @@ -621,31 +544,12 @@ void gpu_free(NbnxmGpu* nb) } } -//! This function is documented in the header file -gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb) -{ - return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr; -} - -void gpu_reset_timings(nonbonded_verlet_t* nbv) -{ - if (nbv->gpu_nbv && nbv->gpu_nbv->bDoTime) - { - init_timings(nbv->gpu_nbv->timings); - } -} - int gpu_min_ci_balanced(NbnxmGpu* nb) { return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().prop.multiProcessorCount : 0; } -gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb) -{ - return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN)); -} - void* gpu_get_xq(NbnxmGpu* nb) { assert(nb); diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h index a1ee291ae8..a472cb437d 100644 --- a/src/gromacs/nbnxm/gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/gpu_data_mgmt.h @@ -122,7 +122,7 @@ int gpu_min_ci_balanced(NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1); /** Returns if analytical Ewald GPU kernels are used. */ GPU_FUNC_QUALIFIER -gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(FALSE); +bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(FALSE); /** Returns an opaque pointer to the GPU command stream * Note: CUDA only. 
diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp index ac8fec7f51..aaf9bd863e 100644 --- a/src/gromacs/nbnxm/kerneldispatch.cpp +++ b/src/gromacs/nbnxm/kerneldispatch.cpp @@ -61,6 +61,7 @@ #include "kernel_common.h" #include "nbnxm_gpu.h" +#include "nbnxm_gpu_data_mgmt.h" #include "nbnxm_simd.h" #include "pairlistset.h" #include "pairlistsets.h" diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp index 105ceefb72..53ee861fd2 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp @@ -58,9 +58,12 @@ #include "nbnxm_gpu_data_mgmt.h" +#include "gromacs/nbnxm/gpu_data_mgmt.h" #include "gromacs/timing/gpu_timing.h" +#include "gromacs/utility/cstringutil.h" #include "nbnxm_gpu.h" +#include "pairlistsets.h" namespace Nbnxm { @@ -155,6 +158,23 @@ void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const nbp->vdw_switch = ic->vdw_switch; } +void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t* ic) +{ + if (!nbv || !nbv->useGpu()) + { + return; + } + NbnxmGpu* nb = nbv->gpu_nbv; + NBParamGpu* nbp = nb->nbparam; + + set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params()); + + nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); + + GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); + init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_); +} + void init_plist(gpu_plist* pl) { /* initialize to nullptr pointers to data that is not allocated here and will @@ -200,4 +220,87 @@ void init_timings(gmx_wallclock_gpu_nbnxn_t* t) t->dynamicPruneTime.t = 0.0; } +//! 
This function is documented in the header file +void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc) +{ + char sbuf[STRLEN]; + // Timing accumulation should happen only if there was work to do + // because getLastRangeTime() gets skipped with empty lists later + // which leads to the counter not being reset. + bool bDoTime = (nb->bDoTime && !h_plist->sci.empty()); + const DeviceStream& deviceStream = *nb->deviceStreams[iloc]; + gpu_plist* d_plist = nb->plist[iloc]; + + if (d_plist->na_c < 0) + { + d_plist->na_c = h_plist->na_ci; + } + else + { + if (d_plist->na_c != h_plist->na_ci) + { + sprintf(sbuf, "In init_plist: the #atoms per cell has changed (from %d to %d)", + d_plist->na_c, h_plist->na_ci); + gmx_incons(sbuf); + } + } + + gpu_timers_t::Interaction& iTimers = nb->timers->interaction[iloc]; + + if (bDoTime) + { + iTimers.pl_h2d.openTimingRegion(deviceStream); + iTimers.didPairlistH2D = true; + } + + // (Re)allocate the device pairlist buffers and copy the host lists to them; now common to CUDA and OpenCL + const DeviceContext& deviceContext = *nb->deviceContext_; + + reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc, + deviceContext); + copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream, + GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr); + + reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc, + deviceContext); + copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream, + GpuApiCallBehavior::Async, bDoTime ? 
iTimers.pl_h2d.fetchNextEvent() : nullptr); + + reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit, + &d_plist->nimask, &d_plist->imask_nalloc, deviceContext); + + reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl, + &d_plist->excl_nalloc, deviceContext); + copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream, + GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr); + + if (bDoTime) + { + iTimers.pl_h2d.closeTimingRegion(deviceStream); + } + + /* need to prune the pair list during the next step */ + d_plist->haveFreshList = true; +} + +//! This function is documented in the header file +gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb) +{ + return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr; +} + +//! This function is documented in the header file +void gpu_reset_timings(nonbonded_verlet_t* nbv) +{ + if (nbv->gpu_nbv && nbv->gpu_nbv->bDoTime) + { + init_timings(nbv->gpu_nbv->timings); + } +} + +bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb) +{ + return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN)); +} + } // namespace Nbnxm diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h index 8c17e7749d..761737ddf0 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h @@ -48,6 +48,11 @@ struct interaction_const_t; struct NBParamGpu; struct PairlistParams; +namespace gmx +{ +enum class InteractionLocality; +} + namespace Nbnxm { diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index f47d754e2f..19b861db0c 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -249,24 +249,6 @@ static void init_nbparam(NBParamGpu* nbp, } } -//! 
This function is documented in the header file -void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t* ic) -{ - if (!nbv || !nbv->useGpu()) - { - return; - } - NbnxmGpu* nb = nbv->gpu_nbv; - NBParamGpu* nbp = nb->nbparam; - - set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params()); - - nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); - - GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); - init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_); -} - /*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */ static cl_kernel nbnxn_gpu_create_kernel(NbnxmGpu* nb, const char* kernel_name) { @@ -478,69 +460,6 @@ void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial) GMX_ASSERT(cl_error == CL_SUCCESS, ("clFlush failed: " + ocl_get_error_string(cl_error)).c_str()); } -//! This function is documented in the header file -void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc) -{ - char sbuf[STRLEN]; - // Timing accumulation should happen only if there was work to do - // because getLastRangeTime() gets skipped with empty lists later - // which leads to the counter not being reset. 
- bool bDoTime = (nb->bDoTime && !h_plist->sci.empty()); - const DeviceStream& deviceStream = *nb->deviceStreams[iloc]; - gpu_plist* d_plist = nb->plist[iloc]; - - if (d_plist->na_c < 0) - { - d_plist->na_c = h_plist->na_ci; - } - else - { - if (d_plist->na_c != h_plist->na_ci) - { - sprintf(sbuf, "In init_plist: the #atoms per cell has changed (from %d to %d)", - d_plist->na_c, h_plist->na_ci); - gmx_incons(sbuf); - } - } - - gpu_timers_t::Interaction& iTimers = nb->timers->interaction[iloc]; - - if (bDoTime) - { - iTimers.pl_h2d.openTimingRegion(deviceStream); - iTimers.didPairlistH2D = true; - } - - // TODO most of this function is same in CUDA and OpenCL, move into the header - const DeviceContext& deviceContext = *nb->deviceContext_; - - reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc, - deviceContext); - copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream, - GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr); - - reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc, - deviceContext); - copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream, - GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr); - - reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit, - &d_plist->nimask, &d_plist->imask_nalloc, deviceContext); - - reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl, - &d_plist->excl_nalloc, deviceContext); - copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream, - GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr); - - if (bDoTime) - { - iTimers.pl_h2d.closeTimingRegion(deviceStream); - } - - /* need to prune the pair list during the next step */ - d_plist->haveFreshList = true; -} - //! 
This function is documented in the header file void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom) { @@ -787,31 +706,10 @@ void gpu_free(NbnxmGpu* nb) } } -//! This function is documented in the header file -gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb) -{ - return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr; -} - -//! This function is documented in the header file -void gpu_reset_timings(nonbonded_verlet_t* nbv) -{ - if (nbv->gpu_nbv && nbv->gpu_nbv->bDoTime) - { - init_timings(nbv->gpu_nbv->timings); - } -} - //! This function is documented in the header file int gpu_min_ci_balanced(NbnxmGpu* nb) { return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().compute_units : 0; } -//! This function is documented in the header file -gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb) -{ - return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN)); -} - } // namespace Nbnxm -- 2.22.0