From c4a672b93b655ff674b671fd10ebe728b2d19ec8 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Fri, 19 Mar 2021 16:48:38 +0000 Subject: [PATCH] Unify init_gpu function in NBNXM Refs. #2608 --- src/gromacs/ewald/pme_load_balancing.cpp | 2 +- .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu | 193 ++----------- src/gromacs/nbnxm/gpu_data_mgmt.h | 6 +- src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp | 259 +++++++++++++++--- src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h | 5 +- .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 211 +++----------- .../nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp | 139 +--------- 7 files changed, 290 insertions(+), 525 deletions(-) diff --git a/src/gromacs/ewald/pme_load_balancing.cpp b/src/gromacs/ewald/pme_load_balancing.cpp index b7c3879d91..19778b8999 100644 --- a/src/gromacs/ewald/pme_load_balancing.cpp +++ b/src/gromacs/ewald/pme_load_balancing.cpp @@ -856,7 +856,7 @@ static void pme_load_balance(pme_load_balancing_t* pme_lb, /* We always re-initialize the tables whether they are used or not */ init_interaction_const_tables(nullptr, ic, set->rlistOuter, ir.tabext); - Nbnxm::gpu_pme_loadbal_update_param(nbv, ic); + Nbnxm::gpu_pme_loadbal_update_param(nbv, *ic); if (!pme_lb->bSepPMERanks) { diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index 804a8ea180..274f40448f 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -52,7 +52,6 @@ // TODO Remove this comment when the above order issue is resolved #include "gromacs/gpu_utils/cudautils.cuh" #include "gromacs/gpu_utils/device_context.h" -#include "gromacs/gpu_utils/device_stream_manager.h" #include "gromacs/gpu_utils/gpu_utils.h" #include "gromacs/gpu_utils/gpueventsynchronizer.cuh" #include "gromacs/gpu_utils/pmalloc.h" @@ -92,174 +91,11 @@ namespace Nbnxm */ static unsigned int gpu_min_ci_balanced_factor = 44; -/*! Initializes the atomdata structure first time, it only gets filled at - pair-search. */ -static void init_atomdata_first(NBAtomData* ad, - int nTypes, - const DeviceContext& deviceContext, - const DeviceStream& localStream) +void gpu_init_platform_specific(NbnxmGpu* /* nb */) { - ad->numTypes = nTypes; - allocateDeviceBuffer(&ad->shiftVec, SHIFTS, deviceContext); - ad->shiftVecUploaded = false; - - allocateDeviceBuffer(&ad->fShift, SHIFTS, deviceContext); - allocateDeviceBuffer(&ad->eLJ, 1, deviceContext); - allocateDeviceBuffer(&ad->eElec, 1, deviceContext); - - clearDeviceBufferAsync(&ad->fShift, 0, SHIFTS, localStream); - clearDeviceBufferAsync(&ad->eElec, 0, 1, localStream); - clearDeviceBufferAsync(&ad->eLJ, 0, 1, localStream); - - /* initialize to nullptr poiters to data that is not allocated here and will - need reallocation in nbnxn_cuda_init_atomdata */ - ad->xq = nullptr; - ad->f = nullptr; - - /* size -1 indicates that the respective array hasn't been initialized yet */ - ad->numAtoms = -1; - ad->numAtomsAlloc = -1; -} - -/*! Initializes the nonbonded parameter data structure. */ -static void init_nbparam(NBParamGpu* nbp, - const interaction_const_t* ic, - const PairlistParams& listParams, - const nbnxn_atomdata_t::Params& nbatParams, - const DeviceContext& deviceContext) -{ - const int ntypes = nbatParams.numTypes; - - set_cutoff_parameters(nbp, ic, listParams); - - /* The kernel code supports LJ combination rules (geometric and LB) for - * all kernel types, but we only generate useful combination rule kernels. - * We currently only use LJ combination rule (geometric and LB) kernels - * for plain cut-off LJ. On Maxwell the force only kernels speed up 15% - * with PME and 20% with RF, the other kernels speed up about half as much. - * For LJ force-switch the geometric rule would give 7% speed-up, but this - * combination is rarely used. LJ force-switch with LB rule is more common, - * but gives only 1% speed-up. - */ - nbp->vdwType = nbnxmGpuPickVdwKernelType(ic, nbatParams.ljCombinationRule); - nbp->elecType = nbnxmGpuPickElectrostaticsKernelType(ic, deviceContext.deviceInfo()); - - /* generate table for PME */ - nbp->coulomb_tab = nullptr; - if (nbp->elecType == ElecType::EwaldTab || nbp->elecType == ElecType::EwaldTabTwin) - { - GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); - init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext); - } - - /* set up LJ parameter lookup table */ - if (!useLjCombRule(nbp->vdwType)) - { - static_assert(sizeof(decltype(nbp->nbfp)) == 2 * sizeof(decltype(*nbatParams.nbfp.data())), - "Mismatch in the size of host / device data types"); - initParamLookupTable(&nbp->nbfp, - &nbp->nbfp_texobj, - reinterpret_cast(nbatParams.nbfp.data()), - ntypes * ntypes, - deviceContext); - } - - /* set up LJ-PME parameter lookup table */ - if (ic->vdwtype == VanDerWaalsType::Pme) - { - static_assert(sizeof(decltype(nbp->nbfp_comb)) - == 2 * sizeof(decltype(*nbatParams.nbfp_comb.data())), - "Mismatch in the size of host / device data types"); - initParamLookupTable(&nbp->nbfp_comb, - &nbp->nbfp_comb_texobj, - reinterpret_cast(nbatParams.nbfp_comb.data()), - ntypes, - deviceContext); - } -} - -NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager, - const interaction_const_t* ic, - const PairlistParams& listParams, - const nbnxn_atomdata_t* nbat, - bool bLocalAndNonlocal) -{ - auto nb = new NbnxmGpu(); - nb->deviceContext_ = &deviceStreamManager.context(); - snew(nb->atdat, 1); - snew(nb->nbparam, 1); - snew(nb->plist[InteractionLocality::Local], 1); - if (bLocalAndNonlocal) - { - snew(nb->plist[InteractionLocality::NonLocal], 1); - } - - nb->bUseTwoStreams = bLocalAndNonlocal; - - nb->timers = new Nbnxm::GpuTimers(); - snew(nb->timings, 1); - - /* init nbst */ - pmalloc((void**)&nb->nbst.eLJ, sizeof(*nb->nbst.eLJ)); - pmalloc((void**)&nb->nbst.eElec, sizeof(*nb->nbst.eElec)); - pmalloc((void**)&nb->nbst.fShift, SHIFTS * sizeof(*nb->nbst.fShift)); - - init_plist(nb->plist[InteractionLocality::Local]); - - /* local/non-local GPU streams */ - GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal), - "Local non-bonded stream should be initialized to use GPU for non-bonded."); - const DeviceStream& localStream = deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal); - nb->deviceStreams[InteractionLocality::Local] = &localStream; - if (nb->bUseTwoStreams) - { - init_plist(nb->plist[InteractionLocality::NonLocal]); - - /* Note that the device we're running on does not have to support - * priorities, because we are querying the priority range which in this - * case will be a single value. - */ - GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal), - "Non-local non-bonded stream should be initialized to use GPU for " - "non-bonded with domain decomposition."); - nb->deviceStreams[InteractionLocality::NonLocal] = - &deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal); - ; - } - - /* WARNING: CUDA timings are incorrect with multiple streams. - * This is the main reason why they are disabled by default. - */ - // TODO: Consider turning on by default when we can detect nr of streams. - nb->bDoTime = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr); - - if (nb->bDoTime) - { - init_timings(nb->timings); - } - /* set the kernel type for the current GPU */ /* pick L1 cache configuration */ cuda_set_cacheconfig(); - - const nbnxn_atomdata_t::Params& nbatParams = nbat->params(); - const DeviceContext& deviceContext = *nb->deviceContext_; - init_atomdata_first(nb->atdat, nbatParams.numTypes, deviceContext, localStream); - init_nbparam(nb->nbparam, ic, listParams, nbatParams, deviceContext); - - nb->atomIndicesSize = 0; - nb->atomIndicesSize_alloc = 0; - nb->ncxy_na = 0; - nb->ncxy_na_alloc = 0; - nb->ncxy_ind = 0; - nb->ncxy_ind_alloc = 0; - - if (debug) - { - fprintf(debug, "Initialized CUDA data structures.\n"); - } - - return nb; } void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom) @@ -290,17 +126,17 @@ void gpu_free(NbnxmGpu* nb) return; } + delete nb->timers; + sfree(nb->timings); + NBAtomData* atdat = nb->atdat; NBParamGpu* nbparam = nb->nbparam; - if ((!nbparam->coulomb_tab) - && (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin)) + if (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin) { destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj); } - delete nb->timers; - if (!useLjCombRule(nb->nbparam->vdwType)) { destroyParamLookupTable(&nbparam->nbfp, nbparam->nbfp_texobj); @@ -319,8 +155,14 @@ void gpu_free(NbnxmGpu* nb) freeDeviceBuffer(&atdat->f); freeDeviceBuffer(&atdat->xq); - freeDeviceBuffer(&atdat->atomTypes); - freeDeviceBuffer(&atdat->ljComb); + if (useLjCombRule(nb->nbparam->vdwType)) + { + freeDeviceBuffer(&atdat->ljComb); + } + else + { + freeDeviceBuffer(&atdat->atomTypes); + } /* Free plist */ auto* plist = nb->plist[InteractionLocality::Local]; @@ -328,7 +170,7 @@ void gpu_free(NbnxmGpu* nb) freeDeviceBuffer(&plist->cj4); freeDeviceBuffer(&plist->imask); freeDeviceBuffer(&plist->excl); - sfree(plist); + delete plist; if (nb->bUseTwoStreams) { auto* plist_nl = nb->plist[InteractionLocality::NonLocal]; @@ -336,7 +178,7 @@ void gpu_free(NbnxmGpu* nb) freeDeviceBuffer(&plist_nl->cj4); freeDeviceBuffer(&plist_nl->imask); freeDeviceBuffer(&plist_nl->excl); - sfree(plist_nl); + delete plist_nl; } /* Free nbst */ @@ -349,9 +191,8 @@ void gpu_free(NbnxmGpu* nb) pfree(nb->nbst.fShift); nb->nbst.fShift = nullptr; - sfree(atdat); - sfree(nbparam); - sfree(nb->timings); + delete atdat; + delete nbparam; delete nb; if (debug) diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h index 5b0c22085c..d9f81f1d8d 100644 --- a/src/gromacs/nbnxm/gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/gpu_data_mgmt.h @@ -95,7 +95,7 @@ void gpu_init_atomdata(NbnxmGpu gmx_unused* nb, const nbnxn_atomdata_t gmx_unuse */ GPU_FUNC_QUALIFIER void gpu_pme_loadbal_update_param(const struct nonbonded_verlet_t gmx_unused* nbv, - const interaction_const_t gmx_unused* ic) GPU_FUNC_TERM; + const interaction_const_t gmx_unused& ic) GPU_FUNC_TERM; /** Uploads shift vector to the GPU if the box is dynamic (otherwise just returns). */ GPU_FUNC_QUALIFIER @@ -129,13 +129,13 @@ bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM /** Return the enum value of electrostatics kernel type for given interaction parameters \p ic. */ GPU_FUNC_QUALIFIER -enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t gmx_unused* ic, +enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t gmx_unused& ic, const DeviceInformation gmx_unused& deviceInfo) GPU_FUNC_TERM_WITH_RETURN(ElecType::Count); /** Return the enum value of VdW kernel type for given \p ic and \p combRule. */ GPU_FUNC_QUALIFIER -enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t gmx_unused* ic, +enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t gmx_unused& ic, LJCombinationRule gmx_unused ljCombinationRule) GPU_FUNC_TERM_WITH_RETURN(VdwType::Count); diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp index b86b785b94..c4efe8d458 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp @@ -62,6 +62,8 @@ #include "nbnxm_gpu_data_mgmt.h" +#include "gromacs/gpu_utils/device_stream_manager.h" +#include "gromacs/gpu_utils/pmalloc.h" #include "gromacs/hardware/device_information.h" #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/mdtypes/simulation_workload.h" @@ -169,29 +171,29 @@ enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic, } } -void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams) +void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t& ic, const PairlistParams& listParams) { - nbp->ewald_beta = ic->ewaldcoeff_q; - nbp->sh_ewald = ic->sh_ewald; - nbp->epsfac = ic->epsfac; - nbp->two_k_rf = 2.0 * ic->reactionFieldCoefficient; - nbp->c_rf = ic->reactionFieldShift; - nbp->rvdw_sq = ic->rvdw * ic->rvdw; - nbp->rcoulomb_sq = ic->rcoulomb * ic->rcoulomb; + nbp->ewald_beta = ic.ewaldcoeff_q; + nbp->sh_ewald = ic.sh_ewald; + nbp->epsfac = ic.epsfac; + nbp->two_k_rf = 2.0 * ic.reactionFieldCoefficient; + nbp->c_rf = ic.reactionFieldShift; + nbp->rvdw_sq = ic.rvdw * ic.rvdw; + nbp->rcoulomb_sq = ic.rcoulomb * ic.rcoulomb; nbp->rlistOuter_sq = listParams.rlistOuter * listParams.rlistOuter; nbp->rlistInner_sq = listParams.rlistInner * listParams.rlistInner; nbp->useDynamicPruning = listParams.useDynamicPruning; - nbp->sh_lj_ewald = ic->sh_lj_ewald; - nbp->ewaldcoeff_lj = ic->ewaldcoeff_lj; + nbp->sh_lj_ewald = ic.sh_lj_ewald; + nbp->ewaldcoeff_lj = ic.ewaldcoeff_lj; - nbp->rvdw_switch = ic->rvdw_switch; - nbp->dispersion_shift = ic->dispersion_shift; - nbp->repulsion_shift = ic->repulsion_shift; - nbp->vdw_switch = ic->vdw_switch; + nbp->rvdw_switch = ic.rvdw_switch; + nbp->dispersion_shift = ic.dispersion_shift; + nbp->repulsion_shift = ic.repulsion_shift; + nbp->vdw_switch = ic.vdw_switch; } -void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t* ic) +void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t& ic) { if (!nbv || !nbv->useGpu()) { @@ -202,10 +204,10 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params()); - nbp->elecType = nbnxn_gpu_pick_ewald_kernel_type(*ic, nb->deviceContext_->deviceInfo()); + nbp->elecType = nbnxn_gpu_pick_ewald_kernel_type(ic, nb->deviceContext_->deviceInfo()); - GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); - init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_); + GMX_RELEASE_ASSERT(ic.coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); + init_ewald_coulomb_force_table(*ic.coulombEwaldTables, nbp, *nb->deviceContext_); } void init_plist(gpu_plist* pl) @@ -253,6 +255,191 @@ void init_timings(gmx_wallclock_gpu_nbnxn_t* t) t->dynamicPruneTime.t = 0.0; } +/*! \brief Initialize \p atomdata first time; it only gets filled at pair-search. */ +static void initAtomdataFirst(NBAtomData* atomdata, + int numTypes, + const DeviceContext& deviceContext, + const DeviceStream& localStream) +{ + atomdata->numTypes = numTypes; + allocateDeviceBuffer(&atomdata->shiftVec, SHIFTS, deviceContext); + atomdata->shiftVecUploaded = false; + + allocateDeviceBuffer(&atomdata->fShift, SHIFTS, deviceContext); + allocateDeviceBuffer(&atomdata->eLJ, 1, deviceContext); + allocateDeviceBuffer(&atomdata->eElec, 1, deviceContext); + + clearDeviceBufferAsync(&atomdata->fShift, 0, SHIFTS, localStream); + clearDeviceBufferAsync(&atomdata->eElec, 0, 1, localStream); + clearDeviceBufferAsync(&atomdata->eLJ, 0, 1, localStream); + + /* initialize to nullptr pointers to data that is not allocated here and will + need reallocation in later */ + atomdata->xq = nullptr; + atomdata->f = nullptr; + + /* size -1 indicates that the respective array hasn't been initialized yet */ + atomdata->numAtoms = -1; + atomdata->numAtomsAlloc = -1; +} + +/*! \brief Initialize the nonbonded parameter data structure. */ +static void initNbparam(NBParamGpu* nbp, + const interaction_const_t& ic, + const PairlistParams& listParams, + const nbnxn_atomdata_t::Params& nbatParams, + const DeviceContext& deviceContext) +{ + const int numTypes = nbatParams.numTypes; + + set_cutoff_parameters(nbp, ic, listParams); + + nbp->vdwType = nbnxmGpuPickVdwKernelType(ic, nbatParams.ljCombinationRule); + nbp->elecType = nbnxmGpuPickElectrostaticsKernelType(ic, deviceContext.deviceInfo()); + + if (ic.vdwtype == VanDerWaalsType::Pme) + { + if (ic.ljpme_comb_rule == LongRangeVdW::Geom) + { + GMX_ASSERT(nbatParams.ljCombinationRule == LJCombinationRule::Geometric, + "Combination rule mismatch!"); + } + else + { + GMX_ASSERT(nbatParams.ljCombinationRule == LJCombinationRule::LorentzBerthelot, + "Combination rule mismatch!"); + } + } + + /* generate table for PME */ + if (nbp->elecType == ElecType::EwaldTab || nbp->elecType == ElecType::EwaldTabTwin) + { + GMX_RELEASE_ASSERT(ic.coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); + init_ewald_coulomb_force_table(*ic.coulombEwaldTables, nbp, deviceContext); + } + else + { + // Need to initialize for OpenCL, since it is unconditionally used as a kernel argument. + allocateDeviceBuffer(&nbp->coulomb_tab, 1, deviceContext); + } + + /* set up LJ parameter lookup table */ + if (!useLjCombRule(nbp->vdwType)) + { + static_assert(sizeof(decltype(nbp->nbfp)) == 2 * sizeof(decltype(*nbatParams.nbfp.data())), + "Mismatch in the size of host / device data types"); + initParamLookupTable(&nbp->nbfp, + &nbp->nbfp_texobj, + reinterpret_cast(nbatParams.nbfp.data()), + numTypes * numTypes, + deviceContext); + } + else + { + // Need to initialize for OpenCL, since it is unconditionally used as a kernel argument. + allocateDeviceBuffer(&nbp->nbfp, 1, deviceContext); + } + + /* set up LJ-PME parameter lookup table */ + if (ic.vdwtype == VanDerWaalsType::Pme) + { + static_assert(sizeof(decltype(nbp->nbfp_comb)) + == 2 * sizeof(decltype(*nbatParams.nbfp_comb.data())), + "Mismatch in the size of host / device data types"); + initParamLookupTable(&nbp->nbfp_comb, + &nbp->nbfp_comb_texobj, + reinterpret_cast(nbatParams.nbfp_comb.data()), + numTypes, + deviceContext); + } + else + { + // Need to initialize for OpenCL, since it is unconditionally used as a kernel argument. + allocateDeviceBuffer(&nbp->nbfp_comb, 1, deviceContext); + } +} + +NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager, + const interaction_const_t* ic, + const PairlistParams& listParams, + const nbnxn_atomdata_t* nbat, + const bool bLocalAndNonlocal) +{ + auto* nb = new NbnxmGpu(); + nb->deviceContext_ = &deviceStreamManager.context(); + nb->atdat = new NBAtomData; + nb->nbparam = new NBParamGpu; + nb->plist[InteractionLocality::Local] = new Nbnxm::gpu_plist; + if (bLocalAndNonlocal) + { + nb->plist[InteractionLocality::NonLocal] = new Nbnxm::gpu_plist; + } + + nb->bUseTwoStreams = bLocalAndNonlocal; + + GMX_ASSERT(!(GMX_GPU_SYCL && !nb->bDoTime), "GPU timing is not supported in SYCL"); + nb->timers = new Nbnxm::GpuTimers(); + snew(nb->timings, 1); + + /* WARNING: CUDA timings are incorrect with multiple streams. + * This is the main reason why they are disabled by default. + * Can be enabled by setting GMX_ENABLE_GPU_TIMING environment variable. + * TODO: Consider turning on by default when we can detect nr of streams. + * + * OpenCL timing is enabled by default and can be disabled by + * GMX_DISABLE_GPU_TIMING environment variable. + * + * Timing is disabled in SYCL. + */ + nb->bDoTime = (GMX_GPU_CUDA && (getenv("GMX_ENABLE_GPU_TIMING") != nullptr)) + || (GMX_GPU_OPENCL && (getenv("GMX_DISABLE_GPU_TIMING") == nullptr)); + + if (nb->bDoTime) + { + init_timings(nb->timings); + } + + /* init nbst */ + pmalloc(reinterpret_cast(&nb->nbst.eLJ), sizeof(*nb->nbst.eLJ)); + pmalloc(reinterpret_cast(&nb->nbst.eElec), sizeof(*nb->nbst.eElec)); + pmalloc(reinterpret_cast(&nb->nbst.fShift), SHIFTS * sizeof(*nb->nbst.fShift)); + + init_plist(nb->plist[InteractionLocality::Local]); + + /* local/non-local GPU streams */ + GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal), + "Local non-bonded stream should be initialized to use GPU for non-bonded."); + const DeviceStream& localStream = deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal); + nb->deviceStreams[InteractionLocality::Local] = &localStream; + // In general, it's not strictly necessary to use 2 streams for SYCL, since they are + // out-of-order. But for the time being, it will be less disruptive to keep them. + if (nb->bUseTwoStreams) + { + init_plist(nb->plist[InteractionLocality::NonLocal]); + + GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal), + "Non-local non-bonded stream should be initialized to use GPU for " + "non-bonded with domain decomposition."); + nb->deviceStreams[InteractionLocality::NonLocal] = + &deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal); + } + + const nbnxn_atomdata_t::Params& nbatParams = nbat->params(); + const DeviceContext& deviceContext = *nb->deviceContext_; + + initNbparam(nb->nbparam, *ic, listParams, nbatParams, deviceContext); + initAtomdataFirst(nb->atdat, nbatParams.numTypes, deviceContext, localStream); + + gpu_init_platform_specific(nb); + + if (debug) + { + fprintf(debug, "Initialized NBNXM GPU data structures.\n"); + } + + return nb; +} + //! This function is documented in the header file void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc) { @@ -364,8 +551,14 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) { freeDeviceBuffer(&atdat->f); freeDeviceBuffer(&atdat->xq); - freeDeviceBuffer(&atdat->ljComb); - freeDeviceBuffer(&atdat->atomTypes); + if (useLjCombRule(nb->nbparam->vdwType)) + { + freeDeviceBuffer(&atdat->ljComb); + } + else + { + freeDeviceBuffer(&atdat->atomTypes); + } } @@ -467,20 +660,20 @@ bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb) || (nb->nbparam->elecType == ElecType::EwaldAnaTwin)); } -enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t* ic, +enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t& ic, const DeviceInformation& deviceInfo) { - if (ic->eeltype == CoulombInteractionType::Cut) + if (ic.eeltype == CoulombInteractionType::Cut) { return ElecType::Cut; } - else if (EEL_RF(ic->eeltype)) + else if (EEL_RF(ic.eeltype)) { return ElecType::RF; } - else if ((EEL_PME(ic->eeltype) || ic->eeltype == CoulombInteractionType::Ewald)) + else if ((EEL_PME(ic.eeltype) || ic.eeltype == CoulombInteractionType::Ewald)) { - return nbnxn_gpu_pick_ewald_kernel_type(*ic, deviceInfo); + return nbnxn_gpu_pick_ewald_kernel_type(ic, deviceInfo); } else { @@ -488,16 +681,16 @@ enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t* ic GMX_THROW(gmx::InconsistentInputError( gmx::formatString("The requested electrostatics type %s is not implemented in " "the GPU accelerated kernels!", - enumValueToString(ic->eeltype)))); + enumValueToString(ic.eeltype)))); } } -enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t* ic, LJCombinationRule ljCombinationRule) +enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t& ic, LJCombinationRule ljCombinationRule) { - if (ic->vdwtype == VanDerWaalsType::Cut) + if (ic.vdwtype == VanDerWaalsType::Cut) { - switch (ic->vdw_modifier) + switch (ic.vdw_modifier) { case InteractionModifiers::None: case InteractionModifiers::PotShift: @@ -518,12 +711,12 @@ enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t* ic, LJCombinat GMX_THROW(gmx::InconsistentInputError( gmx::formatString("The requested VdW interaction modifier %s is not " "implemented in the GPU accelerated kernels!", - enumValueToString(ic->vdw_modifier)))); + enumValueToString(ic.vdw_modifier)))); } } - else if (ic->vdwtype == VanDerWaalsType::Pme) + else if (ic.vdwtype == VanDerWaalsType::Pme) { - if (ic->ljpme_comb_rule == LongRangeVdW::Geom) + if (ic.ljpme_comb_rule == LongRangeVdW::Geom) { assert(ljCombinationRule == LJCombinationRule::Geometric); return VdwType::EwaldGeom; @@ -538,7 +731,7 @@ enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t* ic, LJCombinat { GMX_THROW(gmx::InconsistentInputError(gmx::formatString( "The requested VdW type %s is not implemented in the GPU accelerated kernels!", - enumValueToString(ic->vdwtype)))); + enumValueToString(ic.vdwtype)))); } } diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h index 36efd356b9..333b059a63 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h @@ -44,6 +44,7 @@ #ifndef GMX_NBNXM_NBNXM_GPU_DATA_MGMT_H #define GMX_NBNXM_NBNXM_GPU_DATA_MGMT_H +class DeviceContext; struct interaction_const_t; struct NBParamGpu; struct PairlistParams; @@ -73,7 +74,7 @@ enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unu /*! \brief Copies all parameters related to the cut-off from ic to nbp */ -void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams); +void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t& ic, const PairlistParams& listParams); /*! \brief Initializes the pair list data structure. */ @@ -82,6 +83,8 @@ void init_plist(gpu_plist* pl); /*! \brief Initializes the timings data structure. */ void init_timings(gmx_wallclock_gpu_nbnxn_t* t); +void gpu_init_platform_specific(NbnxmGpu* nb); + } // namespace Nbnxm #endif // GMX_NBNXM_NBNXM_GPU_DATA_MGMT_H diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index f666910d12..08da7ba983 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -52,7 +52,6 @@ #include -#include "gromacs/gpu_utils/device_stream_manager.h" #include "gromacs/gpu_utils/pmalloc.h" #include "gromacs/hardware/device_information.h" #include "gromacs/hardware/device_management.h" @@ -99,104 +98,6 @@ namespace Nbnxm */ static unsigned int gpu_min_ci_balanced_factor = 50; - -/*! \brief Initializes the atomdata structure first time, it only gets filled at - pair-search. - */ -static void init_atomdata_first(NBAtomData* ad, - int ntypes, - const DeviceContext& deviceContext, - const DeviceStream& localStream) -{ - ad->numTypes = ntypes; - - allocateDeviceBuffer(&ad->shiftVec, SHIFTS, deviceContext); - ad->shiftVecUploaded = false; - - allocateDeviceBuffer(&ad->fShift, SHIFTS, deviceContext); - allocateDeviceBuffer(&ad->eLJ, 1, deviceContext); - allocateDeviceBuffer(&ad->eElec, 1, deviceContext); - - clearDeviceBufferAsync(&ad->fShift, 0, SHIFTS, localStream); - clearDeviceBufferAsync(&ad->eElec, 0, 1, localStream); - clearDeviceBufferAsync(&ad->eLJ, 0, 1, localStream); - - /* initialize to nullptr pointers to data that is not allocated here and will - need reallocation in nbnxn_gpu_init_atomdata */ - ad->xq = nullptr; - ad->f = nullptr; - - /* size -1 indicates that the respective array hasn't been initialized yet */ - ad->numAtoms = -1; - ad->numAtomsAlloc = -1; -} - - -/*! \brief Initializes the nonbonded parameter data structure. - */ -static void init_nbparam(NBParamGpu* nbp, - const interaction_const_t* ic, - const PairlistParams& listParams, - const nbnxn_atomdata_t::Params& nbatParams, - const DeviceContext& deviceContext) -{ - set_cutoff_parameters(nbp, ic, listParams); - - nbp->vdwType = nbnxmGpuPickVdwKernelType(ic, nbatParams.ljCombinationRule); - nbp->elecType = nbnxmGpuPickElectrostaticsKernelType(ic, deviceContext.deviceInfo()); - - if (ic->vdwtype == VanDerWaalsType::Pme) - { - if (ic->ljpme_comb_rule == LongRangeVdW::Geom) - { - GMX_ASSERT(nbatParams.ljCombinationRule == LJCombinationRule::Geometric, - "Combination rule mismatch!"); - } - else - { - GMX_ASSERT(nbatParams.ljCombinationRule == LJCombinationRule::LorentzBerthelot, - "Combination rule mismatch!"); - } - } - /* generate table for PME */ - nbp->coulomb_tab = nullptr; - if (nbp->elecType == ElecType::EwaldTab || nbp->elecType == ElecType::EwaldTabTwin) - { - GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); - init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext); - } - else - { - allocateDeviceBuffer(&nbp->coulomb_tab, 1, deviceContext); - } - - { - /* set up LJ parameter lookup table */ - static_assert(sizeof(Float2) == 2 * sizeof(decltype(*nbatParams.nbfp.data())), - "Mismatch in the size of host / device data types"); - DeviceBuffer nbfp; - initParamLookupTable(&nbfp, - nullptr, - reinterpret_cast(nbatParams.nbfp.data()), - nbatParams.numTypes * nbatParams.numTypes, - deviceContext); - nbp->nbfp = nbfp; - - if (ic->vdwtype == VanDerWaalsType::Pme) - { - static_assert(sizeof(Float2) == 2 * sizeof(decltype(*nbatParams.nbfp_comb.data())), - "Mismatch in the size of host / device data types"); - DeviceBuffer nbfp_comb; - initParamLookupTable(&nbfp_comb, - nullptr, - reinterpret_cast(nbatParams.nbfp_comb.data()), - nbatParams.numTypes, - deviceContext); - nbp->nbfp_comb = nbfp_comb; - } - } -} - /*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */ static cl_kernel nbnxn_gpu_create_kernel(NbnxmGpu* nb, const char* kernel_name) { @@ -238,70 +139,11 @@ static void nbnxn_gpu_init_kernels(NbnxmGpu* nb) nbnxn_gpu_create_kernel(nb, "nbnxn_kernel_prune_rolling_opencl"); } -//! This function is documented in the header file -NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager, - const interaction_const_t* ic, - const PairlistParams& listParams, - const nbnxn_atomdata_t* nbat, - const bool bLocalAndNonlocal) +void gpu_init_platform_specific(NbnxmGpu* nb) { - GMX_ASSERT(ic, "Need a valid interaction constants object"); - - auto nb = new NbnxmGpu(); - nb->deviceContext_ = &deviceStreamManager.context(); - snew(nb->atdat, 1); - snew(nb->nbparam, 1); - snew(nb->plist[InteractionLocality::Local], 1); - if (bLocalAndNonlocal) - { - snew(nb->plist[InteractionLocality::NonLocal], 1); - } - - nb->bUseTwoStreams = bLocalAndNonlocal; - - nb->timers = new Nbnxm::GpuTimers(); - snew(nb->timings, 1); - /* set device info, just point it to the right GPU among the detected ones */ nb->dev_rundata = new gmx_device_runtime_data_t(); - /* init nbst */ - pmalloc(reinterpret_cast(&nb->nbst.eLJ), sizeof(*nb->nbst.eLJ)); - pmalloc(reinterpret_cast(&nb->nbst.eElec), sizeof(*nb->nbst.eElec)); - pmalloc(reinterpret_cast(&nb->nbst.fShift), SHIFTS * sizeof(*nb->nbst.fShift)); - - init_plist(nb->plist[InteractionLocality::Local]); - - /* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */ - nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); - - /* local/non-local GPU streams */ - GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal), - "Local non-bonded stream should be initialized to use GPU for non-bonded."); - const DeviceStream& localStream = deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal); - nb->deviceStreams[InteractionLocality::Local] = &localStream; - - if (nb->bUseTwoStreams) - { - init_plist(nb->plist[InteractionLocality::NonLocal]); - - GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal), - "Non-local non-bonded stream should be initialized to use GPU for " - "non-bonded with domain decomposition."); - nb->deviceStreams[InteractionLocality::NonLocal] = - &deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal); - } - - if (nb->bDoTime) - { - init_timings(nb->timings); - } - - const nbnxn_atomdata_t::Params& nbatParams = nbat->params(); - const DeviceContext& deviceContext = *nb->deviceContext_; - init_atomdata_first(nb->atdat, nbatParams.numTypes, deviceContext, localStream); - init_nbparam(nb->nbparam, ic, listParams, nbatParams, deviceContext); - /* Enable LJ param manual prefetch for AMD or Intel or if we request through env. var. * TODO: decide about NVIDIA */ @@ -316,13 +158,6 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager, */ nbnxn_gpu_compile_kernels(nb); nbnxn_gpu_init_kernels(nb); - - if (debug) - { - fprintf(debug, "Initialized OpenCL data structures.\n"); - } - - return nb; } //! This function is documented in the header file @@ -401,6 +236,12 @@ void gpu_free(NbnxmGpu* nb) return; } + delete nb->timers; + sfree(nb->timings); + + NBAtomData* atdat = nb->atdat; + NBParamGpu* nbparam = nb->nbparam; + /* Free kernels */ // NOLINTNEXTLINE(bugprone-sizeof-expression) int kernel_count = sizeof(nb->kernel_ener_noprune_ptr) / sizeof(nb->kernel_ener_noprune_ptr[0][0]); @@ -424,16 +265,31 @@ void gpu_free(NbnxmGpu* nb) freeDeviceBuffer(&(nb->atdat->eLJ)); freeDeviceBuffer(&(nb->atdat->eElec)); freeDeviceBuffer(&(nb->atdat->fShift)); - freeDeviceBuffer(&(nb->atdat->ljComb)); - freeDeviceBuffer(&(nb->atdat->atomTypes)); freeDeviceBuffer(&(nb->atdat->shiftVec)); - sfree(nb->atdat); + if (useLjCombRule(nb->nbparam->vdwType)) + { + freeDeviceBuffer(&atdat->ljComb); + } + else + { + freeDeviceBuffer(&atdat->atomTypes); + } /* Free nbparam */ - freeDeviceBuffer(&(nb->nbparam->nbfp)); - freeDeviceBuffer(&(nb->nbparam->nbfp_comb)); - freeDeviceBuffer(&(nb->nbparam->coulomb_tab)); - sfree(nb->nbparam); + if (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin) + { + destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj); + } + + if (!useLjCombRule(nb->nbparam->vdwType)) + { + destroyParamLookupTable(&nbparam->nbfp, nbparam->nbfp_texobj); + } + + if (nbparam->vdwType == VdwType::EwaldGeom || nbparam->vdwType == VdwType::EwaldLB) + { + destroyParamLookupTable(&nbparam->nbfp_comb, nbparam->nbfp_comb_texobj); + } /* Free plist */ auto* plist = nb->plist[InteractionLocality::Local]; @@ -441,7 +297,7 @@ void gpu_free(NbnxmGpu* nb) freeDeviceBuffer(&plist->cj4); freeDeviceBuffer(&plist->imask); freeDeviceBuffer(&plist->excl); - sfree(plist); + delete plist; if (nb->bUseTwoStreams) { auto* plist_nl = nb->plist[InteractionLocality::NonLocal]; @@ -449,7 +305,7 @@ void gpu_free(NbnxmGpu* nb) freeDeviceBuffer(&plist_nl->cj4); freeDeviceBuffer(&plist_nl->imask); freeDeviceBuffer(&plist_nl->excl); - sfree(plist_nl); + delete plist_nl; } /* Free nbst */ @@ -465,9 +321,8 @@ void gpu_free(NbnxmGpu* nb) freeGpuProgram(nb->dev_rundata->program); delete nb->dev_rundata; - /* Free timers and timings */ - delete nb->timers; - sfree(nb->timings); + delete atdat; + delete nbparam; delete nb; if (debug) diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp index 2f37a0c011..3998f833b5 100644 --- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp @@ -41,7 +41,6 @@ */ #include "gmxpre.h" -#include "gromacs/gpu_utils/device_stream_manager.h" #include "gromacs/gpu_utils/pmalloc.h" #include "gromacs/hardware/device_information.h" #include "gromacs/mdtypes/interaction_const.h" @@ -57,137 +56,9 @@ namespace Nbnxm { -/*! \brief Initialize \p atomdata first time; it only gets filled at pair-search. */ -static void initAtomdataFirst(NBAtomData* atomdata, - int numTypes, - const DeviceContext& deviceContext, - const DeviceStream& localStream) +void gpu_init_platform_specific(NbnxmGpu* /* nb */) { - atomdata->numTypes = numTypes; - allocateDeviceBuffer(&atomdata->shiftVec, SHIFTS, deviceContext); - atomdata->shiftVecUploaded = false; - - allocateDeviceBuffer(&atomdata->fShift, SHIFTS, deviceContext); - allocateDeviceBuffer(&atomdata->eLJ, 1, deviceContext); - allocateDeviceBuffer(&atomdata->eElec, 1, deviceContext); - - clearDeviceBufferAsync(&atomdata->fShift, 0, SHIFTS, localStream); - clearDeviceBufferAsync(&atomdata->eElec, 0, 1, localStream); - clearDeviceBufferAsync(&atomdata->eLJ, 0, 1, localStream); - - /* initialize to nullptr pointers to data that is not allocated here and will - need reallocation in later */ - atomdata->xq = nullptr; - atomdata->f = nullptr; - - /* size -1 indicates that the respective array hasn't been initialized yet */ - atomdata->numAtoms = -1; - atomdata->numAtomsAlloc = -1; -} - -/*! \brief Initialize the nonbonded parameter data structure. */ -static void initNbparam(NBParamGpu* nbp, - const interaction_const_t& ic, - const PairlistParams& listParams, - const nbnxn_atomdata_t::Params& nbatParams, - const DeviceContext& deviceContext) -{ - const int numTypes = nbatParams.numTypes; - - set_cutoff_parameters(nbp, &ic, listParams); - - nbp->vdwType = nbnxmGpuPickVdwKernelType(&ic, nbatParams.ljCombinationRule); - nbp->elecType = nbnxmGpuPickElectrostaticsKernelType(&ic, deviceContext.deviceInfo()); - - /* generate table for PME */ - nbp->coulomb_tab = nullptr; - if (nbp->elecType == ElecType::EwaldTab || nbp->elecType == ElecType::EwaldTabTwin) - { - GMX_RELEASE_ASSERT(ic.coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); - init_ewald_coulomb_force_table(*ic.coulombEwaldTables, nbp, deviceContext); - } - - /* set up LJ parameter lookup table */ - if (!useLjCombRule(nbp->vdwType)) - { - static_assert(sizeof(decltype(nbp->nbfp)) == 2 * sizeof(decltype(*nbatParams.nbfp.data())), - "Mismatch in the size of host / device data types"); - initParamLookupTable(&nbp->nbfp, - &nbp->nbfp_texobj, - reinterpret_cast(nbatParams.nbfp.data()), - numTypes * numTypes, - deviceContext); - } - - /* set up LJ-PME parameter lookup table */ - if (ic.vdwtype == VanDerWaalsType::Pme) - { - static_assert(sizeof(decltype(nbp->nbfp_comb)) - == 2 * sizeof(decltype(*nbatParams.nbfp_comb.data())), - "Mismatch in the size of host / device data types"); - initParamLookupTable(&nbp->nbfp_comb, - &nbp->nbfp_comb_texobj, - reinterpret_cast(nbatParams.nbfp_comb.data()), - numTypes, - deviceContext); - } -} - -NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager, - const interaction_const_t* ic, - const PairlistParams& listParams, - const nbnxn_atomdata_t* nbat, - const bool bLocalAndNonlocal) -{ - auto* nb = new NbnxmGpu(); - nb->deviceContext_ = &deviceStreamManager.context(); - nb->atdat = new NBAtomData; - nb->nbparam = new NBParamGpu; - nb->plist[InteractionLocality::Local] = new Nbnxm::gpu_plist; - if (bLocalAndNonlocal) - { - nb->plist[InteractionLocality::NonLocal] = new Nbnxm::gpu_plist; - } - - nb->bUseTwoStreams = bLocalAndNonlocal; - - nb->timers = nullptr; - nb->timings = nullptr; - - /* init nbst */ - pmalloc(reinterpret_cast(&nb->nbst.eLJ), sizeof(*nb->nbst.eLJ)); - pmalloc(reinterpret_cast(&nb->nbst.eElec), sizeof(*nb->nbst.eElec)); - pmalloc(reinterpret_cast(&nb->nbst.fShift), SHIFTS * sizeof(*nb->nbst.fShift)); - - init_plist(nb->plist[InteractionLocality::Local]); - - /* local/non-local GPU streams */ - GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal), - "Local non-bonded stream should be initialized to use GPU for non-bonded."); - const DeviceStream& localStream = deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal); - nb->deviceStreams[InteractionLocality::Local] = &localStream; - // In general, it's not strictly necessary to use 2 streams for SYCL, since they are - // out-of-order. But for the time being, it will be less disruptive to keep them. - if (nb->bUseTwoStreams) - { - init_plist(nb->plist[InteractionLocality::NonLocal]); - - GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal), - "Non-local non-bonded stream should be initialized to use GPU for " - "non-bonded with domain decomposition."); - nb->deviceStreams[InteractionLocality::NonLocal] = - &deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal); - } - - nb->bDoTime = false; - - const nbnxn_atomdata_t::Params& nbatParams = nbat->params(); - const DeviceContext& deviceContext = *nb->deviceContext_; - - initNbparam(nb->nbparam, *ic, listParams, nbatParams, deviceContext); - initAtomdataFirst(nb->atdat, nbatParams.numTypes, deviceContext, localStream); - - return nb; + // Nothing specific in SYCL } void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom) @@ -218,11 +89,13 @@ void gpu_free(NbnxmGpu* nb) return; } + delete nb->timers; + sfree(nb->timings); + NBAtomData* atdat = nb->atdat; NBParamGpu* nbparam = nb->nbparam; - if ((!nbparam->coulomb_tab) - && (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin)) + if (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin) { destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj); } -- 2.22.0