From 97d2a2688f3a409e76e2e42024f82eb6f66f4fe6 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Thu, 18 Mar 2021 09:49:18 +0300 Subject: [PATCH] Move the atomToInteractionLocality(...) into locality.h The atomToInteractionLocality(...) does not depend on NBNXM and can naturally reside alongside the definitions of localitiy enumerations. This allows to combine haveGpuShortRangeWork(...) functions in NBNXM, avoding confuciton of having two very similar functions with different signatures. --- src/gromacs/mdtypes/locality.h | 30 ++++++++++++++++- src/gromacs/nbnxm/cuda/nbnxm_cuda.cu | 2 +- src/gromacs/nbnxm/gpu_common.h | 8 ++--- src/gromacs/nbnxm/gpu_common_utils.h | 41 ----------------------- src/gromacs/nbnxm/nbnxm.cpp | 2 +- src/gromacs/nbnxm/nbnxm_gpu.h | 10 +++--- src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp | 12 +++---- 7 files changed, 47 insertions(+), 58 deletions(-) diff --git a/src/gromacs/mdtypes/locality.h b/src/gromacs/mdtypes/locality.h index 4805336f1c..709fcfa3ac 100644 --- a/src/gromacs/mdtypes/locality.h +++ b/src/gromacs/mdtypes/locality.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2019, by the GROMACS development team, led by + * Copyright (c) 2019,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -44,6 +44,7 @@ #define GMX_MDTYPES_LOCALITY_H #include "gromacs/utility/enumerationhelpers.h" +#include "gromacs/utility/exceptions.h" namespace gmx { @@ -83,6 +84,33 @@ static const EnumerationArray c_interactionLoc { "local", "non-local" } }; +/*! \brief Convert atom locality to interaction locality. + * + * In the current implementation the this is straightforward conversion: + * local to local, non-local to non-local. + * + * \param[in] atomLocality Atom locality specifier + * \returns Interaction locality corresponding to the atom locality passed. + */ +static inline InteractionLocality atomToInteractionLocality(const AtomLocality atomLocality) +{ + + /* determine interaction locality from atom locality */ + if (atomLocality == AtomLocality::Local) + { + return InteractionLocality::Local; + } + else if (atomLocality == AtomLocality::NonLocal) + { + return InteractionLocality::NonLocal; + } + else + { + GMX_THROW(gmx::InconsistentInputError( + "Only Local and NonLocal atom locities can be converted to interaction locality.")); + } +} + } // namespace gmx #endif // GMX_MDTYPES_LOCALITY_H diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index 76341b7a97..6a35676acf 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -726,7 +726,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid& grid, const int numColumns = grid.numColumns(); const int cellOffset = grid.cellOffset(); const int numAtomsPerCell = grid.numAtomsPerCell(); - Nbnxm::InteractionLocality interactionLoc = gpuAtomToInteractionLocality(locality); + Nbnxm::InteractionLocality interactionLoc = atomToInteractionLocality(locality); const DeviceStream& deviceStream = *nb->deviceStreams[interactionLoc]; diff --git a/src/gromacs/nbnxm/gpu_common.h b/src/gromacs/nbnxm/gpu_common.h index 39c6d29057..e357433f0f 100644 --- a/src/gromacs/nbnxm/gpu_common.h +++ b/src/gromacs/nbnxm/gpu_common.h @@ -196,7 +196,7 @@ static inline void gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t* timings, } /* determine interaction locality from atom locality */ - const InteractionLocality iLocality = gpuAtomToInteractionLocality(atomLocality); + const InteractionLocality iLocality = atomToInteractionLocality(atomLocality); const bool didEnergyKernels = stepWork.computeEnergy; /* only increase counter once (at local F wait) */ @@ -261,7 +261,7 @@ bool gpu_try_finish_task(NbnxmGpu* nb, GMX_ASSERT(nb, "Need a valid nbnxn_gpu object"); /* determine interaction locality from atom locality */ - const InteractionLocality iLocality = gpuAtomToInteractionLocality(aloc); + const InteractionLocality iLocality = atomToInteractionLocality(aloc); // Transfers are launched and therefore need to be waited on if: @@ -277,7 +277,7 @@ bool gpu_try_finish_task(NbnxmGpu* nb, // We skip when during the non-local phase there was actually no work to do. // This is consistent with nbnxn_gpu_launch_kernel but it also considers possible // bonded GPU work. - if ((iLocality == InteractionLocality::Local) || haveGpuShortRangeWork(*nb, iLocality)) + if ((iLocality == InteractionLocality::Local) || haveGpuShortRangeWork(nb, iLocality)) { // Query the state of the GPU stream and return early if we're not done if (completionKind == GpuTaskCompletion::Check) @@ -360,7 +360,7 @@ float gpu_wait_finish_task(NbnxmGpu* nb, gmx::ArrayRef shiftForces, gmx_wallcycle* wcycle) { - auto cycleCounter = (gpuAtomToInteractionLocality(aloc) == InteractionLocality::Local) + auto cycleCounter = (atomToInteractionLocality(aloc) == InteractionLocality::Local) ? ewcWAIT_GPU_NB_L : ewcWAIT_GPU_NB_NL; diff --git a/src/gromacs/nbnxm/gpu_common_utils.h b/src/gromacs/nbnxm/gpu_common_utils.h index 6694f2b2fb..43da174d41 100644 --- a/src/gromacs/nbnxm/gpu_common_utils.h +++ b/src/gromacs/nbnxm/gpu_common_utils.h @@ -73,47 +73,6 @@ static inline bool canSkipNonbondedWork(const NbnxmGpu& nb, InteractionLocality return (iloc == InteractionLocality::NonLocal && nb.plist[iloc]->nsci == 0); } -/*! \brief Convert atom locality to interaction locality. - * - * In the current implementation the this is straightforward conversion: - * local to local, non-local to non-local. - * - * \param[in] atomLocality Atom locality specifier - * \returns Interaction locality corresponding to the atom locality passed. - */ -static inline InteractionLocality gpuAtomToInteractionLocality(const AtomLocality atomLocality) -{ - - /* determine interaction locality from atom locality */ - if (atomLocality == AtomLocality::Local) - { - return InteractionLocality::Local; - } - else if (atomLocality == AtomLocality::NonLocal) - { - return InteractionLocality::NonLocal; - } - else - { - GMX_THROW(gmx::InconsistentInputError( - "Only Local and NonLocal atom locities can be converted to interaction locality.")); - } -} - -/*! \brief Returns true if there is GPU short-range work for the given interaction locality. - * - * Note that as, unlike nonbonded tasks, bonded tasks are not split into local/nonlocal, - * and therefore if there are GPU offloaded bonded interactions, this function will return - * true for all interaction localities. - * - * \param[inout] nb Pointer to the nonbonded GPU data structure - * \param[in] iLocality Interaction locality identifier - */ -static inline bool haveGpuShortRangeWork(const NbnxmGpu& nb, const gmx::InteractionLocality iLocality) -{ - return nb.haveWork[iLocality]; -} - /*! \brief Calculate atom range and return start index and length. * * \param[in] atomData Atom descriptor data structure diff --git a/src/gromacs/nbnxm/nbnxm.cpp b/src/gromacs/nbnxm/nbnxm.cpp index 196befc92d..62febc366f 100644 --- a/src/gromacs/nbnxm/nbnxm.cpp +++ b/src/gromacs/nbnxm/nbnxm.cpp @@ -181,7 +181,7 @@ void nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const gmx::AtomLocality local /* Skip the reduction if there was no short-range GPU work to do * (either NB or both NB and bonded work). */ - if (!pairlistIsSimple() && !Nbnxm::haveGpuShortRangeWork(gpu_nbv, locality)) + if (!pairlistIsSimple() && !Nbnxm::haveGpuShortRangeWork(gpu_nbv, atomToInteractionLocality(locality))) { return; } diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h index ebdba45b9d..621878d9eb 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu.h +++ b/src/gromacs/nbnxm/nbnxm_gpu.h @@ -288,17 +288,19 @@ void setupGpuShortRangeWork(NbnxmGpu gmx_unused* nb, const gmx::GpuBonded gmx_unused* gpuBonded, gmx::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM; -/*! \brief Returns true if there is GPU short-range work for the given atom locality. +/*! \brief Returns true if there is GPU short-range work for the given interaction locality. * * Note that as, unlike nonbonded tasks, bonded tasks are not split into local/nonlocal, * and therefore if there are GPU offloaded bonded interactions, this function will return * true for both local and nonlocal atom range. * - * \param[inout] nb Pointer to the nonbonded GPU data structure - * \param[in] aLocality Atom locality identifier + * \param[inout] nb Pointer to the nonbonded GPU data structure + * \param[in] interactionLocality Interaction locality identifier + * + * \return Whether there is short range work for a given locality. */ GPU_FUNC_QUALIFIER -bool haveGpuShortRangeWork(const NbnxmGpu gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality) +bool haveGpuShortRangeWork(const NbnxmGpu gmx_unused* nb, gmx::InteractionLocality gmx_unused interactionLocality) GPU_FUNC_TERM_WITH_RETURN(false); /*! \brief sync CPU thread on coordinate copy to device diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp index c4efe8d458..c16bb77cda 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp @@ -746,11 +746,11 @@ void setupGpuShortRangeWork(NbnxmGpu* nb, const gmx::GpuBonded* gpuBonded, const || (gpuBonded != nullptr && gpuBonded->haveInteractions())); } -bool haveGpuShortRangeWork(const NbnxmGpu* nb, const gmx::AtomLocality aLocality) +bool haveGpuShortRangeWork(const NbnxmGpu* nb, const gmx::InteractionLocality interactionLocality) { GMX_ASSERT(nb, "Need a valid nbnxn_gpu object"); - return haveGpuShortRangeWork(*nb, gpuAtomToInteractionLocality(aLocality)); + return nb->haveWork[interactionLocality]; } /*! \brief @@ -765,7 +765,7 @@ void gpu_launch_cpyback(NbnxmGpu* nb, GMX_ASSERT(nb, "Need a valid nbnxn_gpu object"); /* determine interaction locality from atom locality */ - const InteractionLocality iloc = gpuAtomToInteractionLocality(atomLocality); + const InteractionLocality iloc = atomToInteractionLocality(atomLocality); GMX_ASSERT(iloc == InteractionLocality::Local || (iloc == InteractionLocality::NonLocal && nb->bNonLocalStreamDoneMarked == false), "Non-local stream is indicating that the copy back event is enqueued at the " @@ -778,7 +778,7 @@ void gpu_launch_cpyback(NbnxmGpu* nb, const DeviceStream& deviceStream = *nb->deviceStreams[iloc]; /* don't launch non-local copy-back if there was no non-local work to do */ - if ((iloc == InteractionLocality::NonLocal) && !haveGpuShortRangeWork(*nb, iloc)) + if ((iloc == InteractionLocality::NonLocal) && !haveGpuShortRangeWork(nb, iloc)) { /* TODO An alternative way to signal that non-local work is complete is to use a clEnqueueMarker+clEnqueueBarrier @@ -910,7 +910,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom { GMX_ASSERT(nb, "Need a valid nbnxn_gpu object"); - const InteractionLocality iloc = gpuAtomToInteractionLocality(atomLocality); + const InteractionLocality iloc = atomToInteractionLocality(atomLocality); NBAtomData* adat = nb->atdat; gpu_plist* plist = nb->plist[iloc]; @@ -928,7 +928,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom we always call the local local x+q copy (and the rest of the local work in nbnxn_gpu_launch_kernel(). */ - if ((iloc == InteractionLocality::NonLocal) && !haveGpuShortRangeWork(*nb, iloc)) + if ((iloc == InteractionLocality::NonLocal) && !haveGpuShortRangeWork(nb, iloc)) { plist->haveFreshList = false; -- 2.22.0