add_library(mdlib INTERFACE)
file(GLOB MDLIB_SOURCES *.cpp)
-# To avoid listing all the necessary files manually, we will remove SYCL-specfific files here:
+# To avoid listing all the necessary files manually, we will remove SYCL-specific files here:
list(REMOVE_ITEM MDLIB_SOURCES
+ ${CMAKE_CURRENT_SOURCE_DIR}/gpuforcereduction_impl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/leapfrog_gpu_sycl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu.cpp
${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu_internal_sycl.cpp
if(GMX_GPU_CUDA)
gmx_add_libgromacs_sources(
leapfrog_gpu.cu
+ gpuforcereduction_impl.cpp
+ gpuforcereduction_impl_internal.cu
lincs_gpu.cpp
lincs_gpu_internal.cu
settle_gpu.cpp
settle_gpu_internal.cu
update_constrain_gpu_impl.cu
- gpuforcereduction_impl.cu
)
_gmx_add_files_to_property(CUDA_SOURCES
+ gpuforcereduction_impl.cpp
lincs_gpu.cpp
settle_gpu.cpp
)
#include <memory>
+#include "config.h"
+
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/timing/wallcycle.h"
namespace gmx
{
+#define HAVE_GPU_FORCE_REDUCTION (GMX_GPU_CUDA)
+
/*! \internal
* \brief Manages the force reduction directly in GPU memory
*
*
* \param [in] forcePtr Pointer to force to be reduced
*/
- void registerRvecForce(DeviceBuffer<gmx::RVec> forcePtr);
+ void registerRvecForce(DeviceBuffer<RVec> forcePtr);
/*! \brief Add a dependency for this force reduction
*
*/
/*! \internal \file
*
- * \brief May be used to implement force reduction interfaces for non-GPU builds.
+ * \brief Implements backend-agnostic GPU Force Reduction functions
*
* \author Alan Gray <alang@nvidia.com>
*
#include "gmxpre.h"
-#include "config.h"
+#include "gpuforcereduction_impl.h"
-#include "gpuforcereduction.h"
-
-#if !GMX_GPU_CUDA
+#include "gromacs/gpu_utils/device_stream.h"
+#include "gromacs/gpu_utils/devicebuffer.h"
+#if GMX_GPU_CUDA
+# include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#elif GMX_GPU_SYCL
+# include "gromacs/gpu_utils/gpueventsynchronizer_sycl.h"
+#endif
+#include "gromacs/mdlib/gpuforcereduction_impl_internal.h"
+#include "gromacs/utility/gmxassert.h"
namespace gmx
{
-class GpuForceReduction::Impl
+GpuForceReduction::Impl::Impl(const DeviceContext& deviceContext,
+ const DeviceStream& deviceStream,
+ gmx_wallcycle* wcycle) :
+ baseForce_(),
+ deviceContext_(deviceContext),
+ deviceStream_(deviceStream),
+ nbnxmForceToAdd_(),
+ rvecForceToAdd_(),
+ wcycle_(wcycle)
+{
+}
+
+void GpuForceReduction::Impl::reinit(DeviceBuffer<Float3> baseForcePtr,
+ const int numAtoms,
+ ArrayRef<const int> cell,
+ const int atomStart,
+ const bool accumulate,
+ GpuEventSynchronizer* completionMarker)
+{
+ GMX_ASSERT((baseForcePtr != nullptr), "Input base force for reduction has no data");
+ baseForce_ = baseForcePtr;
+ numAtoms_ = numAtoms;
+ atomStart_ = atomStart;
+ accumulate_ = static_cast<int>(accumulate);
+ completionMarker_ = completionMarker;
+ cellInfo_.cell = cell.data();
+
+ wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+ reallocateDeviceBuffer(
+ &cellInfo_.d_cell, numAtoms_, &cellInfo_.cellSize, &cellInfo_.cellSizeAlloc, deviceContext_);
+ copyToDeviceBuffer(&cellInfo_.d_cell,
+ &(cellInfo_.cell[atomStart]),
+ 0,
+ numAtoms_,
+ deviceStream_,
+ GpuApiCallBehavior::Async,
+ nullptr);
+ wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
+
+ dependencyList_.clear();
+};
+
+void GpuForceReduction::Impl::registerNbnxmForce(DeviceBuffer<RVec> forcePtr)
{
+ GMX_ASSERT(forcePtr, "Input force for reduction has no data");
+ nbnxmForceToAdd_ = forcePtr;
};
-GpuForceReduction::GpuForceReduction(const DeviceContext& /* deviceContext */,
- const DeviceStream& /* deviceStream */,
- gmx_wallcycle* /*wcycle*/) :
- impl_(nullptr)
+void GpuForceReduction::Impl::registerRvecForce(DeviceBuffer<RVec> forcePtr)
+{
+ GMX_ASSERT(forcePtr, "Input force for reduction has no data");
+ rvecForceToAdd_ = forcePtr;
+};
+
+void GpuForceReduction::Impl::addDependency(GpuEventSynchronizer* const dependency)
+{
+ dependencyList_.push_back(dependency);
+}
+
+void GpuForceReduction::Impl::execute()
{
- GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+ wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+ wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuNBFBufOps);
+
+ if (numAtoms_ == 0)
+ {
+ return;
+ }
+
+ GMX_ASSERT(nbnxmForceToAdd_, "Nbnxm force for reduction has no data");
+
+ // Enqueue wait on all dependencies passed
+ for (auto* synchronizer : dependencyList_)
+ {
+ synchronizer->enqueueWaitEvent(deviceStream_);
+ }
+
+ const bool addRvecForce = static_cast<bool>(rvecForceToAdd_); // True iff initialized
+
+ launchForceReductionKernel(numAtoms_,
+ atomStart_,
+ addRvecForce,
+ accumulate_,
+ nbnxmForceToAdd_,
+ rvecForceToAdd_,
+ baseForce_,
+ cellInfo_.d_cell,
+ deviceStream_);
+
+ // Mark that kernel has been launched
+ if (completionMarker_ != nullptr)
+ {
+ completionMarker_->markEvent(deviceStream_);
+ }
+
+ wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuNBFBufOps);
+ wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
}
-// NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::reinit(DeviceBuffer<RVec> /*baseForcePtr*/,
- const int /*numAtoms*/,
- ArrayRef<const int> /*cell*/,
- const int /*atomStart*/,
- const bool /*accumulate*/,
- GpuEventSynchronizer* /*completionMarker*/)
+GpuForceReduction::Impl::~Impl() = default;
+
+GpuForceReduction::GpuForceReduction(const DeviceContext& deviceContext,
+ const DeviceStream& deviceStream,
+ gmx_wallcycle* wcycle) :
+ impl_(new Impl(deviceContext, deviceStream, wcycle))
{
- GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
}
-// NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::registerNbnxmForce(DeviceBuffer<RVec> /* forcePtr */)
+void GpuForceReduction::registerNbnxmForce(DeviceBuffer<RVec> forcePtr)
{
- GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+ impl_->registerNbnxmForce(forcePtr);
}
-// NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::registerRvecForce(DeviceBuffer<gmx::RVec> /* forcePtr */)
+void GpuForceReduction::registerRvecForce(DeviceBuffer<RVec> forcePtr)
{
- GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+ impl_->registerRvecForce(forcePtr);
}
-// NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::addDependency(GpuEventSynchronizer* const /* dependency */)
+void GpuForceReduction::addDependency(GpuEventSynchronizer* const dependency)
{
- GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+ impl_->addDependency(dependency);
}
-// NOLINTNEXTLINE readability-convert-member-functions-to-static
+void GpuForceReduction::reinit(DeviceBuffer<RVec> baseForcePtr,
+ const int numAtoms,
+ ArrayRef<const int> cell,
+ const int atomStart,
+ const bool accumulate,
+ GpuEventSynchronizer* completionMarker)
+{
+ impl_->reinit(baseForcePtr, numAtoms, cell, atomStart, accumulate, completionMarker);
+}
void GpuForceReduction::execute()
{
- GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+ impl_->execute();
}
GpuForceReduction::~GpuForceReduction() = default;
} // namespace gmx
-
-#endif
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2020,2021, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- *
- * \brief Implements GPU Force Reduction using CUDA
- *
- * \author Alan Gray <alang@nvidia.com>
- *
- * \ingroup module_mdlib
- */
-
-#include "gmxpre.h"
-
-#include "gpuforcereduction_impl.h"
-
-#include <stdio.h>
-
-#include "gromacs/gpu_utils/cudautils.cuh"
-#include "gromacs/gpu_utils/device_context.h"
-#include "gromacs/gpu_utils/devicebuffer.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
-#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
-#include "gromacs/gpu_utils/typecasts.cuh"
-#include "gromacs/gpu_utils/vectype_ops.cuh"
-#include "gromacs/utility/gmxassert.h"
-
-#include "gpuforcereduction.h"
-
-namespace gmx
-{
-
-constexpr static int c_threadsPerBlock = 128;
-
-typedef struct rvecDeviceForceData rvecDeviceForceData_t;
-
-
-template<bool addRvecForce, bool accumulateForce>
-static __global__ void reduceKernel(const float3* __restrict__ gm_nbnxmForce,
- const float3* __restrict__ rvecForceToAdd,
- float3* gm_fTotal,
- const int* gm_cell,
- const int numAtoms)
-{
-
- // map particle-level parallelism to 1D CUDA thread and block index
- const int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
-
- // perform addition for each particle
- if (threadIndex < numAtoms)
- {
-
- float3* gm_fDest = &gm_fTotal[threadIndex];
- float3 temp;
-
- // Accumulate or set nbnxm force
- if (accumulateForce)
- {
- temp = *gm_fDest;
- temp += gm_nbnxmForce[gm_cell[threadIndex]];
- }
- else
- {
- temp = gm_nbnxmForce[gm_cell[threadIndex]];
- }
-
- if (addRvecForce)
- {
- temp += rvecForceToAdd[threadIndex];
- }
-
- *gm_fDest = temp;
- }
- return;
-}
-
-GpuForceReduction::Impl::Impl(const DeviceContext& deviceContext,
- const DeviceStream& deviceStream,
- gmx_wallcycle* wcycle) :
- baseForce_(nullptr),
- deviceContext_(deviceContext),
- deviceStream_(deviceStream),
- nbnxmForceToAdd_(nullptr),
- rvecForceToAdd_(nullptr),
- wcycle_(wcycle){};
-
-void GpuForceReduction::Impl::reinit(DeviceBuffer<Float3> baseForcePtr,
- const int numAtoms,
- ArrayRef<const int> cell,
- const int atomStart,
- const bool accumulate,
- GpuEventSynchronizer* completionMarker)
-{
- GMX_ASSERT((baseForcePtr != nullptr), "Input base force for reduction has no data");
- baseForce_ = baseForcePtr;
- numAtoms_ = numAtoms;
- atomStart_ = atomStart;
- accumulate_ = accumulate;
- completionMarker_ = completionMarker;
- cellInfo_.cell = cell.data();
-
- wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
- reallocateDeviceBuffer(
- &cellInfo_.d_cell, numAtoms_, &cellInfo_.cellSize, &cellInfo_.cellSizeAlloc, deviceContext_);
- copyToDeviceBuffer(&cellInfo_.d_cell,
- &(cellInfo_.cell[atomStart]),
- 0,
- numAtoms_,
- deviceStream_,
- GpuApiCallBehavior::Async,
- nullptr);
- wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
-
- dependencyList_.clear();
-};
-
-void GpuForceReduction::Impl::registerNbnxmForce(DeviceBuffer<RVec> forcePtr)
-{
- GMX_ASSERT((forcePtr != nullptr), "Input force for reduction has no data");
- nbnxmForceToAdd_ = forcePtr;
-};
-
-void GpuForceReduction::Impl::registerRvecForce(DeviceBuffer<RVec> forcePtr)
-{
- GMX_ASSERT((forcePtr != nullptr), "Input force for reduction has no data");
- rvecForceToAdd_ = forcePtr;
-};
-
-void GpuForceReduction::Impl::addDependency(GpuEventSynchronizer* const dependency)
-{
- dependencyList_.push_back(dependency);
-}
-
-void GpuForceReduction::Impl::execute()
-{
- wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
- wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuNBFBufOps);
-
- if (numAtoms_ == 0)
- {
- return;
- }
-
- GMX_ASSERT((nbnxmForceToAdd_ != nullptr), "Nbnxm force for reduction has no data");
-
- // Enqueue wait on all dependencies passed
- for (auto const synchronizer : dependencyList_)
- {
- synchronizer->enqueueWaitEvent(deviceStream_);
- }
-
- float3* d_baseForce = &(asFloat3(baseForce_)[atomStart_]);
- float3* d_nbnxmForce = asFloat3(nbnxmForceToAdd_);
- float3* d_rvecForceToAdd = &(asFloat3(rvecForceToAdd_)[atomStart_]);
-
- // Configure and launch kernel
- KernelLaunchConfig config;
- config.blockSize[0] = c_threadsPerBlock;
- config.blockSize[1] = 1;
- config.blockSize[2] = 1;
- config.gridSize[0] = ((numAtoms_ + 1) + c_threadsPerBlock - 1) / c_threadsPerBlock;
- config.gridSize[1] = 1;
- config.gridSize[2] = 1;
- config.sharedMemorySize = 0;
-
- auto kernelFn = (rvecForceToAdd_ != nullptr)
- ? (accumulate_ ? reduceKernel<true, true> : reduceKernel<true, false>)
- : (accumulate_ ? reduceKernel<false, true> : reduceKernel<false, false>);
-
- const auto kernelArgs = prepareGpuKernelArguments(
- kernelFn, config, &d_nbnxmForce, &d_rvecForceToAdd, &d_baseForce, &cellInfo_.d_cell, &numAtoms_);
-
- launchGpuKernel(kernelFn, config, deviceStream_, nullptr, "Force Reduction", kernelArgs);
-
- // Mark that kernel has been launched
- if (completionMarker_ != nullptr)
- {
- completionMarker_->markEvent(deviceStream_);
- }
-
- wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuNBFBufOps);
- wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
-}
-
-GpuForceReduction::Impl::~Impl(){};
-
-GpuForceReduction::GpuForceReduction(const DeviceContext& deviceContext,
- const DeviceStream& deviceStream,
- gmx_wallcycle* wcycle) :
- impl_(new Impl(deviceContext, deviceStream, wcycle))
-{
-}
-
-void GpuForceReduction::registerNbnxmForce(DeviceBuffer<Float3> forcePtr)
-{
- impl_->registerNbnxmForce(forcePtr);
-}
-
-void GpuForceReduction::registerRvecForce(DeviceBuffer<gmx::RVec> forcePtr)
-{
- impl_->registerRvecForce(forcePtr);
-}
-
-void GpuForceReduction::addDependency(GpuEventSynchronizer* const dependency)
-{
- impl_->addDependency(dependency);
-}
-
-void GpuForceReduction::reinit(DeviceBuffer<RVec> baseForcePtr,
- const int numAtoms,
- ArrayRef<const int> cell,
- const int atomStart,
- const bool accumulate,
- GpuEventSynchronizer* completionMarker)
-{
- impl_->reinit(baseForcePtr, numAtoms, cell, atomStart, accumulate, completionMarker);
-}
-void GpuForceReduction::execute()
-{
- impl_->execute();
-}
-
-GpuForceReduction::~GpuForceReduction() = default;
-
-} // namespace gmx
DeviceBuffer<RVec> nbnxmForceToAdd_;
//! Rvec-format force to be added in this reduction
DeviceBuffer<RVec> rvecForceToAdd_;
- //! event to be marked when redcution launch has been completed
+ //! event to be marked when reduction launch has been completed
GpuEventSynchronizer* completionMarker_ = nullptr;
//! The wallclock counter
gmx_wallcycle* wcycle_ = nullptr;
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Implements GPU Force Reduction using CUDA
+ *
+ * \author Alan Gray <alang@nvidia.com>
+ *
+ * \ingroup module_mdlib
+ */
+
+#include "gmxpre.h"
+
+#include "gpuforcereduction_impl_internal.h"
+
+#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/devicebuffer.h"
+#include "gromacs/gpu_utils/typecasts.cuh"
+#include "gromacs/gpu_utils/vectype_ops.cuh"
+
+namespace gmx
+{
+
+constexpr static int c_threadsPerBlock = 128;
+
+template<bool addRvecForce, bool accumulateForce>
+static __global__ void reduceKernel(const float3* __restrict__ gm_nbnxmForce,
+ const float3* __restrict__ rvecForceToAdd,
+ float3* gm_fTotal,
+ const int* gm_cell,
+ const int numAtoms)
+{
+
+ // map particle-level parallelism to 1D CUDA thread and block index
+ const int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
+
+ // perform addition for each particle
+ if (threadIndex < numAtoms)
+ {
+
+ float3* gm_fDest = &gm_fTotal[threadIndex];
+ float3 temp;
+
+ // Accumulate or set nbnxm force
+ if (accumulateForce)
+ {
+ temp = *gm_fDest;
+ temp += gm_nbnxmForce[gm_cell[threadIndex]];
+ }
+ else
+ {
+ temp = gm_nbnxmForce[gm_cell[threadIndex]];
+ }
+
+ if (addRvecForce)
+ {
+ temp += rvecForceToAdd[threadIndex];
+ }
+
+ *gm_fDest = temp;
+ }
+ return;
+}
+
+void launchForceReductionKernel(int numAtoms,
+ int atomStart,
+ bool addRvecForce,
+ bool accumulate,
+ const DeviceBuffer<Float3> d_nbnxmForceToAdd,
+ const DeviceBuffer<Float3> d_rvecForceToAdd,
+ DeviceBuffer<Float3> d_baseForce,
+ DeviceBuffer<int> d_cell,
+ const DeviceStream& deviceStream)
+{
+ float3* d_baseForcePtr = &(asFloat3(d_baseForce)[atomStart]);
+ float3* d_nbnxmForcePtr = asFloat3(d_nbnxmForceToAdd);
+ float3* d_rvecForceToAddPtr = &(asFloat3(d_rvecForceToAdd)[atomStart]);
+
+ // Configure and launch kernel
+ KernelLaunchConfig config;
+ config.blockSize[0] = c_threadsPerBlock;
+ config.blockSize[1] = 1;
+ config.blockSize[2] = 1;
+ config.gridSize[0] = ((numAtoms + 1) + c_threadsPerBlock - 1) / c_threadsPerBlock;
+ config.gridSize[1] = 1;
+ config.gridSize[2] = 1;
+ config.sharedMemorySize = 0;
+
+ auto kernelFn = addRvecForce
+ ? (accumulate ? reduceKernel<true, true> : reduceKernel<true, false>)
+ : (accumulate ? reduceKernel<false, true> : reduceKernel<false, false>);
+
+ const auto kernelArgs = prepareGpuKernelArguments(
+ kernelFn, config, &d_nbnxmForcePtr, &d_rvecForceToAddPtr, &d_baseForcePtr, &d_cell, &numAtoms);
+
+ launchGpuKernel(kernelFn, config, deviceStream, nullptr, "Force Reduction", kernelArgs);
+}
+
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Declares vendor-specific function to launch force reduction kernel
+ *
+ * \author Andrey Alekseenko <al42and@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
+#include "gromacs/gpu_utils/gputraits.h"
+
+class DeviceStream;
+
+namespace gmx
+{
+
+/*! \brief Backend-specific function to launch GPU Force Reduction kernel.
+ *
+ * In pseudocode:
+ *
+ * \code{.cpp}
+ * for (int i = 0; i < numAtoms; i++) {
+ * totalForce = d_nbnxmForceToAdd[d_cell[i]]
+ * if (accumulate)
+ * totalForce += d_baseForce[atomStart + i]
+ * if (addRvecForce)
+ * totalForce += d_rvecForceToAdd[atomStart + i]
+ * d_baseForce[atomStart + i] = totalForce[i]
+ * }
+ * \endcode
+ *
+ * \param numAtoms Number of atoms subject to reduction.
+ * \param atomStart First atom index (for \p d_rvecForceToAdd and \p d_baseForce).
+ * \param addRvecForce When \c false, \p d_rvecForceToAdd is ignored.
+ * \param accumulate When \c false, the previous values of \p d_baseForce are discarded.
+ * \param d_nbnxmForceToAdd Buffer containing Nbnxm forces in Nbnxm layout.
+ * \param d_rvecForceToAdd Optional buffer containing arbitrary forces in linear layout.
+ * \param d_baseForce Destination buffer for forces in linear layout.
+ * \param d_cell Atom index to Nbnxm cell index.
+ * \param deviceStream Device stream for kernel submission.
+ */
+void launchForceReductionKernel(int numAtoms,
+ int atomStart,
+ bool addRvecForce,
+ bool accumulate,
+ DeviceBuffer<Float3> d_nbnxmForceToAdd,
+ DeviceBuffer<Float3> d_rvecForceToAdd,
+ DeviceBuffer<Float3> d_baseForce,
+ DeviceBuffer<int> d_cell,
+ const DeviceStream& deviceStream);
+
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief May be used to implement force reduction interfaces for non-GPU builds.
+ *
+ * \author Alan Gray <alang@nvidia.com>
+ *
+ * \ingroup module_mdlib
+ */
+
+#include "gmxpre.h"
+
+#include "config.h"
+
+#include "gpuforcereduction.h"
+
+#if !HAVE_GPU_FORCE_REDUCTION
+
+namespace gmx
+{
+
+class GpuForceReduction::Impl
+{
+};
+
+GpuForceReduction::GpuForceReduction(const DeviceContext& /* deviceContext */,
+ const DeviceStream& /* deviceStream */,
+ gmx_wallcycle* /*wcycle*/) :
+ impl_(nullptr)
+{
+ GMX_RELEASE_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+}
+
+// NOLINTNEXTLINE readability-convert-member-functions-to-static
+void GpuForceReduction::reinit(DeviceBuffer<RVec> /*baseForcePtr*/,
+ const int /*numAtoms*/,
+ ArrayRef<const int> /*cell*/,
+ const int /*atomStart*/,
+ const bool /*accumulate*/,
+ GpuEventSynchronizer* /*completionMarker*/)
+{
+ GMX_RELEASE_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+}
+
+// NOLINTNEXTLINE readability-convert-member-functions-to-static
+void GpuForceReduction::registerNbnxmForce(DeviceBuffer<RVec> /* forcePtr */)
+{
+ GMX_RELEASE_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+}
+
+// NOLINTNEXTLINE readability-convert-member-functions-to-static
+void GpuForceReduction::registerRvecForce(DeviceBuffer<gmx::RVec> /* forcePtr */)
+{
+ GMX_RELEASE_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+}
+
+// NOLINTNEXTLINE readability-convert-member-functions-to-static
+void GpuForceReduction::addDependency(GpuEventSynchronizer* const /* dependency */)
+{
+ GMX_RELEASE_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+}
+
+// NOLINTNEXTLINE readability-convert-member-functions-to-static
+void GpuForceReduction::execute()
+{
+ GMX_RELEASE_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
+}
+
+GpuForceReduction::~GpuForceReduction() = default;
+
+} // namespace gmx
+
+#endif /* !HAVE_GPU_FORCE_REDUCTION */