#include "locality.h"
+class DeviceContext;
+class DeviceStream;
class GpuEventSynchronizer;
struct gmx_wallcycle;
namespace gmx
{
+class DeviceStreamManager;
class StatePropagatorDataGpu
{
* ops are offloaded. This feature is currently not available in OpenCL and
* hence these streams are not set in these builds.
*
- * \note In CUDA, the update stream is created in the constructor as a temporary
- * solution, in place until the stream manager is introduced.
- * Note that this makes it impossible to construct this object in CUDA
- * builds executing on a host without any CUDA-capable device available.
- *
- * \note In CUDA, \p deviceContext is unused, hence always nullptr;
- * all stream arguments can also be nullptr in runs where the
- * respective streams are not required.
- * In OpenCL, \p deviceContext needs to be a valid device context.
- * In OpenCL runs StatePropagatorDataGpu is currently only used
- * with PME offload, and only on ranks with PME duty. Hence, the
- * \p pmeStream argument needs to be a valid OpenCL queue object
- * which must have been created in \p deviceContext.
- *
- * \todo Make a \p CommandStream visible in the CPU parts of the code so we
- * will not have to pass a void*.
- * \todo Make a \p DeviceContext object visible in CPU parts of the code so we
- * will not have to pass a void*.
- *
- * \param[in] pmeStream Device PME stream, nullptr allowed.
- * \param[in] localStream Device NBNXM local stream, nullptr allowed.
- * \param[in] nonLocalStream Device NBNXM non-local stream, nullptr allowed.
- * \param[in] deviceContext Device context, nullptr allowed.
- * \param[in] transferKind H2D/D2H transfer call behavior (synchronous or not).
- * \param[in] paddingSize Padding size for coordinates buffer.
- * \param[in] wcycle Wall cycle counter data.
+ * \param[in] deviceStreamManager Object that owns the DeviceContext and DeviceStreams.
+ * \param[in] transferKind H2D/D2H transfer call behavior (synchronous or not).
+ * \param[in] allocationBlockSizeDivisor Determines padding size for coordinates buffer.
+ * \param[in] wcycle Wall cycle counter data.
*/
- StatePropagatorDataGpu(const void* pmeStream,
- const void* localStream,
- const void* nonLocalStream,
- const void* deviceContext,
- GpuApiCallBehavior transferKind,
- int paddingSize,
- gmx_wallcycle* wcycle);
+ StatePropagatorDataGpu(const DeviceStreamManager& deviceStreamManager,
+ GpuApiCallBehavior transferKind,
+ int allocationBlockSizeDivisor,
+ gmx_wallcycle* wcycle);
/*! \brief Constructor to use in PME-only rank and in tests.
*
* \param[in] pmeStream Device PME stream, nullptr is not allowed.
* \param[in] deviceContext Device context (passed by reference, so a valid object is required).
* \param[in] transferKind H2D/D2H transfer call behavior (synchronous or not).
- * \param[in] paddingSize Padding size for coordinates buffer.
+ * \param[in] allocationBlockSizeDivisor Determines padding size for coordinates buffer.
* \param[in] wcycle Wall cycle counter data.
*/
- StatePropagatorDataGpu(const void* pmeStream,
- const void* deviceContext,
- GpuApiCallBehavior transferKind,
- int paddingSize,
- gmx_wallcycle* wcycle);
+ StatePropagatorDataGpu(const DeviceStream* pmeStream,
+ const DeviceContext& deviceContext,
+ GpuApiCallBehavior transferKind,
+ int allocationBlockSizeDivisor,
+ gmx_wallcycle* wcycle);
//! Move constructor
StatePropagatorDataGpu(StatePropagatorDataGpu&& other) noexcept;
*
* \returns GPU positions buffer.
*/
- DeviceBuffer<float> getCoordinates();
+ DeviceBuffer<RVec> getCoordinates();
/*! \brief Copy positions to the GPU memory.
*
*
* \returns GPU velocities buffer.
*/
- DeviceBuffer<float> getVelocities();
+ DeviceBuffer<RVec> getVelocities();
/*! \brief Copy velocities to the GPU memory.
*
*
* \returns GPU force buffer.
*/
- DeviceBuffer<float> getForces();
+ DeviceBuffer<RVec> getForces();
/*! \brief Copy forces to the GPU memory.
*
*
* \returns The device command stream to use in update-constraints.
*/
- void* getUpdateStream();
+ const DeviceStream* getUpdateStream();
/*! \brief Getter for the number of local atoms.
*