Merge branch 'origin/release-2020' into merge-release-2020-into-master
[alexxy/gromacs.git] / src / gromacs / mdtypes / state_propagator_data_gpu.h
index b83d327916e3548d604648d80173497882c4f3e2..89e6d622ac270a39ac566501ac558da92de7934b 100644 (file)
 
 #include "locality.h"
 
+class DeviceContext;
+class DeviceStream;
 class GpuEventSynchronizer;
 struct gmx_wallcycle;
 
 namespace gmx
 {
+class DeviceStreamManager;
 
 class StatePropagatorDataGpu
 {
@@ -86,40 +89,15 @@ public:
      * ops are offloaded. This feature is currently not available in OpenCL and
      * hence these streams are not set in these builds.
      *
-     * \note In CUDA, the update stream is created in the constructor as a temporary
-     *       solution, in place until the stream manager is introduced.
-     *       Note that this makes it impossible to construct this object in CUDA
-     *       builds executing on a host without any CUDA-capable device available.
-     *
-     * \note In CUDA, \p deviceContext is unused, hence always nullptr;
-     *       all stream arguments can also be nullptr in runs where the
-     *       respective streams are not required.
-     *       In OpenCL, \p deviceContext needs to be a valid device context.
-     *       In OpenCL runs StatePropagatorDataGpu is currently only used
-     *       with PME offload, and only on ranks with PME duty. Hence, the
-     *       \p pmeStream argument needs to be a valid OpenCL queue object
-     *       which must have been created in \p deviceContext.
-     *
-     * \todo Make a \p CommandStream visible in the CPU parts of the code so we
-     *       will not have to pass a void*.
-     * \todo Make a \p DeviceContext object visible in CPU parts of the code so we
-     *       will not have to pass a void*.
-     *
-     *  \param[in] pmeStream       Device PME stream, nullptr allowed.
-     *  \param[in] localStream     Device NBNXM local stream, nullptr allowed.
-     *  \param[in] nonLocalStream  Device NBNXM non-local stream, nullptr allowed.
-     *  \param[in] deviceContext   Device context, nullptr allowed.
-     *  \param[in] transferKind    H2D/D2H transfer call behavior (synchronous or not).
-     *  \param[in] paddingSize     Padding size for coordinates buffer.
-     *  \param[in] wcycle          Wall cycle counter data.
+     *  \param[in] deviceStreamManager         Object that owns the DeviceContext and DeviceStreams.
+     *  \param[in] transferKind                H2D/D2H transfer call behavior (synchronous or not).
+     *  \param[in] allocationBlockSizeDivisor  Determines padding size for coordinates buffer.
+     *  \param[in] wcycle                      Wall cycle counter data.
      */
-    StatePropagatorDataGpu(const void*        pmeStream,
-                           const void*        localStream,
-                           const void*        nonLocalStream,
-                           const void*        deviceContext,
-                           GpuApiCallBehavior transferKind,
-                           int                paddingSize,
-                           gmx_wallcycle*     wcycle);
+    StatePropagatorDataGpu(const DeviceStreamManager& deviceStreamManager,
+                           GpuApiCallBehavior         transferKind,
+                           int                        allocationBlockSizeDivisor,
+                           gmx_wallcycle*             wcycle);
 
     /*! \brief Constructor to use in PME-only rank and in tests.
      *
@@ -134,14 +112,14 @@ public:
      *  \param[in] pmeStream       Device PME stream, nullptr is not allowed.
      *  \param[in] deviceContext   Device context; passed by reference, so it cannot be null.
      *  \param[in] transferKind    H2D/D2H transfer call behavior (synchronous or not).
-     *  \param[in] paddingSize     Padding size for coordinates buffer.
+     *  \param[in] allocationBlockSizeDivisor Determines padding size for coordinates buffer.
      *  \param[in] wcycle          Wall cycle counter data.
      */
-    StatePropagatorDataGpu(const void*        pmeStream,
-                           const void*        deviceContext,
-                           GpuApiCallBehavior transferKind,
-                           int                paddingSize,
-                           gmx_wallcycle*     wcycle);
+    StatePropagatorDataGpu(const DeviceStream*  pmeStream,
+                           const DeviceContext& deviceContext,
+                           GpuApiCallBehavior   transferKind,
+                           int                  allocationBlockSizeDivisor,
+                           gmx_wallcycle*       wcycle);
 
     //! Move constructor
     StatePropagatorDataGpu(StatePropagatorDataGpu&& other) noexcept;
@@ -186,7 +164,7 @@ public:
      *
      *  \returns GPU positions buffer.
      */
-    DeviceBuffer<float> getCoordinates();
+    DeviceBuffer<RVec> getCoordinates();
 
     /*! \brief Copy positions to the GPU memory.
      *
@@ -247,7 +225,7 @@ public:
      *
      *  \returns GPU velocities buffer.
      */
-    DeviceBuffer<float> getVelocities();
+    DeviceBuffer<RVec> getVelocities();
 
     /*! \brief Copy velocities to the GPU memory.
      *
@@ -282,7 +260,7 @@ public:
      *
      *  \returns GPU force buffer.
      */
-    DeviceBuffer<float> getForces();
+    DeviceBuffer<RVec> getForces();
 
     /*! \brief Copy forces to the GPU memory.
      *
@@ -332,7 +310,7 @@ public:
      *
      *  \returns The device command stream to use in update-constraints.
      */
-    void* getUpdateStream();
+    const DeviceStream* getUpdateStream();
 
     /*! \brief Getter for the number of local atoms.
      *