Remove thread-MPI limitation for GPU direct PME-PP communication
[alexxy/gromacs.git] / src / gromacs / ewald / pme_coordinate_receiver_gpu.h
index 94aefe8501a1d15d082a04dabf33c023e7767d90..81f640df409a4110f8c71796e598114e06a2dac4 100644 (file)
@@ -83,9 +83,19 @@ public:
     void receiveCoordinatesSynchronizerFromPpCudaDirect(int ppRank);
 
     /*! \brief
-     * enqueue wait for coordinate data from PP ranks
+     * Used for lib MPI, receives coordinates from PP ranks
+     * \param[in] recvbuf   coordinates buffer in GPU memory
+     * \param[in] numAtoms  starting element in buffer
+     * \param[in] numBytes  number of bytes to transfer
+     * \param[in] ppRank    PP rank that sends the data
      */
-    void enqueueWaitReceiveCoordinatesFromPpCudaDirect();
+    void launchReceiveCoordinatesFromPpCudaMpi(DeviceBuffer<RVec> recvbuf, int numAtoms, int numBytes, int ppRank);
+
+    /*! \brief
+     * For lib MPI, wait for coordinates from PP ranks.
+     * For thread MPI, enqueue PP coordinate transfer event into PME stream.
+     */
+    void synchronizeOnCoordinatesFromPpRanks();
 
 private:
     class Impl;