+private:
+ /*! \brief Pull force buffer directly from GPU memory on PME
+ * rank to either GPU or CPU memory on PP task using CUDA
+ * Memory copy. This method is used with Thread-MPI.
+ * \param[out] recvPtr CPU or GPU buffer to receive PME force data
+ * \param[in] recvSize Number of elements to receive
+ * \param[in] receivePmeForceToGpu Whether receive is to GPU, otherwise CPU
+ */
+ void receiveForceFromPmeCudaDirect(float3* recvPtr, int recvSize, bool receivePmeForceToGpu);
+
+ /*! \brief Pull force buffer directly from GPU memory on PME
+ * rank to either GPU or CPU memory on PP task using CUDA-aware
+ * MPI. This method is used with process-MPI.
+ * \param[out] recvPtr CPU or GPU buffer to receive PME force data
+ * \param[in] recvSize Number of elements to receive
+ */
+ void receiveForceFromPmeCudaMpi(float3* recvPtr, int recvSize);
+
+ /*! \brief Push coordinates buffer directly to GPU memory on PME
+ * task, from either GPU or CPU memory on PP task using CUDA Memory copy.
+ * This method is used with Thread-MPI.
+ * \param[in] sendPtr CPU or GPU buffer with coordinate data
+ * \param[in] sendSize Number of elements to send
+ * \param[in] coordinatesReadyOnDeviceEvent Event recorded when coordinates are available on device
+ */
+ void sendCoordinatesToPmeCudaDirect(float3* sendPtr,
+ int sendSize,
+ GpuEventSynchronizer* coordinatesReadyOnDeviceEvent);
+
+ /*! \brief Push coordinates buffer directly to GPU memory on PME
+ * task, from either GPU or CPU memory on PP task using CUDA-aware MPI.
+ * This method is used with process-MPI.
+ * \param[in] sendPtr CPU or GPU buffer with coordinate data
+ * \param[in] sendSize Number of elements to send
+ * \param[in] coordinatesReadyOnDeviceEvent Event recorded when coordinates are available on device
+ */
+ void sendCoordinatesToPmeCudaMpi(float3* sendPtr,
+ int sendSize,
+ GpuEventSynchronizer* coordinatesReadyOnDeviceEvent);
+