#include "gromacs/math/vectypes.h"
#include "gromacs/utility/gmxmpi.h"

// Forward declaration: the sender now takes a GPU event synchronizer (marked
// when PME forces are ready) instead of the PME device stream itself.
class GpuEventSynchronizer;
/*! \libinternal
* \brief Contains information about the PP ranks that partner this PME rank. */
public:
/*! \brief Creates PME GPU Force sender object
- * \param[in] pmeStream CUDA stream used for PME computations
+ * \param[in] pmeForcesReady Event synchronizer marked when PME forces are ready on the GPU
* \param[in] comm Communicator used for simulation
* \param[in] ppRanks List of PP ranks
*/
- PmeForceSenderGpu(const DeviceStream& pmeStream, MPI_Comm comm, gmx::ArrayRef<PpRanks> ppRanks);
+ PmeForceSenderGpu(GpuEventSynchronizer* pmeForcesReady, MPI_Comm comm, gmx::ArrayRef<PpRanks> ppRanks);
~PmeForceSenderGpu();
/*! \brief
void sendForceBufferAddressToPpRanks(rvec* d_f);
/*! \brief
- * Send PP data to PP rank
+ * Send force synchronizer to PP rank
* \param[in] ppRank PP rank to receive data
*/
void sendFToPpCudaDirect(int ppRank);
};
/*!\brief Constructor stub. */
-PmeForceSenderGpu::PmeForceSenderGpu(const DeviceStream& /*pmeStream */,
+PmeForceSenderGpu::PmeForceSenderGpu(GpuEventSynchronizer* /*pmeForcesReady */,
MPI_Comm /* comm */,
gmx::ArrayRef<PpRanks> /* ppRanks */) :
impl_(nullptr)
{
/*! \brief Create PME-PP GPU communication object */
-PmeForceSenderGpu::Impl::Impl(const DeviceStream& pmeStream, MPI_Comm comm, gmx::ArrayRef<PpRanks> ppRanks) :
- pmeStream_(pmeStream),
+PmeForceSenderGpu::Impl::Impl(GpuEventSynchronizer* pmeForcesReady,
+ MPI_Comm comm,
+ gmx::ArrayRef<PpRanks> ppRanks) :
+ pmeForcesReady_(pmeForcesReady),
comm_(comm),
ppRanks_(ppRanks)
{
}
}
-/*! \brief Send PME data directly using CUDA memory copy */
+/*! \brief Send PME synchronizer directly using CUDA memory copy */
void PmeForceSenderGpu::Impl::sendFToPpCudaDirect(int ppRank)
{
// Data will be pulled directly from PP task
-
- // Record and send event to ensure PME force calcs are completed before PP task pulls data
- pmeSync_.markEvent(pmeStream_);
- GpuEventSynchronizer* pmeSyncPtr = &pmeSync_;
#if GMX_MPI
// TODO Using MPI_Isend would be more efficient, particularly when
// sending to multiple PP ranks
- MPI_Send(&pmeSyncPtr, sizeof(GpuEventSynchronizer*), MPI_BYTE, ppRank, 0, comm_);
+ MPI_Send(&pmeForcesReady_, sizeof(GpuEventSynchronizer*), MPI_BYTE, ppRank, 0, comm_);
#else
GMX_UNUSED_VALUE(pmeSyncPtr);
GMX_UNUSED_VALUE(ppRank);
#endif
}
-PmeForceSenderGpu::PmeForceSenderGpu(const DeviceStream& pmeStream,
+PmeForceSenderGpu::PmeForceSenderGpu(GpuEventSynchronizer* pmeForcesReady,
MPI_Comm comm,
gmx::ArrayRef<PpRanks> ppRanks) :
- impl_(new Impl(pmeStream, comm, ppRanks))
+ impl_(new Impl(pmeForcesReady, comm, ppRanks))
{
}
public:
/*! \brief Creates PME GPU Force sender object
- * \param[in] pmeStream CUDA stream used for PME computations
+ * \param[in] pmeForcesReady Event synchronizer marked when PME forces are ready on the GPU
* \param[in] comm Communicator used for simulation
* \param[in] ppRanks List of PP ranks
*/
- Impl(const DeviceStream& pmeStream, MPI_Comm comm, gmx::ArrayRef<PpRanks> ppRanks);
+ Impl(GpuEventSynchronizer* pmeForcesReady, MPI_Comm comm, gmx::ArrayRef<PpRanks> ppRanks);
~Impl();
/*! \brief
void sendForceBufferAddressToPpRanks(rvec* d_f);
/*! \brief
- * Send PP data to PP rank
+ * Send force synchronizer to PP rank
* \param[in] ppRank PP rank to receive data
*/
void sendFToPpCudaDirect(int ppRank);
private:
- //! CUDA stream for PME operations
- const DeviceStream& pmeStream_;
- //! Event triggered when to allow remote PP stream to syn with pme stream
- GpuEventSynchronizer pmeSync_;
+ //! Event indicating when PME forces are ready on the GPU in order for PP stream to sync with the PME stream
+ GpuEventSynchronizer* pmeForcesReady_;
//! communicator for simulation
MPI_Comm comm_;
//! list of PP ranks
pme_pp->mpi_comm_mysim,
pme_pp->ppRanks);
// Construct the sender with the PME force-ready event synchronizer rather
// than the PME device stream; the event is what the PP ranks wait on.
pme_pp->pmeForceSenderGpu = std::make_unique<gmx::PmeForceSenderGpu>(
        pme_gpu_get_f_ready_synchronizer(pme), pme_pp->mpi_comm_mysim, pme_pp->ppRanks);
}
// TODO: Special PME-only constructor is used here. There is no mechanism to prevent from using the other constructor here.
// This should be made safer.