X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=blobdiff_plain;f=src%2Fgromacs%2Fewald%2Fpme_coordinate_receiver_gpu_impl.h;h=d268091771bab038053f1b3b4ad8031da141d880;hb=c088e63019ebc68760d35c3bc916015864aa8e89;hp=604079c0b0fbcead4bebf03ae2a99eea132c5962;hpb=f59f6ec6e04f4a3732876c2c76250b8444fdcb69;p=alexxy%2Fgromacs.git

diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h
index 604079c0b0..d268091771 100644
--- a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h
+++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h
@@ -52,25 +52,41 @@ class GpuEventSynchronizer;
 
 namespace gmx
 {
 
-/*! \internal \brief Class with interfaces and data for CUDA version of PME coordinate receiving functionality */
+/*! \brief Object to manage communications with a specific PP rank */
+struct PpCommManager
+{
+    //! Details of PP rank that may be updated after repartitioning
+    const PpRanks& ppRank;
+    //! Stream used for communication with PP rank
+    std::unique_ptr<DeviceStream> stream;
+    //! Synchronization event to receive from PP rank
+    GpuEventSynchronizer* sync = nullptr;
+    //! Range of atoms corresponding to PP rank
+    std::tuple<int, int> atomRange = { 0, 0 };
+};
+
+/*! \internal \brief Class with interfaces and data for CUDA version of PME coordinate receiving functionality */
 class PmeCoordinateReceiverGpu::Impl
 {
 
 public:
     /*! \brief Creates PME GPU coordinate receiver object
-     * \param[in] pmeStream       CUDA stream used for PME computations
      * \param[in] comm            Communicator used for simulation
+     * \param[in] deviceContext   GPU context
      * \param[in] ppRanks         List of PP ranks
      */
-    Impl(const DeviceStream& pmeStream, MPI_Comm comm, gmx::ArrayRef<PpRanks> ppRanks);
+    Impl(MPI_Comm comm, const DeviceContext& deviceContext, gmx::ArrayRef<PpRanks> ppRanks);
     ~Impl();
 
     /*! \brief
-     * send coordinates buffer address to PP rank
+     * Re-initialize: set atom ranges and, for thread-MPI case,
+     * send coordinates buffer address to PP rank.
+     * This is required after repartitioning since atom ranges and
+     * buffer allocations may have changed.
      * \param[in] d_x   coordinates buffer in GPU memory
      */
-    void sendCoordinateBufferAddressToPpRanks(DeviceBuffer<RVec> d_x);
+    void reinitCoordinateReceiver(DeviceBuffer<RVec> d_x);
 
     /*! \brief
      * Receive coordinate synchronizer pointer from the PP ranks.
@@ -88,24 +104,47 @@ public:
     void launchReceiveCoordinatesFromPpCudaMpi(DeviceBuffer<RVec> recvbuf, int numAtoms, int numBytes, int ppRank);
 
     /*! \brief
-     * For lib MPI, wait for coordinates from PP ranks
-     * For thread MPI, enqueue PP coordinate transfer event into PME stream
+     * For lib MPI, wait for coordinates from any PP rank
+     * For thread MPI, enqueue PP coordinate transfer event received from PP
+     * rank determined from pipeline stage into given stream
+     * \param[in] pipelineStage  stage of pipeline corresponding to this transfer
+     * \param[in] deviceStream   stream in which to enqueue the wait event.
+     * \returns                  rank of sending PP task
+     */
+    int synchronizeOnCoordinatesFromPpRank(int pipelineStage, const DeviceStream& deviceStream);
+
+    /*! \brief Perform above synchronizeOnCoordinatesFromPpRank for all PP ranks,
+     * enqueueing all events to a single stream
+     * \param[in] deviceStream   stream in which to enqueue the wait events.
+     */
+    void synchronizeOnCoordinatesFromAllPpRanks(const DeviceStream& deviceStream);
+
+    /*! \brief
+     * Return pointer to stream associated with specific PP rank sender index
+     * \param[in] senderIndex    Index of sender PP rank.
+     */
+    DeviceStream* ppCommStream(int senderIndex);
+
+    /*! \brief
+     * Returns range of atoms involved in communication associated with specific PP rank sender index
+     * \param[in] senderIndex    Index of sender PP rank.
+     */
+    std::tuple<int, int> ppCommAtomRange(int senderIndex);
+
+    /*! \brief
+     * Return number of PP ranks involved in PME-PP communication
      */
-    void synchronizeOnCoordinatesFromPpRanks();
+    int ppCommNumSenderRanks();
 
 private:
-    //! CUDA stream for PME operations
-    const DeviceStream& pmeStream_;
     //! communicator for simulation
     MPI_Comm comm_;
-    //! list of PP ranks
-    gmx::ArrayRef<PpRanks> ppRanks_;
-    //! vector of MPI requests
-    std::vector<MPI_Request> request_;
-    //! vector of synchronization events to receive from PP tasks
-    std::vector<GpuEventSynchronizer*> ppSync_;
-    //! counter of messages to receive
-    int recvCount_ = 0;
+    //! MPI requests, one per PP rank
+    std::vector<MPI_Request> requests_;
+    //! GPU context handle (not used in CUDA)
+    const DeviceContext& deviceContext_;
+    //! Communication manager objects corresponding to multiple sending PP ranks
+    std::vector<PpCommManager> ppCommManagers_;
 };
 
 } // namespace gmx
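
Usage note (not part of the diff above): a minimal sketch of how a caller on the PME side might drive the per-rank receive pipeline declared in this header. Only ppCommNumSenderRanks(), ppCommStream(), synchronizeOnCoordinatesFromPpRank() and ppCommAtomRange() come from the interface shown in the diff; the receiver reference, the launchSpreadForAtomRange() helper, the loop structure and the include path are illustrative assumptions. Since Impl is nested inside the public PmeCoordinateReceiverGpu wrapper, real code would live in the corresponding .cu file or go through the wrapper's forwarding methods; the sketch is written free-standing purely for illustration.

// Sketch only: assumes access to a constructed PmeCoordinateReceiverGpu::Impl.
#include <tuple>

#include "gromacs/ewald/pme_coordinate_receiver_gpu_impl.h"

// Hypothetical helper that spreads charges for one atom range in a given stream.
void launchSpreadForAtomRange(int atomStart, int atomEnd, const DeviceStream& stream);

void receiveCoordinatesPipelined(gmx::PmeCoordinateReceiverGpu::Impl& receiver)
{
    // One pipeline stage per sending PP rank.
    const int numSenders = receiver.ppCommNumSenderRanks();
    for (int stage = 0; stage < numSenders; stage++)
    {
        // Per-rank stream held by the corresponding PpCommManager.
        DeviceStream* stream = receiver.ppCommStream(stage);

        // Lib MPI: wait for the coordinates of a sending PP rank;
        // thread MPI: enqueue that rank's transfer event into the per-rank stream.
        receiver.synchronizeOnCoordinatesFromPpRank(stage, *stream);

        // Atom range contributed by this PP rank; dependent work goes into the same stream,
        // so later stages need not wait for earlier ranks' data to arrive.
        const auto [atomStart, atomEnd] = receiver.ppCommAtomRange(stage);
        launchSpreadForAtomRange(atomStart, atomEnd, *stream);
    }
}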