- // Wait for signal from receiving task that it is ready, and similarly send signal to task that will push data to this task
- char thisTaskIsReady, remoteTaskIsReady;
- MPI_Sendrecv(&thisTaskIsReady, sizeof(char), MPI_BYTE, recvRank, 0, &remoteTaskIsReady,
- sizeof(char), MPI_BYTE, sendRank, 0, mpi_comm_mysim_, MPI_STATUS_IGNORE);
+ // Wait for event from receiving task that remote coordinates are ready, and enqueue that event to stream used
+ // for subsequent data push. This avoids a race condition with the remote data being written in the previous timestep.
+ // Similarly send event to task that will push data to this task.
+ // NOTE(review): exchanging a raw GpuEventSynchronizer* through MPI_Sendrecv is only valid when
+ // the communicating ranks share a single address space (e.g. thread-MPI); under process-based
+ // MPI the received pointer would be meaningless and the enqueueWaitEvent() call below would be
+ // undefined behavior — confirm this code path is restricted to thread-MPI builds.
+ // NOTE(review): remoteCoordinatesReadyOnDeviceEvent is dereferenced immediately after receipt
+ // with no validity check; this is safe only under the shared-address-space assumption above.
+ GpuEventSynchronizer* remoteCoordinatesReadyOnDeviceEvent;
+ MPI_Sendrecv(&coordinatesReadyOnDeviceEvent, sizeof(GpuEventSynchronizer*), MPI_BYTE,
+ recvRank, 0, &remoteCoordinatesReadyOnDeviceEvent, sizeof(GpuEventSynchronizer*),
+ MPI_BYTE, sendRank, 0, mpi_comm_mysim_, MPI_STATUS_IGNORE);
+ remoteCoordinatesReadyOnDeviceEvent->enqueueWaitEvent(nonLocalStream_);