Fix race condition in GPU X halo exchange with GPU update
authorAlan Gray <alangraygerrit@gmail.com>
Sat, 2 Nov 2019 19:24:43 +0000 (12:24 -0700)
committerMark Abraham <mark.j.abraham@gmail.com>
Mon, 25 Nov 2019 21:01:11 +0000 (22:01 +0100)
When the GPU Update featured is enabled on multi-GPU with the GPU halo
exchange feature, it was possible that the GPU receiving the halo data
would still be working on the previous timestep and writing to the
same coordinate buffer in the GPU update, causing a race
condition. This change replaces the pre-existing synchronization
mechanism of a MPI Send/Recv of a boolean flag with the same but
exchanging the events that are recorded when the coordinates are ready
on the device. The remote event is then enqueued to the stream that
the GPU uses for pushing the data, satisfying the dependency of the
push on the remote update being completed on the GPU.

Change-Id: Ie16a93dcc3064d73da1e560d82b4e76870d03e52

src/gromacs/domdec/gpuhaloexchange_impl.cu
src/gromacs/domdec/gpuhaloexchange_impl.cuh

index d917be3f3998e16c770f12b2d8faae85394bf8e7..09061493b38afd1d11ca26b5fc83c3da6b73bd39 100644 (file)
@@ -235,7 +235,7 @@ void GpuHaloExchange::Impl::communicateHaloCoordinates(const matrix          box
         launchGpuKernel(kernelFn, config, nullptr, "Domdec GPU Apply X Halo Exchange", kernelArgs);
     }
 
-    communicateHaloData(d_x_, HaloQuantity::HaloCoordinates);
+    communicateHaloData(d_x_, HaloQuantity::HaloCoordinates, coordinatesReadyOnDeviceEvent);
 
     return;
 }
@@ -246,7 +246,7 @@ void GpuHaloExchange::Impl::communicateHaloForces(bool accumulateForces)
 {
 
     // Communicate halo data (in non-local stream)
-    communicateHaloData(d_f_, HaloQuantity::HaloForces);
+    communicateHaloData(d_f_, HaloQuantity::HaloForces, nullptr);
 
     float3* d_f = d_f_;
 
@@ -292,7 +292,9 @@ void GpuHaloExchange::Impl::communicateHaloForces(bool accumulateForces)
 }
 
 
-void GpuHaloExchange::Impl::communicateHaloData(float3* d_ptr, HaloQuantity haloQuantity)
+void GpuHaloExchange::Impl::communicateHaloData(float3*               d_ptr,
+                                                HaloQuantity          haloQuantity,
+                                                GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
 {
 
     void* sendPtr;
@@ -310,10 +312,14 @@ void GpuHaloExchange::Impl::communicateHaloData(float3* d_ptr, HaloQuantity halo
         recvRank  = recvRankX_;
 
 #if GMX_MPI
-        // Wait for signal from receiving task that it is ready, and similarly send signal to task that will push data to this task
-        char thisTaskIsReady, remoteTaskIsReady;
-        MPI_Sendrecv(&thisTaskIsReady, sizeof(char), MPI_BYTE, recvRank, 0, &remoteTaskIsReady,
-                     sizeof(char), MPI_BYTE, sendRank, 0, mpi_comm_mysim_, MPI_STATUS_IGNORE);
+        // Wait for event from receiving task that remote coordinates are ready, and enqueue that event to stream used
+        // for subsequent data push. This avoids a race condition with the remote data being written in the previous timestep.
+        // Similarly send event to task that will push data to this task.
+        GpuEventSynchronizer* remoteCoordinatesReadyOnDeviceEvent;
+        MPI_Sendrecv(&coordinatesReadyOnDeviceEvent, sizeof(GpuEventSynchronizer*), MPI_BYTE,
+                     recvRank, 0, &remoteCoordinatesReadyOnDeviceEvent, sizeof(GpuEventSynchronizer*),
+                     MPI_BYTE, sendRank, 0, mpi_comm_mysim_, MPI_STATUS_IGNORE);
+        remoteCoordinatesReadyOnDeviceEvent->enqueueWaitEvent(nonLocalStream_);
 #endif
     }
     else
index 626810b04ad4f6eabd8d4a7086436e85e651071a..9f4bf8cdd84744304479356da2d7fda46479ece0 100644 (file)
@@ -100,8 +100,11 @@ private:
     /*! \brief Data transfer wrapper for GPU halo exchange
      * \param [inout] d_ptr      pointer to coordinates or force buffer in GPU memory
      * \param [in] haloQuantity  switch on whether X or F halo exchange is being performed
+     * \param [in] coordinatesReadyOnDeviceEvent event recorded when coordinates have been copied to device
      */
-    void communicateHaloData(float3* d_ptr, HaloQuantity haloQuantity);
+    void communicateHaloData(float3*               d_ptr,
+                             HaloQuantity          haloQuantity,
+                             GpuEventSynchronizer* coordinatesReadyOnDeviceEvent);
 
     /*! \brief Data transfer for GPU halo exchange using CUDA memcopies
      * \param [inout] sendPtr    address to send data from