Set up event consumption counter in DomDecMpiTests
[alexxy/gromacs.git] / src / gromacs / domdec / tests / haloexchange_mpi.cpp
index fac6cc09abc64ab40da09c11e394436caf7ac87f..93aad8c288d62356af6124ba40995e6075bc5f67 100644 (file)
@@ -53,6 +53,7 @@
 #include "config.h"
 
 #include <array>
+#include <numeric>
 #include <vector>
 
 #include <gtest/gtest.h>
@@ -140,7 +141,14 @@ void gpuHalo(gmx_domdec_t* dd, matrix box, HostVector<RVec>* h_x, int numAtomsTo
 
     copyToDeviceBuffer(&d_x, h_x->data(), 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
 
-    GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
+    const int numPulses = std::accumulate(
+            dd->comm->cd.begin(), dd->comm->cd.end(), 0, [](const int a, const auto& b) {
+                return a + b.numPulses();
+            });
+    const int numExtraConsumptions = GMX_THREAD_MPI ? 1 : 0;
+    // Will be consumed once for each pulse, and, with tMPI, once more for dim=0,pulse=0 case
+    GpuEventSynchronizer coordinatesReadyOnDeviceEvent(numPulses + numExtraConsumptions,
+                                                       numPulses + numExtraConsumptions);
     coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
 
     std::array<std::vector<GpuHaloExchange>, DIM> gpuHaloExchange;
@@ -164,11 +172,10 @@ void gpuHalo(gmx_domdec_t* dd, matrix box, HostVector<RVec>* h_x, int numAtomsTo
             gpuHaloExchange[d][pulse].communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
         }
     }
+    // Barrier is needed to avoid other threads using events after its owner has exited and destroyed the context.
     MPI_Barrier(MPI_COMM_WORLD);
 
-    GpuEventSynchronizer haloCompletedEvent;
-    haloCompletedEvent.markEvent(deviceStream);
-    haloCompletedEvent.waitForEvent();
+    deviceStream.synchronize();
 
     // Copy results back to host
     copyFromDeviceBuffer(