#include "config.h"
#include <array>
+#include <numeric>
#include <vector>
#include <gtest/gtest.h>
copyToDeviceBuffer(&d_x, h_x->data(), 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
- GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
+ const int numPulses = std::accumulate(
+ dd->comm->cd.begin(), dd->comm->cd.end(), 0, [](const int a, const auto& b) {
+ return a + b.numPulses();
+ });
+ const int numExtraConsumptions = GMX_THREAD_MPI ? 1 : 0;
+ // Will be consumed once for each pulse and, with tMPI, once more for the dim=0, pulse=0 case
+ GpuEventSynchronizer coordinatesReadyOnDeviceEvent(numPulses + numExtraConsumptions,
+ numPulses + numExtraConsumptions);
coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
std::array<std::vector<GpuHaloExchange>, DIM> gpuHaloExchange;
gpuHaloExchange[d][pulse].communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
}
}
+ // Barrier is needed to prevent other threads from using events after their owner has exited and destroyed the context.
MPI_Barrier(MPI_COMM_WORLD);
- GpuEventSynchronizer haloCompletedEvent;
- haloCompletedEvent.markEvent(deviceStream);
- haloCompletedEvent.waitForEvent();
+ deviceStream.synchronize();
// Copy results back to host
copyFromDeviceBuffer(