Insert barrier after each GPU halo test
[alexxy/gromacs.git] / src / gromacs / domdec / tests / haloexchange_mpi.cpp
index 440af109f914603b7d32a333b5d4957bf2dc19bf..45b8364f8087a5a81373f9748ceb95d53baa446e 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2020, by the GROMACS development team, led by
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -53,6 +53,7 @@
 #include "config.h"
 
 #include <array>
+#include <vector>
 
 #include <gtest/gtest.h>
 
@@ -62,8 +63,8 @@
 #if GMX_GPU_CUDA
 #    include "gromacs/gpu_utils/device_stream.h"
 #    include "gromacs/gpu_utils/devicebuffer.h"
-#    include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
 #endif
+#include "gromacs/gpu_utils/gpueventsynchronizer.h"
 #include "gromacs/gpu_utils/hostallocator.h"
 #include "gromacs/mdtypes/inputrec.h"
 
@@ -142,15 +143,15 @@ void gpuHalo(gmx_domdec_t* dd, matrix box, HostVector<RVec>* h_x, int numAtomsTo
     GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
     coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
 
-    std::vector<std::unique_ptr<gmx::GpuHaloExchange>> gpuHaloExchange[DIM];
+    std::array<std::vector<GpuHaloExchange>, DIM> gpuHaloExchange;
 
     // Create halo exchange objects
     for (int d = 0; d < dd->ndim; d++)
     {
         for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
         {
-            gpuHaloExchange[d].push_back(std::make_unique<GpuHaloExchange>(
-                    dd, d, MPI_COMM_WORLD, deviceContext, deviceStream, deviceStream, pulse, nullptr));
+            gpuHaloExchange[d].push_back(
+                    GpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, pulse, nullptr));
         }
     }
 
@@ -159,18 +160,19 @@ void gpuHalo(gmx_domdec_t* dd, matrix box, HostVector<RVec>* h_x, int numAtomsTo
     {
         for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
         {
-            gpuHaloExchange[d][pulse]->reinitHalo(d_x, nullptr);
-            gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
+            gpuHaloExchange[d][pulse].reinitHalo(d_x, nullptr);
+            gpuHaloExchange[d][pulse].communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
         }
     }
+    MPI_Barrier(MPI_COMM_WORLD);
 
     GpuEventSynchronizer haloCompletedEvent;
     haloCompletedEvent.markEvent(deviceStream);
     haloCompletedEvent.waitForEvent();
 
     // Copy results back to host
-    copyFromDeviceBuffer(h_x->data(), &d_x, 0, numAtomsTotal, deviceStream,
-                         GpuApiCallBehavior::Sync, nullptr);
+    copyFromDeviceBuffer(
+            h_x->data(), &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
 
     freeDeviceBuffer(d_x);
 #else