/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2020, by the GROMACS development team, led by
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
/*! \internal \file
* \brief Tests for the halo exchange
*
- * The test sets up a 2D rank topology and performs a coordinate halo
- * exchange (for both CPU and GPU codepaths), with 2 pulses in
- * the first dimension and 1 pulse in the second. Each pulse involves
- * a few non-contiguous indices. The sending rank, atom number and
- * spatial 3D index are encoded in the x values, to allow correctness
- * checking following the halo exchange.
+ * The test sets up the rank topology and performs a coordinate halo
+ * exchange (for both CPU and GPU codepaths) for several 1D and 2D
+ * pulse configurations. Each pulse involves a few non-contiguous
+ * indices. The sending rank, atom number and spatial 3D index are
+ * encoded in the x values, to allow correctness checking following
+ * the halo exchange.
*
- * \todo Add more test variations
+ * \todo Add 3D case
*
* \author Alan Gray <alang@nvidia.com>
* \ingroup module_domdec
#include "gmxpre.h"
+#include "config.h"
+
#include <array>
+#include <vector>
#include <gtest/gtest.h>
#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/device_stream.h"
# include "gromacs/gpu_utils/devicebuffer.h"
-# include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#endif
+#include "gromacs/gpu_utils/gpueventsynchronizer.h"
#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/mdtypes/inputrec.h"
}
}
-#if (GMX_GPU_CUDA && GMX_THREAD_MPI)
/*! \brief Perform GPU halo exchange, including required setup and data transfers
 *
 * \param [in] dd             Domain decomposition object
 * \param [in] box            Box matrix, passed through to the halo coordinate communication
 * \param [in] h_x            Atom coordinate data array on host
 * \param [in] numAtomsTotal  Total number of atoms, including halo
 */
-void gpuHalo(gmx_domdec_t* dd, matrix box, RVec* h_x, int numAtomsTotal)
+void gpuHalo(gmx_domdec_t* dd, matrix box, HostVector<RVec>* h_x, int numAtomsTotal)
{
+#if (GMX_GPU_CUDA && GMX_THREAD_MPI)
+ // pin memory if possible
+ changePinningPolicy(h_x, PinningPolicy::PinnedIfSupported);
// Set up GPU hardware environment and assign this MPI rank to a device
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// NOTE(review): device-context/stream setup and the d_x/d_x_size declarations
// are elided in this view of the file.
int d_x_size_alloc = -1;
reallocateDeviceBuffer(&d_x, numAtomsTotal, &d_x_size, &d_x_size_alloc, deviceContext);
// Stage host coordinates on the device with a synchronous copy
- copyToDeviceBuffer(&d_x, h_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyToDeviceBuffer(&d_x, h_x->data(), 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
+ std::array<std::vector<GpuHaloExchange>, DIM> gpuHaloExchange;
+
+ // Create halo exchange objects
+ for (int d = 0; d < dd->ndim; d++)
+ {
+ for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
+ {
+ gpuHaloExchange[d].push_back(
+ GpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, pulse, nullptr));
+ }
+ }
+
// Perform GPU halo exchange
for (int d = 0; d < dd->ndim; d++)
{
for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
{
- GpuHaloExchange gpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, deviceStream,
- deviceStream, pulse, nullptr);
- gpuHaloExchange.reinitHalo(d_x, nullptr);
- gpuHaloExchange.communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
+ gpuHaloExchange[d][pulse].reinitHalo(d_x, nullptr);
+ gpuHaloExchange[d][pulse].communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
}
}
+ // NOTE(review): barrier presumably keeps thread-MPI ranks in step before the
+ // event synchronization below — confirm whether it is strictly required.
+ MPI_Barrier(MPI_COMM_WORLD);
GpuEventSynchronizer haloCompletedEvent;
haloCompletedEvent.markEvent(deviceStream);
haloCompletedEvent.waitForEvent();
// Copy results back to host
- copyFromDeviceBuffer(h_x, &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyFromDeviceBuffer(
+ h_x->data(), &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
freeDeviceBuffer(d_x);
-}
+#else
+ GMX_UNUSED_VALUE(dd);
+ GMX_UNUSED_VALUE(box);
+ GMX_UNUSED_VALUE(h_x);
+ GMX_UNUSED_VALUE(numAtomsTotal);
#endif
+}
+
+/*! \brief Define 1D rank topology with 4 MPI tasks
+ *
+ * \param [in] dd Domain decomposition object
+ */
+void define1dRankTopology(gmx_domdec_t* dd)
+{
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ dd->neighbor[0][0] = (rank + 1) % 4;
+ dd->neighbor[0][1] = (rank == 0) ? 3 : rank - 1;
+}
/*! \brief Define 2D rank topology with 4 MPI tasks
*
}
}
+/*! \brief Define a 1D halo with 1 pulses
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] indvec Vector of index vectors
+ */
+void define1dHaloWith1Pulse(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ std::vector<int> indexvec;
+ gmx_domdec_ind_t ind;
+
+ dd->ndim = 1;
+ int nzone = 1;
+ int dimIndex = 0;
+
+ // Set up indices involved in halo
+ indexvec.clear();
+ indvec->clear();
+
+ dd->comm->cd[dimIndex].receiveInPlace = true;
+ dd->dim[dimIndex] = 0;
+ dd->ci[dimIndex] = rank;
+
+ // First pulse involves (arbitrary) indices 1 and 3
+ indexvec.push_back(1);
+ indexvec.push_back(3);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 2;
+ ind.nrecv[nzone + 1] = 2;
+ indvec->push_back(ind);
+
+ dd->comm->cd[dimIndex].ind = *indvec;
+}
+
+/*! \brief Define a 1D halo with 2 pulses
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] indvec Vector of index vectors
+ */
+void define1dHaloWith2Pulses(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ std::vector<int> indexvec;
+ gmx_domdec_ind_t ind;
+
+ dd->ndim = 1;
+ int nzone = 1;
+ int dimIndex = 0;
+
+ // Set up indices involved in halo
+ indexvec.clear();
+ indvec->clear();
+
+ dd->comm->cd[dimIndex].receiveInPlace = true;
+ dd->dim[dimIndex] = 0;
+ dd->ci[dimIndex] = rank;
+
+ // First pulse involves (arbitrary) indices 1 and 3
+ indexvec.push_back(1);
+ indexvec.push_back(3);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 2;
+ ind.nrecv[nzone + 1] = 2;
+ indvec->push_back(ind);
+
+ // Add another pulse with (arbitrary) indices 4,5,7
+ indexvec.clear();
+
+ indexvec.push_back(4);
+ indexvec.push_back(5);
+ indexvec.push_back(7);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 3;
+ ind.nrecv[nzone + 1] = 3;
+ indvec->push_back(ind);
+
+ dd->comm->cd[dimIndex].ind = *indvec;
+}
+
+/*! \brief Define a 2D halo with 1 pulse in each dimension
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] indvec Vector of index vectors
+ */
+void define2dHaloWith1PulseInEachDim(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ std::vector<int> indexvec;
+ gmx_domdec_ind_t ind;
+
+ dd->ndim = 2;
+ int nzone = 1;
+ for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
+ {
+
+ // Set up indices involved in halo
+ indexvec.clear();
+ indvec->clear();
+
+ dd->comm->cd[dimIndex].receiveInPlace = true;
+ dd->dim[dimIndex] = 0;
+ dd->ci[dimIndex] = rank;
+
+ // Single pulse involving (arbitrary) indices 1 and 3
+ indexvec.push_back(1);
+ indexvec.push_back(3);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 2;
+ ind.nrecv[nzone + 1] = 2;
+ indvec->push_back(ind);
+
+ dd->comm->cd[dimIndex].ind = *indvec;
+
+ nzone += nzone;
+ }
+}
+
/*! \brief Define a 2D halo with 2 pulses in the first dimension
 *
 * \param [in] dd      Domain decomposition object
 * \param [in] indvec  Vector of index vectors
 */
-void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t> indvec)
+void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
{
int rank;
// NOTE(review): the rank query, local declarations (indexvec, ind, nzone) and
// the per-dimension loop header are elided in this view; the body below runs
// once per decomposition dimension.
// Set up indices involved in halo
indexvec.clear();
- indvec.clear();
+ indvec->clear();
dd->comm->cd[dimIndex].receiveInPlace = true;
dd->dim[dimIndex] = 0;
// NOTE(review): population of indexvec with the first pulse's atom indices
// (1 and 3) is elided in this view.
ind.index = indexvec;
ind.nsend[nzone + 1] = 2;
ind.nrecv[nzone + 1] = 2;
- indvec.push_back(ind);
+ indvec->push_back(ind);
if (dimIndex == 0) // Add another pulse with (arbitrary) indices 4,5,7
{
indexvec.clear();
- dd->comm->cd[dimIndex].ind = indvec;
-
indexvec.push_back(4);
indexvec.push_back(5);
indexvec.push_back(7);
ind.index = indexvec;
ind.nsend[nzone + 1] = 3;
ind.nrecv[nzone + 1] = 3;
- indvec.push_back(ind);
+ indvec->push_back(ind);
}
// Publish the completed pulse list for this dimension
- dd->comm->cd[dimIndex].ind = indvec;
+ dd->comm->cd[dimIndex].ind = *indvec;
nzone += nzone;
}
}
+/*! \brief Check results for above-defined 1D halo with 1 pulse
+ *
+ * \param [in] x Atom coordinate data array
+ * \param [in] dd Domain decomposition object
+ * \param [in] numHomeAtoms Number of home atoms
+ */
+void checkResults1dHaloWith1Pulse(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+ // Check results are expected from values encoded in x data
+ for (int j = 0; j < DIM; j++)
+ {
+ // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+ }
+}
+
+/*! \brief Check results for above-defined 1D halo with 2 pulses
+ *
+ * \param [in] x Atom coordinate data array
+ * \param [in] dd Domain decomposition object
+ * \param [in] numHomeAtoms Number of home atoms
+ */
+void checkResults1dHaloWith2Pulses(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+ // Check results are expected from values encoded in x data
+ for (int j = 0; j < DIM; j++)
+ {
+ // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+ // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
+ EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
+ EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
+ }
+}
+
+/*! \brief Check results for above-defined 2D halo with 1 pulse in each dimension
+ *
+ * \param [in] x Atom coordinate data array
+ * \param [in] dd Domain decomposition object
+ * \param [in] numHomeAtoms Number of home atoms
+ */
+void checkResults2dHaloWith1PulseInEachDim(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+ // Check results are expected from values encoded in x data
+ for (int j = 0; j < DIM; j++)
+ {
+ // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+ // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
+ EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[1][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[1][0], 3, j));
+ }
+}
/*! \brief Check results for above-defined 2D halo with 2 pulses in the first dimension
*
}
}
-TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
+TEST(HaloExchangeTest, Coordinates1dHaloWith1Pulse)
{
GMX_MPI_TEST(4);
// Set up atom data
const int numHomeAtoms = 10;
- const int numHaloAtoms = 7;
+ const int numHaloAtoms = 2;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ h_x.resize(numAtomsTotal);
+
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Set up dd
+ t_inputrec ir;
+ gmx_domdec_t dd(ir);
+ dd.mpi_comm_all = MPI_COMM_WORLD;
+ gmx_domdec_comm_t comm;
+ dd.comm = &comm;
+ dd.unitCellInfo.haveScrewPBC = false;
+
+ DDAtomRanges atomRanges;
+ atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+ dd.comm->atomRanges = atomRanges;
+
+ define1dRankTopology(&dd);
+
+ std::vector<gmx_domdec_ind_t> indvec;
+ define1dHaloWith1Pulse(&dd, &indvec);
+
+ // Perform halo exchange
+ matrix box = { { 0., 0., 0. } };
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+ // Check results
+ checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
+
+ // Check results
+ checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
+ }
+}
+
+TEST(HaloExchangeTest, Coordinates1dHaloWith2Pulses)
+{
+ GMX_MPI_TEST(4);
+
+ // Set up atom data
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 5;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ h_x.resize(numAtomsTotal);
+
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Set up dd
+ t_inputrec ir;
+ gmx_domdec_t dd(ir);
+ dd.mpi_comm_all = MPI_COMM_WORLD;
+ gmx_domdec_comm_t comm;
+ dd.comm = &comm;
+ dd.unitCellInfo.haveScrewPBC = false;
+
+ DDAtomRanges atomRanges;
+ atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+ dd.comm->atomRanges = atomRanges;
+
+ define1dRankTopology(&dd);
+
+ std::vector<gmx_domdec_ind_t> indvec;
+ define1dHaloWith2Pulses(&dd, &indvec);
+
+ // Perform halo exchange
+ matrix box = { { 0., 0., 0. } };
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+ // Check results
+ checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
+
+ // Check results
+ checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
+ }
+}
+
+
+// Exercises the 2D halo with one pulse per dimension, CPU and GPU codepaths
+TEST(HaloExchangeTest, Coordinates2dHaloWith1PulseInEachDim)
+{
+ GMX_MPI_TEST(4);
+
+ // Set up atom data
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 4;
const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
HostVector<RVec> h_x;
- changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
h_x.resize(numAtomsTotal);
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// NOTE(review): the domain-decomposition setup (dd, comm, atomRanges) is
// elided in this view; it matches the construction used in the sibling tests.
define2dRankTopology(&dd);
std::vector<gmx_domdec_ind_t> indvec;
- define2dHaloWith2PulsesInDim1(&dd, indvec);
+ define2dHaloWith1PulseInEachDim(&dd, &indvec);
// Perform halo exchange
matrix box = { { 0., 0., 0. } };
dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
// Check results
- checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
+ checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
+
+ // Check results
+ checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
+ }
+}
+
+TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
+{
+ GMX_MPI_TEST(4);
+
+ // Set up atom data
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 7;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ h_x.resize(numAtomsTotal);
-#if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
- // Re-initialize input
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
- // Perform GPU halo exchange
- gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+ // Set up dd
+ t_inputrec ir;
+ gmx_domdec_t dd(ir);
+ dd.mpi_comm_all = MPI_COMM_WORLD;
+ gmx_domdec_comm_t comm;
+ dd.comm = &comm;
+ dd.unitCellInfo.haveScrewPBC = false;
+
+ DDAtomRanges atomRanges;
+ atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+ dd.comm->atomRanges = atomRanges;
+
+ define2dRankTopology(&dd);
+
+ std::vector<gmx_domdec_ind_t> indvec;
+ define2dHaloWith2PulsesInDim1(&dd, &indvec);
+
+ // Perform halo exchange
+ matrix box = { { 0., 0., 0. } };
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
// Check results
checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
-#endif
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
+
+ // Check results
+ checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
+ }
}
} // namespace