/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2020, by the GROMACS development team, led by
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
/*! \internal \file
* \brief Tests for the halo exchange
*
- * The test sets up a 2D rank topology and performs a coordinate halo
- * exchange (for both CPU and GPU codepaths), with 2 pulses in
- * the first dimension and 1 pulse in the second. Each pulse involves
- * a few non-contiguous indices. The sending rank, atom number and
- * spatial 3D index are encoded in the x values, to allow correctness
- * checking following the halo exchange.
+ * The test sets up the rank topology and performs a coordinate halo
+ * exchange (for both CPU and GPU codepaths) for several 1D and 2D
+ * pulse configurations. Each pulse involves a few non-contiguous
+ * indices. The sending rank, atom number and spatial 3D index are
+ * encoded in the x values, to allow correctness checking following
+ * the halo exchange.
*
- * \todo Add more test variations
+ * \todo Add 3D case
*
* \author Alan Gray <alang@nvidia.com>
* \ingroup module_domdec
#include "gmxpre.h"
+#include "config.h"
+
#include <array>
+#include <vector>
#include <gtest/gtest.h>
#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/device_stream.h"
# include "gromacs/gpu_utils/devicebuffer.h"
-# include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#endif
+#include "gromacs/gpu_utils/gpueventsynchronizer.h"
#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/mdtypes/inputrec.h"
}
}
-#if (GMX_GPU_CUDA && GMX_THREAD_MPI)
/*! \brief Perform GPU halo exchange, including required setup and data transfers
 *
 * \param [in] dd             Domain decomposition object
 * \param [in] box            Box matrix, passed through to the halo coordinate communication
 * \param [in] h_x            Atom coordinate data array on host
 * \param [in] numAtomsTotal  Total number of atoms, including halo
 */
-void gpuHalo(gmx_domdec_t* dd, matrix box, RVec* h_x, int numAtomsTotal)
+void gpuHalo(gmx_domdec_t* dd, matrix box, HostVector<RVec>* h_x, int numAtomsTotal)
{
+#if (GMX_GPU_CUDA && GMX_THREAD_MPI)
+ // pin memory if possible
+ changePinningPolicy(h_x, PinningPolicy::PinnedIfSupported);
// Set up GPU hardware environment and assign this MPI rank to a device
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// NOTE(review): device-context/stream setup and the d_x/d_x_size declarations
// are elided in this view of the file.
int d_x_size_alloc = -1;
reallocateDeviceBuffer(&d_x, numAtomsTotal, &d_x_size, &d_x_size_alloc, deviceContext);
// Stage host coordinates on the device with a synchronous copy
- copyToDeviceBuffer(&d_x, h_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyToDeviceBuffer(&d_x, h_x->data(), 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
+ std::array<std::vector<GpuHaloExchange>, DIM> gpuHaloExchange;
+
+ // Create halo exchange objects
+ for (int d = 0; d < dd->ndim; d++)
+ {
+ for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
+ {
+ gpuHaloExchange[d].push_back(
+ GpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, pulse, nullptr));
+ }
+ }
+
// Perform GPU halo exchange
for (int d = 0; d < dd->ndim; d++)
{
for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
{
- GpuHaloExchange gpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, deviceStream,
- deviceStream, pulse, nullptr);
- gpuHaloExchange.reinitHalo(d_x, nullptr);
- gpuHaloExchange.communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
+ gpuHaloExchange[d][pulse].reinitHalo(d_x, nullptr);
+ gpuHaloExchange[d][pulse].communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
}
}
+ // NOTE(review): barrier presumably keeps thread-MPI ranks in step before the
+ // event synchronization below — confirm whether it is strictly required.
+ MPI_Barrier(MPI_COMM_WORLD);
GpuEventSynchronizer haloCompletedEvent;
haloCompletedEvent.markEvent(deviceStream);
haloCompletedEvent.waitForEvent();
// Copy results back to host
- copyFromDeviceBuffer(h_x, &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyFromDeviceBuffer(
+ h_x->data(), &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
freeDeviceBuffer(d_x);
-}
+#else
+ GMX_UNUSED_VALUE(dd);
+ GMX_UNUSED_VALUE(box);
+ GMX_UNUSED_VALUE(h_x);
+ GMX_UNUSED_VALUE(numAtomsTotal);
#endif
+}
+
+/*! \brief Define 1D rank topology with 4 MPI tasks
+ *
+ * \param [in] dd Domain decomposition object
+ */
+void define1dRankTopology(gmx_domdec_t* dd)
+{
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ dd->neighbor[0][0] = (rank + 1) % 4;
+ dd->neighbor[0][1] = (rank == 0) ? 3 : rank - 1;
+}
/*! \brief Define 2D rank topology with 4 MPI tasks
*
}
}
+/*! \brief Define a 1D halo with 1 pulses
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] indvec Vector of index vectors
+ */
+void define1dHaloWith1Pulse(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ std::vector<int> indexvec;
+ gmx_domdec_ind_t ind;
+
+ dd->ndim = 1;
+ int nzone = 1;
+ int dimIndex = 0;
+
+ // Set up indices involved in halo
+ indexvec.clear();
+ indvec->clear();
+
+ dd->comm->cd[dimIndex].receiveInPlace = true;
+ dd->dim[dimIndex] = 0;
+ dd->ci[dimIndex] = rank;
+
+ // First pulse involves (arbitrary) indices 1 and 3
+ indexvec.push_back(1);
+ indexvec.push_back(3);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 2;
+ ind.nrecv[nzone + 1] = 2;
+ indvec->push_back(ind);
+
+ dd->comm->cd[dimIndex].ind = *indvec;
+}
+
+/*! \brief Define a 1D halo with 2 pulses
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] indvec Vector of index vectors
+ */
+void define1dHaloWith2Pulses(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ std::vector<int> indexvec;
+ gmx_domdec_ind_t ind;
+
+ dd->ndim = 1;
+ int nzone = 1;
+ int dimIndex = 0;
+
+ // Set up indices involved in halo
+ indexvec.clear();
+ indvec->clear();
+
+ dd->comm->cd[dimIndex].receiveInPlace = true;
+ dd->dim[dimIndex] = 0;
+ dd->ci[dimIndex] = rank;
+
+ // First pulse involves (arbitrary) indices 1 and 3
+ indexvec.push_back(1);
+ indexvec.push_back(3);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 2;
+ ind.nrecv[nzone + 1] = 2;
+ indvec->push_back(ind);
+
+ // Add another pulse with (arbitrary) indices 4,5,7
+ indexvec.clear();
+
+ indexvec.push_back(4);
+ indexvec.push_back(5);
+ indexvec.push_back(7);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 3;
+ ind.nrecv[nzone + 1] = 3;
+ indvec->push_back(ind);
+
+ dd->comm->cd[dimIndex].ind = *indvec;
+}
+
+/*! \brief Define a 2D halo with 1 pulse in each dimension
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] indvec Vector of index vectors
+ */
+void define2dHaloWith1PulseInEachDim(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ std::vector<int> indexvec;
+ gmx_domdec_ind_t ind;
+
+ dd->ndim = 2;
+ int nzone = 1;
+ for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
+ {
+
+ // Set up indices involved in halo
+ indexvec.clear();
+ indvec->clear();
+
+ dd->comm->cd[dimIndex].receiveInPlace = true;
+ dd->dim[dimIndex] = 0;
+ dd->ci[dimIndex] = rank;
+
+ // Single pulse involving (arbitrary) indices 1 and 3
+ indexvec.push_back(1);
+ indexvec.push_back(3);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 2;
+ ind.nrecv[nzone + 1] = 2;
+ indvec->push_back(ind);
+
+ dd->comm->cd[dimIndex].ind = *indvec;
+
+ nzone += nzone;
+ }
+}
+
/*! \brief Define a 2D halo with 2 pulses in the first dimension
 *
 * \param [in] dd      Domain decomposition object
 * \param [in] indvec  Vector of index vectors
 */
-void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t> indvec)
+void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
{
int rank;
// NOTE(review): the rank query, local declarations (indexvec, ind, nzone) and
// the per-dimension loop header are elided in this view; the body below runs
// once per decomposition dimension.
// Set up indices involved in halo
indexvec.clear();
- indvec.clear();
+ indvec->clear();
dd->comm->cd[dimIndex].receiveInPlace = true;
dd->dim[dimIndex] = 0;
// NOTE(review): population of indexvec with the first pulse's atom indices
// (1 and 3) is elided in this view.
ind.index = indexvec;
ind.nsend[nzone + 1] = 2;
ind.nrecv[nzone + 1] = 2;
- indvec.push_back(ind);
+ indvec->push_back(ind);
if (dimIndex == 0) // Add another pulse with (arbitrary) indices 4,5,7
{
indexvec.clear();
- dd->comm->cd[dimIndex].ind = indvec;
-
indexvec.push_back(4);
indexvec.push_back(5);
indexvec.push_back(7);
ind.index = indexvec;
ind.nsend[nzone + 1] = 3;
ind.nrecv[nzone + 1] = 3;
- indvec.push_back(ind);
+ indvec->push_back(ind);
}
// Publish the completed pulse list for this dimension
- dd->comm->cd[dimIndex].ind = indvec;
+ dd->comm->cd[dimIndex].ind = *indvec;
nzone += nzone;
}
}
+/*! \brief Check results for above-defined 1D halo with 1 pulse
+ *
+ * \param [in] x Atom coordinate data array
+ * \param [in] dd Domain decomposition object
+ * \param [in] numHomeAtoms Number of home atoms
+ */
+void checkResults1dHaloWith1Pulse(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+ // Check results are expected from values encoded in x data
+ for (int j = 0; j < DIM; j++)
+ {
+ // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+ }
+}
+
+/*! \brief Check results for above-defined 1D halo with 2 pulses
+ *
+ * \param [in] x Atom coordinate data array
+ * \param [in] dd Domain decomposition object
+ * \param [in] numHomeAtoms Number of home atoms
+ */
+void checkResults1dHaloWith2Pulses(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+ // Check results are expected from values encoded in x data
+ for (int j = 0; j < DIM; j++)
+ {
+ // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+ // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
+ EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
+ EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
+ }
+}
+
+/*! \brief Check results for above-defined 2D halo with 1 pulse in each dimension
+ *
+ * \param [in] x Atom coordinate data array
+ * \param [in] dd Domain decomposition object
+ * \param [in] numHomeAtoms Number of home atoms
+ */
+void checkResults2dHaloWith1PulseInEachDim(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+ // Check results are expected from values encoded in x data
+ for (int j = 0; j < DIM; j++)
+ {
+ // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+ // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
+ EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[1][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[1][0], 3, j));
+ }
+}
/*! \brief Check results for above-defined 2D halo with 2 pulses in the first dimension
*
}
}
-TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
+TEST(HaloExchangeTest, Coordinates1dHaloWith1Pulse)
{
GMX_MPI_TEST(4);
// Set up atom data
const int numHomeAtoms = 10;
- const int numHaloAtoms = 7;
+ const int numHaloAtoms = 2;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ h_x.resize(numAtomsTotal);
+
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Set up dd
+ t_inputrec ir;
+ gmx_domdec_t dd(ir);
+ dd.mpi_comm_all = MPI_COMM_WORLD;
+ gmx_domdec_comm_t comm;
+ dd.comm = &comm;
+ dd.unitCellInfo.haveScrewPBC = false;
+
+ DDAtomRanges atomRanges;
+ atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+ dd.comm->atomRanges = atomRanges;
+
+ define1dRankTopology(&dd);
+
+ std::vector<gmx_domdec_ind_t> indvec;
+ define1dHaloWith1Pulse(&dd, &indvec);
+
+ // Perform halo exchange
+ matrix box = { { 0., 0., 0. } };
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+ // Check results
+ checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
+
+ // Check results
+ checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
+ }
+}
+
+TEST(HaloExchangeTest, Coordinates1dHaloWith2Pulses)
+{
+ GMX_MPI_TEST(4);
+
+ // Set up atom data
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 5;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ h_x.resize(numAtomsTotal);
+
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Set up dd
+ t_inputrec ir;
+ gmx_domdec_t dd(ir);
+ dd.mpi_comm_all = MPI_COMM_WORLD;
+ gmx_domdec_comm_t comm;
+ dd.comm = &comm;
+ dd.unitCellInfo.haveScrewPBC = false;
+
+ DDAtomRanges atomRanges;
+ atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+ dd.comm->atomRanges = atomRanges;
+
+ define1dRankTopology(&dd);
+
+ std::vector<gmx_domdec_ind_t> indvec;
+ define1dHaloWith2Pulses(&dd, &indvec);
+
+ // Perform halo exchange
+ matrix box = { { 0., 0., 0. } };
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+ // Check results
+ checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
+
+ // Check results
+ checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
+ }
+}
+
+
+// Exercises the 2D halo with one pulse per dimension, CPU and GPU codepaths
+TEST(HaloExchangeTest, Coordinates2dHaloWith1PulseInEachDim)
+{
+ GMX_MPI_TEST(4);
+
+ // Set up atom data
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 4;
const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
HostVector<RVec> h_x;
- changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
h_x.resize(numAtomsTotal);
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// NOTE(review): the domain-decomposition setup (dd, comm, atomRanges) is
// elided in this view; it matches the construction used in the sibling tests.
define2dRankTopology(&dd);
std::vector<gmx_domdec_ind_t> indvec;
- define2dHaloWith2PulsesInDim1(&dd, indvec);
+ define2dHaloWith1PulseInEachDim(&dd, &indvec);
// Perform halo exchange
matrix box = { { 0., 0., 0. } };
dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
// Check results
- checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
+ checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
+
+ // Check results
+ checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
+ }
+}
+
+TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
+{
+ GMX_MPI_TEST(4);
+
+ // Set up atom data
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 7;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ h_x.resize(numAtomsTotal);
-#if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
- // Re-initialize input
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
- // Perform GPU halo exchange
- gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+ // Set up dd
+ t_inputrec ir;
+ gmx_domdec_t dd(ir);
+ dd.mpi_comm_all = MPI_COMM_WORLD;
+ gmx_domdec_comm_t comm;
+ dd.comm = &comm;
+ dd.unitCellInfo.haveScrewPBC = false;
+
+ DDAtomRanges atomRanges;
+ atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+ dd.comm->atomRanges = atomRanges;
+
+ define2dRankTopology(&dd);
+
+ std::vector<gmx_domdec_ind_t> indvec;
+ define2dHaloWith2PulsesInDim1(&dd, &indvec);
+
+ // Perform halo exchange
+ matrix box = { { 0., 0., 0. } };
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
// Check results
checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
-#endif
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
+
+ // Check results
+ checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
+ }
}
} // namespace