/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2020, by the GROMACS development team, led by
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "config.h"
#include <array>
+#include <vector>
#include <gtest/gtest.h>
#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/device_stream.h"
# include "gromacs/gpu_utils/devicebuffer.h"
-# include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#endif
+#include "gromacs/gpu_utils/gpueventsynchronizer.h"
#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/mdtypes/inputrec.h"
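* \param [in,out] h_x Atom coordinate data array on host; in the GPU code path it is overwritten with the coordinates copied back from the device after the halo exchange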
* \param [in] numAtomsTotal Total number of atoms, including halo
*/
-void gpuHalo(gmx_domdec_t* dd, matrix box, RVec* h_x, int numAtomsTotal)
+void gpuHalo(gmx_domdec_t* dd, matrix box, HostVector<RVec>* h_x, int numAtomsTotal)
{
#if (GMX_GPU_CUDA && GMX_THREAD_MPI)
+ // pin memory if possible
+ changePinningPolicy(h_x, PinningPolicy::PinnedIfSupported);
// Set up GPU hardware environment and assign this MPI rank to a device
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int d_x_size_alloc = -1;
reallocateDeviceBuffer(&d_x, numAtomsTotal, &d_x_size, &d_x_size_alloc, deviceContext);
- copyToDeviceBuffer(&d_x, h_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyToDeviceBuffer(&d_x, h_x->data(), 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
+ std::array<std::vector<GpuHaloExchange>, DIM> gpuHaloExchange;
+
+ // Create halo exchange objects
+ for (int d = 0; d < dd->ndim; d++)
+ {
+ for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
+ {
+ gpuHaloExchange[d].push_back(
+ GpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, pulse, nullptr));
+ }
+ }
+
// Perform GPU halo exchange
for (int d = 0; d < dd->ndim; d++)
{
for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
{
- GpuHaloExchange gpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, deviceStream,
- deviceStream, pulse, nullptr);
- gpuHaloExchange.reinitHalo(d_x, nullptr);
- gpuHaloExchange.communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
+ gpuHaloExchange[d][pulse].reinitHalo(d_x, nullptr);
+ gpuHaloExchange[d][pulse].communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
}
}
+ MPI_Barrier(MPI_COMM_WORLD);
GpuEventSynchronizer haloCompletedEvent;
haloCompletedEvent.markEvent(deviceStream);
haloCompletedEvent.waitForEvent();
// Copy results back to host
- copyFromDeviceBuffer(h_x, &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyFromDeviceBuffer(
+ h_x->data(), &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
freeDeviceBuffer(d_x);
#else
const int numHaloAtoms = 2;
const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
HostVector<RVec> h_x;
- changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
h_x.resize(numAtomsTotal);
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
{
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
// Re-initialize input
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// Perform GPU halo exchange
- gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
// Check results
checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
const int numHaloAtoms = 5;
const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
HostVector<RVec> h_x;
- changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
h_x.resize(numAtomsTotal);
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
{
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
// Re-initialize input
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// Perform GPU halo exchange
- gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
// Check results
checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
const int numHaloAtoms = 4;
const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
HostVector<RVec> h_x;
- changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
h_x.resize(numAtomsTotal);
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
{
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
+
// Re-initialize input
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// Perform GPU halo exchange
- gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
// Check results
checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
const int numHaloAtoms = 7;
const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
HostVector<RVec> h_x;
- changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
h_x.resize(numAtomsTotal);
initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// Check results
checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
-#if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
- // Re-initialize input
- initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // early return if no devices are available.
+ if (getTestHardwareEnvironment()->getTestDeviceList().empty())
+ {
+ return;
+ }
- // Perform GPU halo exchange
- gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
- // Check results
- checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
-#endif
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, &h_x, numAtomsTotal);
+
+ // Check results
+ checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
+ }
}
} // namespace