2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Tests for the halo exchange
38 * The test sets up the rank topology and performs a coordinate halo
39 * exchange (for both CPU and GPU codepaths) for several 1D and 2D
40 * pulse configirations. Each pulse involves a few non-contiguous
41 * indices. The sending rank, atom number and spatial 3D index are
42 * encoded in the x values, to allow correctness checking following
47 * \author Alan Gray <alang@nvidia.com>
48 * \ingroup module_domdec
55 #include <gtest/gtest.h>
57 #include "gromacs/domdec/atomdistribution.h"
58 #include "gromacs/domdec/domdec_internal.h"
59 #include "gromacs/domdec/gpuhaloexchange.h"
61 # include "gromacs/gpu_utils/device_stream.h"
62 # include "gromacs/gpu_utils/devicebuffer.h"
63 # include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
65 #include "gromacs/gpu_utils/hostallocator.h"
66 #include "gromacs/mdtypes/inputrec.h"
68 #include "testutils/mpitest.h"
69 #include "testutils/test_hardware_environment.h"
78 /*! \brief Get encoded numerical value for sending rank, atom number and spatial 3D index
80 * \param [in] sendRank MPI rank of sender
81 * \param [in] atomNumber Atom number
82 * \param [in] spatial3dIndex Spatial 3D Index
84 * \returns Encoded value
86 float encodedValue(const int sendRank, const int atomNumber, const int spatial3dIndex)
88 return sendRank * 1000 + atomNumber * 100 + spatial3dIndex;
91 /*! \brief Initialize halo array
93 * \param [in] x Atom coordinate data array
94 * \param [in] numHomeAtoms Number of home atoms
95 * \param [in] numAtomsTotal Total number of atoms, including halo
97 void initHaloData(RVec* x, const int numHomeAtoms, const int numAtomsTotal)
100 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
102 for (int i = 0; i < numAtomsTotal; i++)
104 for (int j = 0; j < DIM; j++)
106 x[i][j] = i < numHomeAtoms ? encodedValue(rank, i, j) : -1;
111 /*! \brief Perform GPU halo exchange, including required setup and data transfers
113 * \param [in] dd Domain decomposition object
114 * \param [in] box Box matrix
115 * \param [in] h_x Atom coordinate data array on host
116 * \param [in] numAtomsTotal Total number of atoms, including halo
118 void gpuHalo(gmx_domdec_t* dd, matrix box, RVec* h_x, int numAtomsTotal)
120 #if (GMX_GPU_CUDA && GMX_THREAD_MPI)
121 // Set up GPU hardware environment and assign this MPI rank to a device
123 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
124 int numDevices = getTestHardwareEnvironment()->getTestDeviceList().size();
125 const auto& testDevice = getTestHardwareEnvironment()->getTestDeviceList()[rank % numDevices];
126 const auto& deviceContext = testDevice->deviceContext();
127 setActiveDevice(testDevice->deviceInfo());
128 DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
130 // Set up GPU buffer and copy input data from host
131 DeviceBuffer<RVec> d_x;
133 int d_x_size_alloc = -1;
134 reallocateDeviceBuffer(&d_x, numAtomsTotal, &d_x_size, &d_x_size_alloc, deviceContext);
136 copyToDeviceBuffer(&d_x, h_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
138 GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
139 coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
141 // Perform GPU halo exchange
142 for (int d = 0; d < dd->ndim; d++)
144 for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
146 GpuHaloExchange gpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, deviceStream,
147 deviceStream, pulse, nullptr);
148 gpuHaloExchange.reinitHalo(d_x, nullptr);
149 gpuHaloExchange.communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
153 GpuEventSynchronizer haloCompletedEvent;
154 haloCompletedEvent.markEvent(deviceStream);
155 haloCompletedEvent.waitForEvent();
157 // Copy results back to host
158 copyFromDeviceBuffer(h_x, &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
160 freeDeviceBuffer(d_x);
162 GMX_UNUSED_VALUE(dd);
163 GMX_UNUSED_VALUE(box);
164 GMX_UNUSED_VALUE(h_x);
165 GMX_UNUSED_VALUE(numAtomsTotal);
169 /*! \brief Define 1D rank topology with 4 MPI tasks
171 * \param [in] dd Domain decomposition object
173 void define1dRankTopology(gmx_domdec_t* dd)
176 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
178 dd->neighbor[0][0] = (rank + 1) % 4;
179 dd->neighbor[0][1] = (rank == 0) ? 3 : rank - 1;
182 /*! \brief Define 2D rank topology with 4 MPI tasks
189 * \param [in] dd Domain decomposition object
191 void define2dRankTopology(gmx_domdec_t* dd)
195 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
200 dd->neighbor[0][0] = 1;
201 dd->neighbor[0][1] = 1;
202 dd->neighbor[1][0] = 2;
203 dd->neighbor[1][1] = 2;
206 dd->neighbor[0][0] = 0;
207 dd->neighbor[0][1] = 0;
208 dd->neighbor[1][0] = 3;
209 dd->neighbor[1][1] = 3;
212 dd->neighbor[0][0] = 3;
213 dd->neighbor[0][1] = 3;
214 dd->neighbor[1][0] = 0;
215 dd->neighbor[1][1] = 0;
218 dd->neighbor[0][0] = 2;
219 dd->neighbor[0][1] = 2;
220 dd->neighbor[1][0] = 1;
221 dd->neighbor[1][1] = 1;
226 /*! \brief Define a 1D halo with 1 pulses
228 * \param [in] dd Domain decomposition object
229 * \param [in] indvec Vector of index vectors
231 void define1dHaloWith1Pulse(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
235 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
237 std::vector<int> indexvec;
238 gmx_domdec_ind_t ind;
244 // Set up indices involved in halo
248 dd->comm->cd[dimIndex].receiveInPlace = true;
249 dd->dim[dimIndex] = 0;
250 dd->ci[dimIndex] = rank;
252 // First pulse involves (arbitrary) indices 1 and 3
253 indexvec.push_back(1);
254 indexvec.push_back(3);
256 ind.index = indexvec;
257 ind.nsend[nzone + 1] = 2;
258 ind.nrecv[nzone + 1] = 2;
259 indvec->push_back(ind);
261 dd->comm->cd[dimIndex].ind = *indvec;
264 /*! \brief Define a 1D halo with 2 pulses
266 * \param [in] dd Domain decomposition object
267 * \param [in] indvec Vector of index vectors
269 void define1dHaloWith2Pulses(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
273 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
275 std::vector<int> indexvec;
276 gmx_domdec_ind_t ind;
282 // Set up indices involved in halo
286 dd->comm->cd[dimIndex].receiveInPlace = true;
287 dd->dim[dimIndex] = 0;
288 dd->ci[dimIndex] = rank;
290 // First pulse involves (arbitrary) indices 1 and 3
291 indexvec.push_back(1);
292 indexvec.push_back(3);
294 ind.index = indexvec;
295 ind.nsend[nzone + 1] = 2;
296 ind.nrecv[nzone + 1] = 2;
297 indvec->push_back(ind);
299 // Add another pulse with (arbitrary) indices 4,5,7
302 indexvec.push_back(4);
303 indexvec.push_back(5);
304 indexvec.push_back(7);
306 ind.index = indexvec;
307 ind.nsend[nzone + 1] = 3;
308 ind.nrecv[nzone + 1] = 3;
309 indvec->push_back(ind);
311 dd->comm->cd[dimIndex].ind = *indvec;
314 /*! \brief Define a 2D halo with 1 pulse in each dimension
316 * \param [in] dd Domain decomposition object
317 * \param [in] indvec Vector of index vectors
319 void define2dHaloWith1PulseInEachDim(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
323 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
325 std::vector<int> indexvec;
326 gmx_domdec_ind_t ind;
330 for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
333 // Set up indices involved in halo
337 dd->comm->cd[dimIndex].receiveInPlace = true;
338 dd->dim[dimIndex] = 0;
339 dd->ci[dimIndex] = rank;
341 // Single pulse involving (arbitrary) indices 1 and 3
342 indexvec.push_back(1);
343 indexvec.push_back(3);
345 ind.index = indexvec;
346 ind.nsend[nzone + 1] = 2;
347 ind.nrecv[nzone + 1] = 2;
348 indvec->push_back(ind);
350 dd->comm->cd[dimIndex].ind = *indvec;
356 /*! \brief Define a 2D halo with 2 pulses in the first dimension
358 * \param [in] dd Domain decomposition object
359 * \param [in] indvec Vector of index vectors
361 void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
365 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
367 std::vector<int> indexvec;
368 gmx_domdec_ind_t ind;
372 for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
375 // Set up indices involved in halo
379 dd->comm->cd[dimIndex].receiveInPlace = true;
380 dd->dim[dimIndex] = 0;
381 dd->ci[dimIndex] = rank;
383 // First pulse involves (arbitrary) indices 1 and 3
384 indexvec.push_back(1);
385 indexvec.push_back(3);
387 ind.index = indexvec;
388 ind.nsend[nzone + 1] = 2;
389 ind.nrecv[nzone + 1] = 2;
390 indvec->push_back(ind);
392 if (dimIndex == 0) // Add another pulse with (arbitrary) indices 4,5,7
396 indexvec.push_back(4);
397 indexvec.push_back(5);
398 indexvec.push_back(7);
400 ind.index = indexvec;
401 ind.nsend[nzone + 1] = 3;
402 ind.nrecv[nzone + 1] = 3;
403 indvec->push_back(ind);
406 dd->comm->cd[dimIndex].ind = *indvec;
412 /*! \brief Check results for above-defined 1D halo with 1 pulse
414 * \param [in] x Atom coordinate data array
415 * \param [in] dd Domain decomposition object
416 * \param [in] numHomeAtoms Number of home atoms
418 void checkResults1dHaloWith1Pulse(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
420 // Check results are expected from values encoded in x data
421 for (int j = 0; j < DIM; j++)
423 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
424 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
425 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
429 /*! \brief Check results for above-defined 1D halo with 2 pulses
431 * \param [in] x Atom coordinate data array
432 * \param [in] dd Domain decomposition object
433 * \param [in] numHomeAtoms Number of home atoms
435 void checkResults1dHaloWith2Pulses(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
437 // Check results are expected from values encoded in x data
438 for (int j = 0; j < DIM; j++)
440 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
441 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
442 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
443 // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
444 EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
445 EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
446 EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
450 /*! \brief Check results for above-defined 2D halo with 1 pulse in each dimension
452 * \param [in] x Atom coordinate data array
453 * \param [in] dd Domain decomposition object
454 * \param [in] numHomeAtoms Number of home atoms
456 void checkResults2dHaloWith1PulseInEachDim(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
458 // Check results are expected from values encoded in x data
459 for (int j = 0; j < DIM; j++)
461 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
462 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
463 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
464 // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
465 EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[1][0], 1, j));
466 EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[1][0], 3, j));
470 /*! \brief Check results for above-defined 2D halo with 2 pulses in the first dimension
472 * \param [in] x Atom coordinate data array
473 * \param [in] dd Domain decomposition object
474 * \param [in] numHomeAtoms Number of home atoms
476 void checkResults2dHaloWith2PulsesInDim1(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
478 // Check results are expected from values encoded in x data
479 for (int j = 0; j < DIM; j++)
481 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
482 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
483 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
484 // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
485 EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
486 EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
487 EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
488 // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
489 EXPECT_EQ(x[numHomeAtoms + 5][j], encodedValue(dd->neighbor[1][0], 1, j));
490 EXPECT_EQ(x[numHomeAtoms + 6][j], encodedValue(dd->neighbor[1][0], 3, j));
494 TEST(HaloExchangeTest, Coordinates1dHaloWith1Pulse)
499 const int numHomeAtoms = 10;
500 const int numHaloAtoms = 2;
501 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
502 HostVector<RVec> h_x;
503 changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
504 h_x.resize(numAtomsTotal);
506 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
511 dd.mpi_comm_all = MPI_COMM_WORLD;
512 gmx_domdec_comm_t comm;
514 dd.unitCellInfo.haveScrewPBC = false;
516 DDAtomRanges atomRanges;
517 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
518 dd.comm->atomRanges = atomRanges;
520 define1dRankTopology(&dd);
522 std::vector<gmx_domdec_ind_t> indvec;
523 define1dHaloWith1Pulse(&dd, &indvec);
525 // Perform halo exchange
526 matrix box = { { 0., 0., 0. } };
527 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
530 checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
532 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
534 // Re-initialize input
535 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
537 // Perform GPU halo exchange
538 gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
541 checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
545 TEST(HaloExchangeTest, Coordinates1dHaloWith2Pulses)
550 const int numHomeAtoms = 10;
551 const int numHaloAtoms = 5;
552 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
553 HostVector<RVec> h_x;
554 changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
555 h_x.resize(numAtomsTotal);
557 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
562 dd.mpi_comm_all = MPI_COMM_WORLD;
563 gmx_domdec_comm_t comm;
565 dd.unitCellInfo.haveScrewPBC = false;
567 DDAtomRanges atomRanges;
568 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
569 dd.comm->atomRanges = atomRanges;
571 define1dRankTopology(&dd);
573 std::vector<gmx_domdec_ind_t> indvec;
574 define1dHaloWith2Pulses(&dd, &indvec);
576 // Perform halo exchange
577 matrix box = { { 0., 0., 0. } };
578 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
581 checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
583 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
585 // Re-initialize input
586 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
588 // Perform GPU halo exchange
589 gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
592 checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
597 TEST(HaloExchangeTest, Coordinates2dHaloWith1PulseInEachDim)
602 const int numHomeAtoms = 10;
603 const int numHaloAtoms = 4;
604 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
605 HostVector<RVec> h_x;
606 changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
607 h_x.resize(numAtomsTotal);
609 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
614 dd.mpi_comm_all = MPI_COMM_WORLD;
615 gmx_domdec_comm_t comm;
617 dd.unitCellInfo.haveScrewPBC = false;
619 DDAtomRanges atomRanges;
620 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
621 dd.comm->atomRanges = atomRanges;
623 define2dRankTopology(&dd);
625 std::vector<gmx_domdec_ind_t> indvec;
626 define2dHaloWith1PulseInEachDim(&dd, &indvec);
628 // Perform halo exchange
629 matrix box = { { 0., 0., 0. } };
630 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
633 checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
635 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
637 // Re-initialize input
638 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
640 // Perform GPU halo exchange
641 gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
644 checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
648 TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
653 const int numHomeAtoms = 10;
654 const int numHaloAtoms = 7;
655 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
656 HostVector<RVec> h_x;
657 changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
658 h_x.resize(numAtomsTotal);
660 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
665 dd.mpi_comm_all = MPI_COMM_WORLD;
666 gmx_domdec_comm_t comm;
668 dd.unitCellInfo.haveScrewPBC = false;
670 DDAtomRanges atomRanges;
671 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
672 dd.comm->atomRanges = atomRanges;
674 define2dRankTopology(&dd);
676 std::vector<gmx_domdec_ind_t> indvec;
677 define2dHaloWith2PulsesInDim1(&dd, &indvec);
679 // Perform halo exchange
680 matrix box = { { 0., 0., 0. } };
681 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
684 checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
686 #if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
687 // Re-initialize input
688 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
690 // Perform GPU halo exchange
691 gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
694 checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);