2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Tests for the halo exchange
38 * The test sets up the rank topology and performs a coordinate halo
39 * exchange (for both CPU and GPU codepaths) for several 1D and 2D
40 * pulse configirations. Each pulse involves a few non-contiguous
41 * indices. The sending rank, atom number and spatial 3D index are
42 * encoded in the x values, to allow correctness checking following
47 * \author Alan Gray <alang@nvidia.com>
48 * \ingroup module_domdec
57 #include <gtest/gtest.h>
59 #include "gromacs/domdec/atomdistribution.h"
60 #include "gromacs/domdec/domdec_internal.h"
61 #include "gromacs/domdec/gpuhaloexchange.h"
63 # include "gromacs/gpu_utils/device_stream.h"
64 # include "gromacs/gpu_utils/devicebuffer.h"
65 # include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
67 #include "gromacs/gpu_utils/hostallocator.h"
68 #include "gromacs/mdtypes/inputrec.h"
70 #include "testutils/mpitest.h"
71 #include "testutils/test_hardware_environment.h"
80 /*! \brief Get encoded numerical value for sending rank, atom number and spatial 3D index
82 * \param [in] sendRank MPI rank of sender
83 * \param [in] atomNumber Atom number
84 * \param [in] spatial3dIndex Spatial 3D Index
86 * \returns Encoded value
88 float encodedValue(const int sendRank, const int atomNumber, const int spatial3dIndex)
90 return sendRank * 1000 + atomNumber * 100 + spatial3dIndex;
93 /*! \brief Initialize halo array
95 * \param [in] x Atom coordinate data array
96 * \param [in] numHomeAtoms Number of home atoms
97 * \param [in] numAtomsTotal Total number of atoms, including halo
99 void initHaloData(RVec* x, const int numHomeAtoms, const int numAtomsTotal)
102 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
104 for (int i = 0; i < numAtomsTotal; i++)
106 for (int j = 0; j < DIM; j++)
108 x[i][j] = i < numHomeAtoms ? encodedValue(rank, i, j) : -1;
113 /*! \brief Perform GPU halo exchange, including required setup and data transfers
115 * \param [in] dd Domain decomposition object
116 * \param [in] box Box matrix
117 * \param [in] h_x Atom coordinate data array on host
118 * \param [in] numAtomsTotal Total number of atoms, including halo
120 void gpuHalo(gmx_domdec_t* dd, matrix box, HostVector<RVec>* h_x, int numAtomsTotal)
122 #if (GMX_GPU_CUDA && GMX_THREAD_MPI)
123 // pin memory if possible
124 changePinningPolicy(h_x, PinningPolicy::PinnedIfSupported);
125 // Set up GPU hardware environment and assign this MPI rank to a device
127 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
128 int numDevices = getTestHardwareEnvironment()->getTestDeviceList().size();
129 const auto& testDevice = getTestHardwareEnvironment()->getTestDeviceList()[rank % numDevices];
130 const auto& deviceContext = testDevice->deviceContext();
131 setActiveDevice(testDevice->deviceInfo());
132 DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
134 // Set up GPU buffer and copy input data from host
135 DeviceBuffer<RVec> d_x;
137 int d_x_size_alloc = -1;
138 reallocateDeviceBuffer(&d_x, numAtomsTotal, &d_x_size, &d_x_size_alloc, deviceContext);
140 copyToDeviceBuffer(&d_x, h_x->data(), 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
142 GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
143 coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
145 std::vector<std::unique_ptr<gmx::GpuHaloExchange>> gpuHaloExchange[DIM];
147 // Create halo exchange objects
148 for (int d = 0; d < dd->ndim; d++)
150 for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
152 gpuHaloExchange[d].push_back(std::make_unique<GpuHaloExchange>(
153 dd, d, MPI_COMM_WORLD, deviceContext, deviceStream, deviceStream, pulse, nullptr));
157 // Perform GPU halo exchange
158 for (int d = 0; d < dd->ndim; d++)
160 for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
162 gpuHaloExchange[d][pulse]->reinitHalo(d_x, nullptr);
163 gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
167 GpuEventSynchronizer haloCompletedEvent;
168 haloCompletedEvent.markEvent(deviceStream);
169 haloCompletedEvent.waitForEvent();
171 // Copy results back to host
172 copyFromDeviceBuffer(h_x->data(), &d_x, 0, numAtomsTotal, deviceStream,
173 GpuApiCallBehavior::Sync, nullptr);
175 freeDeviceBuffer(d_x);
177 GMX_UNUSED_VALUE(dd);
178 GMX_UNUSED_VALUE(box);
179 GMX_UNUSED_VALUE(h_x);
180 GMX_UNUSED_VALUE(numAtomsTotal);
184 /*! \brief Define 1D rank topology with 4 MPI tasks
186 * \param [in] dd Domain decomposition object
188 void define1dRankTopology(gmx_domdec_t* dd)
191 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
193 dd->neighbor[0][0] = (rank + 1) % 4;
194 dd->neighbor[0][1] = (rank == 0) ? 3 : rank - 1;
197 /*! \brief Define 2D rank topology with 4 MPI tasks
204 * \param [in] dd Domain decomposition object
206 void define2dRankTopology(gmx_domdec_t* dd)
210 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
215 dd->neighbor[0][0] = 1;
216 dd->neighbor[0][1] = 1;
217 dd->neighbor[1][0] = 2;
218 dd->neighbor[1][1] = 2;
221 dd->neighbor[0][0] = 0;
222 dd->neighbor[0][1] = 0;
223 dd->neighbor[1][0] = 3;
224 dd->neighbor[1][1] = 3;
227 dd->neighbor[0][0] = 3;
228 dd->neighbor[0][1] = 3;
229 dd->neighbor[1][0] = 0;
230 dd->neighbor[1][1] = 0;
233 dd->neighbor[0][0] = 2;
234 dd->neighbor[0][1] = 2;
235 dd->neighbor[1][0] = 1;
236 dd->neighbor[1][1] = 1;
241 /*! \brief Define a 1D halo with 1 pulses
243 * \param [in] dd Domain decomposition object
244 * \param [in] indvec Vector of index vectors
246 void define1dHaloWith1Pulse(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
250 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
252 std::vector<int> indexvec;
253 gmx_domdec_ind_t ind;
259 // Set up indices involved in halo
263 dd->comm->cd[dimIndex].receiveInPlace = true;
264 dd->dim[dimIndex] = 0;
265 dd->ci[dimIndex] = rank;
267 // First pulse involves (arbitrary) indices 1 and 3
268 indexvec.push_back(1);
269 indexvec.push_back(3);
271 ind.index = indexvec;
272 ind.nsend[nzone + 1] = 2;
273 ind.nrecv[nzone + 1] = 2;
274 indvec->push_back(ind);
276 dd->comm->cd[dimIndex].ind = *indvec;
279 /*! \brief Define a 1D halo with 2 pulses
281 * \param [in] dd Domain decomposition object
282 * \param [in] indvec Vector of index vectors
284 void define1dHaloWith2Pulses(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
288 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
290 std::vector<int> indexvec;
291 gmx_domdec_ind_t ind;
297 // Set up indices involved in halo
301 dd->comm->cd[dimIndex].receiveInPlace = true;
302 dd->dim[dimIndex] = 0;
303 dd->ci[dimIndex] = rank;
305 // First pulse involves (arbitrary) indices 1 and 3
306 indexvec.push_back(1);
307 indexvec.push_back(3);
309 ind.index = indexvec;
310 ind.nsend[nzone + 1] = 2;
311 ind.nrecv[nzone + 1] = 2;
312 indvec->push_back(ind);
314 // Add another pulse with (arbitrary) indices 4,5,7
317 indexvec.push_back(4);
318 indexvec.push_back(5);
319 indexvec.push_back(7);
321 ind.index = indexvec;
322 ind.nsend[nzone + 1] = 3;
323 ind.nrecv[nzone + 1] = 3;
324 indvec->push_back(ind);
326 dd->comm->cd[dimIndex].ind = *indvec;
329 /*! \brief Define a 2D halo with 1 pulse in each dimension
331 * \param [in] dd Domain decomposition object
332 * \param [in] indvec Vector of index vectors
334 void define2dHaloWith1PulseInEachDim(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
338 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
340 std::vector<int> indexvec;
341 gmx_domdec_ind_t ind;
345 for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
348 // Set up indices involved in halo
352 dd->comm->cd[dimIndex].receiveInPlace = true;
353 dd->dim[dimIndex] = 0;
354 dd->ci[dimIndex] = rank;
356 // Single pulse involving (arbitrary) indices 1 and 3
357 indexvec.push_back(1);
358 indexvec.push_back(3);
360 ind.index = indexvec;
361 ind.nsend[nzone + 1] = 2;
362 ind.nrecv[nzone + 1] = 2;
363 indvec->push_back(ind);
365 dd->comm->cd[dimIndex].ind = *indvec;
371 /*! \brief Define a 2D halo with 2 pulses in the first dimension
373 * \param [in] dd Domain decomposition object
374 * \param [in] indvec Vector of index vectors
376 void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
380 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
382 std::vector<int> indexvec;
383 gmx_domdec_ind_t ind;
387 for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
390 // Set up indices involved in halo
394 dd->comm->cd[dimIndex].receiveInPlace = true;
395 dd->dim[dimIndex] = 0;
396 dd->ci[dimIndex] = rank;
398 // First pulse involves (arbitrary) indices 1 and 3
399 indexvec.push_back(1);
400 indexvec.push_back(3);
402 ind.index = indexvec;
403 ind.nsend[nzone + 1] = 2;
404 ind.nrecv[nzone + 1] = 2;
405 indvec->push_back(ind);
407 if (dimIndex == 0) // Add another pulse with (arbitrary) indices 4,5,7
411 indexvec.push_back(4);
412 indexvec.push_back(5);
413 indexvec.push_back(7);
415 ind.index = indexvec;
416 ind.nsend[nzone + 1] = 3;
417 ind.nrecv[nzone + 1] = 3;
418 indvec->push_back(ind);
421 dd->comm->cd[dimIndex].ind = *indvec;
427 /*! \brief Check results for above-defined 1D halo with 1 pulse
429 * \param [in] x Atom coordinate data array
430 * \param [in] dd Domain decomposition object
431 * \param [in] numHomeAtoms Number of home atoms
433 void checkResults1dHaloWith1Pulse(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
435 // Check results are expected from values encoded in x data
436 for (int j = 0; j < DIM; j++)
438 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
439 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
440 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
444 /*! \brief Check results for above-defined 1D halo with 2 pulses
446 * \param [in] x Atom coordinate data array
447 * \param [in] dd Domain decomposition object
448 * \param [in] numHomeAtoms Number of home atoms
450 void checkResults1dHaloWith2Pulses(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
452 // Check results are expected from values encoded in x data
453 for (int j = 0; j < DIM; j++)
455 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
456 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
457 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
458 // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
459 EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
460 EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
461 EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
465 /*! \brief Check results for above-defined 2D halo with 1 pulse in each dimension
467 * \param [in] x Atom coordinate data array
468 * \param [in] dd Domain decomposition object
469 * \param [in] numHomeAtoms Number of home atoms
471 void checkResults2dHaloWith1PulseInEachDim(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
473 // Check results are expected from values encoded in x data
474 for (int j = 0; j < DIM; j++)
476 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
477 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
478 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
479 // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
480 EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[1][0], 1, j));
481 EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[1][0], 3, j));
485 /*! \brief Check results for above-defined 2D halo with 2 pulses in the first dimension
487 * \param [in] x Atom coordinate data array
488 * \param [in] dd Domain decomposition object
489 * \param [in] numHomeAtoms Number of home atoms
491 void checkResults2dHaloWith2PulsesInDim1(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
493 // Check results are expected from values encoded in x data
494 for (int j = 0; j < DIM; j++)
496 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
497 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
498 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
499 // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
500 EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
501 EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
502 EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
503 // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
504 EXPECT_EQ(x[numHomeAtoms + 5][j], encodedValue(dd->neighbor[1][0], 1, j));
505 EXPECT_EQ(x[numHomeAtoms + 6][j], encodedValue(dd->neighbor[1][0], 3, j));
509 TEST(HaloExchangeTest, Coordinates1dHaloWith1Pulse)
514 const int numHomeAtoms = 10;
515 const int numHaloAtoms = 2;
516 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
517 HostVector<RVec> h_x;
518 h_x.resize(numAtomsTotal);
520 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
525 dd.mpi_comm_all = MPI_COMM_WORLD;
526 gmx_domdec_comm_t comm;
528 dd.unitCellInfo.haveScrewPBC = false;
530 DDAtomRanges atomRanges;
531 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
532 dd.comm->atomRanges = atomRanges;
534 define1dRankTopology(&dd);
536 std::vector<gmx_domdec_ind_t> indvec;
537 define1dHaloWith1Pulse(&dd, &indvec);
539 // Perform halo exchange
540 matrix box = { { 0., 0., 0. } };
541 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
544 checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
546 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
548 // early return if no devices are available.
549 if (getTestHardwareEnvironment()->getTestDeviceList().empty())
554 // Re-initialize input
555 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
557 // Perform GPU halo exchange
558 gpuHalo(&dd, box, &h_x, numAtomsTotal);
561 checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
565 TEST(HaloExchangeTest, Coordinates1dHaloWith2Pulses)
570 const int numHomeAtoms = 10;
571 const int numHaloAtoms = 5;
572 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
573 HostVector<RVec> h_x;
574 h_x.resize(numAtomsTotal);
576 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
581 dd.mpi_comm_all = MPI_COMM_WORLD;
582 gmx_domdec_comm_t comm;
584 dd.unitCellInfo.haveScrewPBC = false;
586 DDAtomRanges atomRanges;
587 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
588 dd.comm->atomRanges = atomRanges;
590 define1dRankTopology(&dd);
592 std::vector<gmx_domdec_ind_t> indvec;
593 define1dHaloWith2Pulses(&dd, &indvec);
595 // Perform halo exchange
596 matrix box = { { 0., 0., 0. } };
597 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
600 checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
602 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
604 // early return if no devices are available.
605 if (getTestHardwareEnvironment()->getTestDeviceList().empty())
610 // Re-initialize input
611 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
613 // Perform GPU halo exchange
614 gpuHalo(&dd, box, &h_x, numAtomsTotal);
617 checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
622 TEST(HaloExchangeTest, Coordinates2dHaloWith1PulseInEachDim)
627 const int numHomeAtoms = 10;
628 const int numHaloAtoms = 4;
629 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
630 HostVector<RVec> h_x;
631 h_x.resize(numAtomsTotal);
633 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
638 dd.mpi_comm_all = MPI_COMM_WORLD;
639 gmx_domdec_comm_t comm;
641 dd.unitCellInfo.haveScrewPBC = false;
643 DDAtomRanges atomRanges;
644 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
645 dd.comm->atomRanges = atomRanges;
647 define2dRankTopology(&dd);
649 std::vector<gmx_domdec_ind_t> indvec;
650 define2dHaloWith1PulseInEachDim(&dd, &indvec);
652 // Perform halo exchange
653 matrix box = { { 0., 0., 0. } };
654 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
657 checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
659 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
661 // early return if no devices are available.
662 if (getTestHardwareEnvironment()->getTestDeviceList().empty())
667 // Re-initialize input
668 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
670 // Perform GPU halo exchange
671 gpuHalo(&dd, box, &h_x, numAtomsTotal);
674 checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
678 TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
683 const int numHomeAtoms = 10;
684 const int numHaloAtoms = 7;
685 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
686 HostVector<RVec> h_x;
687 h_x.resize(numAtomsTotal);
689 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
694 dd.mpi_comm_all = MPI_COMM_WORLD;
695 gmx_domdec_comm_t comm;
697 dd.unitCellInfo.haveScrewPBC = false;
699 DDAtomRanges atomRanges;
700 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
701 dd.comm->atomRanges = atomRanges;
703 define2dRankTopology(&dd);
705 std::vector<gmx_domdec_ind_t> indvec;
706 define2dHaloWith2PulsesInDim1(&dd, &indvec);
708 // Perform halo exchange
709 matrix box = { { 0., 0., 0. } };
710 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
713 checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
715 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
717 // early return if no devices are available.
718 if (getTestHardwareEnvironment()->getTestDeviceList().empty())
723 // Re-initialize input
724 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
726 // Perform GPU halo exchange
727 gpuHalo(&dd, box, &h_x, numAtomsTotal);
730 checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);