2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2020,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Tests for the halo exchange
38 * The test sets up the rank topology and performs a coordinate halo
39 * exchange (for both CPU and GPU codepaths) for several 1D and 2D
40 * pulse configurations. Each pulse involves a few non-contiguous
41 * indices. The sending rank, atom number and spatial 3D index are
42 * encoded in the x values, to allow correctness checking following the halo exchange.
47 * \author Alan Gray <alang@nvidia.com>
48 * \ingroup module_domdec
58 #include <gtest/gtest.h>
60 #include "gromacs/domdec/atomdistribution.h"
61 #include "gromacs/domdec/domdec_internal.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
64 # include "gromacs/gpu_utils/device_stream.h"
65 # include "gromacs/gpu_utils/devicebuffer.h"
67 #include "gromacs/gpu_utils/gpueventsynchronizer.h"
68 #include "gromacs/gpu_utils/hostallocator.h"
69 #include "gromacs/mdtypes/inputrec.h"
71 #include "testutils/mpitest.h"
72 #include "testutils/test_hardware_environment.h"
/*! \brief Get encoded numerical value for sending rank, atom number and spatial 3D index
 *
 * The three inputs are packed as
 * sendRank * 1000 + atomNumber * 100 + spatial3dIndex.
 * The fields only stay separable while atomNumber is in [0, 10) and
 * spatial3dIndex is in [0, 100); larger values overlap the next field.
 *
 * \param [in] sendRank         MPI rank of sender
 * \param [in] atomNumber       Atom number
 * \param [in] spatial3dIndex   Spatial 3D Index
 *
 * \returns                     Encoded value
 */
float encodedValue(const int sendRank, const int atomNumber, const int spatial3dIndex)
{
    // Pack in integer arithmetic, then convert to float once and explicitly
    // rather than relying on an implicit narrowing conversion in the return.
    const int packedValue = sendRank * 1000 + atomNumber * 100 + spatial3dIndex;
    return static_cast<float>(packedValue);
}
94 /*! \brief Initialize halo array
96 * \param [in] x Atom coordinate data array
97 * \param [in] numHomeAtoms Number of home atoms
98 * \param [in] numAtomsTotal Total number of atoms, including halo
100 void initHaloData(RVec* x, const int numHomeAtoms, const int numAtomsTotal)
103 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
105 for (int i = 0; i < numAtomsTotal; i++)
107 for (int j = 0; j < DIM; j++)
109 x[i][j] = i < numHomeAtoms ? encodedValue(rank, i, j) : -1;
114 /*! \brief Perform GPU halo exchange, including required setup and data transfers
116 * \param [in] dd Domain decomposition object
117 * \param [in] box Box matrix
118 * \param [in] h_x Atom coordinate data array on host
119 * \param [in] numAtomsTotal Total number of atoms, including halo
// Runs the coordinate halo exchange through the GPU code path: uploads h_x to a
// device buffer, creates one GpuHaloExchange object per (dimension, pulse) of dd,
// communicates the coordinates and downloads the result back into h_x.
// NOTE(review): this listing omits brace-only and other short lines, for example
// the declarations of `rank` and `d_x_size` and the #else/#endif that pair with
// the #if below - confirm them against the complete file.
121 void gpuHalo(gmx_domdec_t* dd, matrix box, HostVector<RVec>* h_x, int numAtomsTotal)
123 #if (GMX_GPU_CUDA && GMX_THREAD_MPI)
124 // Request pinned host memory for h_x when that is supported
125 changePinningPolicy(h_x, PinningPolicy::PinnedIfSupported);
126 // Set up GPU hardware environment and assign this MPI rank to a device
128 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
129 int numDevices = getTestHardwareEnvironment()->getTestDeviceList().size();
// Ranks are mapped onto the test devices by modulo, so several ranks can share a
// device. numDevices must be non-zero here; the tests in this file check for an
// empty device list before calling gpuHalo().
130 const auto& testDevice = getTestHardwareEnvironment()->getTestDeviceList()[rank % numDevices];
131 const auto& deviceContext = testDevice->deviceContext();
132 setActiveDevice(testDevice->deviceInfo());
133 DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
135 // Set up GPU buffer and copy input data from host
136 DeviceBuffer<RVec> d_x;
// Size bookkeeping passed to reallocateDeviceBuffer() by pointer.
138 int d_x_size_alloc = -1;
139 reallocateDeviceBuffer(&d_x, numAtomsTotal, &d_x_size, &d_x_size_alloc, deviceContext);
// The upload is requested with GpuApiCallBehavior::Sync.
141 copyToDeviceBuffer(&d_x, h_x->data(), 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
// Event marked on deviceStream after the upload; it is handed to every
// communicateHaloCoordinates() call below.
143 GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
144 coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
// One list of exchange objects per decomposition dimension index.
146 std::array<std::vector<GpuHaloExchange>, DIM> gpuHaloExchange;
148 // Create halo exchange objects, one per pulse of each decomposition dimension
149 for (int d = 0; d < dd->ndim; d++)
151 for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
153 gpuHaloExchange[d].push_back(
154 GpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, pulse, nullptr));
158 // Perform GPU halo exchange, dimension by dimension and pulse by pulse
159 for (int d = 0; d < dd->ndim; d++)
161 for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
163 gpuHaloExchange[d][pulse].reinitHalo(d_x, nullptr);
164 gpuHaloExchange[d][pulse].communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
// Mark a second event on deviceStream and block on it before reading back.
// NOTE(review): whether the exchange work itself is enqueued on deviceStream is
// not visible from this block - confirm that this wait covers it.
168 GpuEventSynchronizer haloCompletedEvent;
169 haloCompletedEvent.markEvent(deviceStream);
170 haloCompletedEvent.waitForEvent();
172 // Copy results back to host, again with GpuApiCallBehavior::Sync
173 copyFromDeviceBuffer(
174 h_x->data(), &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
176 freeDeviceBuffer(d_x);
// Presumably the branch taken when the #if condition above is false (the #else
// line is not visible here): the parameters are only marked as unused.
178 GMX_UNUSED_VALUE(dd);
179 GMX_UNUSED_VALUE(box);
180 GMX_UNUSED_VALUE(h_x);
181 GMX_UNUSED_VALUE(numAtomsTotal);
185 /*! \brief Define 1D rank topology with 4 MPI tasks
187 * \param [in] dd Domain decomposition object
189 void define1dRankTopology(gmx_domdec_t* dd)
192 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
194 dd->neighbor[0][0] = (rank + 1) % 4;
195 dd->neighbor[0][1] = (rank == 0) ? 3 : rank - 1;
198 /*! \brief Define 2D rank topology with 4 MPI tasks
205 * \param [in] dd Domain decomposition object
// Fills dd->neighbor for decomposition dimensions 0 and 1 from the rank of the
// calling process in MPI_COMM_WORLD.
// NOTE(review): the construct that selects one of the four assignment groups below
// is not visible in this listing; presumably the groups apply to ranks 0, 1, 2 and 3
// in that order - confirm against the complete file.
207 void define2dRankTopology(gmx_domdec_t* dd)
211 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// In every group the [d][0] and [d][1] entries of a dimension name the same rank.
// First group: neighbour 1 in dimension 0, neighbour 2 in dimension 1.
216 dd->neighbor[0][0] = 1;
217 dd->neighbor[0][1] = 1;
218 dd->neighbor[1][0] = 2;
219 dd->neighbor[1][1] = 2;
// Second group: neighbour 0 in dimension 0, neighbour 3 in dimension 1.
222 dd->neighbor[0][0] = 0;
223 dd->neighbor[0][1] = 0;
224 dd->neighbor[1][0] = 3;
225 dd->neighbor[1][1] = 3;
// Third group: neighbour 3 in dimension 0, neighbour 0 in dimension 1.
228 dd->neighbor[0][0] = 3;
229 dd->neighbor[0][1] = 3;
230 dd->neighbor[1][0] = 0;
231 dd->neighbor[1][1] = 0;
// Fourth group: neighbour 2 in dimension 0, neighbour 1 in dimension 1.
234 dd->neighbor[0][0] = 2;
235 dd->neighbor[0][1] = 2;
236 dd->neighbor[1][0] = 1;
237 dd->neighbor[1][1] = 1;
242 /*! \brief Define a 1D halo with 1 pulse
244 * \param [in] dd Domain decomposition object
245 * \param [in] indvec Vector of index vectors
// Describes a halo with a single pulse in one decomposition dimension: the pulse
// sends the atoms with indices 1 and 3 and receives two atoms.
// NOTE(review): the statements that set `rank`, `nzone` and `dimIndex` (and any
// clearing of indexvec / *indvec) are not visible in this listing - confirm them
// against the complete file.
247 void define1dHaloWith1Pulse(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
251 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// indexvec collects the atom indices of one pulse; ind is the per-pulse record
// that is appended to *indvec.
253 std::vector<int> indexvec;
254 gmx_domdec_ind_t ind;
260 // Set up indices involved in halo
264 dd->comm->cd[dimIndex].receiveInPlace = true;
265 dd->dim[dimIndex] = 0;
// The cell index along this dimension is the MPI rank.
266 dd->ci[dimIndex] = rank;
268 // First pulse involves (arbitrary) indices 1 and 3
269 indexvec.push_back(1);
270 indexvec.push_back(3);
// Send and receive counts match the two indices pushed above.
// NOTE(review): presumably entry nzone + 1 is the slot the exchange reads for the
// pulse total - confirm against gmx_domdec_ind_t.
272 ind.index = indexvec;
273 ind.nsend[nzone + 1] = 2;
274 ind.nrecv[nzone + 1] = 2;
275 indvec->push_back(ind);
// dd keeps its own copy of the pulse list; the caller's vector is also filled.
277 dd->comm->cd[dimIndex].ind = *indvec;
280 /*! \brief Define a 1D halo with 2 pulses
282 * \param [in] dd Domain decomposition object
283 * \param [in] indvec Vector of index vectors
// Describes a halo with two pulses in one decomposition dimension: the first pulse
// carries two atoms (indices 1 and 3), the second carries three (indices 4, 5, 7).
// NOTE(review): the statements that set `rank`, `nzone` and `dimIndex` (and any
// clearing of indexvec / *indvec) are not visible in this listing - confirm them
// against the complete file.
285 void define1dHaloWith2Pulses(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
289 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// indexvec collects the atom indices of one pulse; ind is the per-pulse record
// that is appended to *indvec.
291 std::vector<int> indexvec;
292 gmx_domdec_ind_t ind;
298 // Set up indices involved in halo
302 dd->comm->cd[dimIndex].receiveInPlace = true;
303 dd->dim[dimIndex] = 0;
// The cell index along this dimension is the MPI rank.
304 dd->ci[dimIndex] = rank;
306 // First pulse involves (arbitrary) indices 1 and 3
307 indexvec.push_back(1);
308 indexvec.push_back(3);
// Send and receive counts match the two indices pushed above.
310 ind.index = indexvec;
311 ind.nsend[nzone + 1] = 2;
312 ind.nrecv[nzone + 1] = 2;
313 indvec->push_back(ind);
315 // Add another pulse with (arbitrary) indices 4,5,7
// NOTE(review): indexvec and ind are reused from the first pulse. The counts of 3
// below only match if indexvec was emptied before these push_backs; that statement
// is not visible here - confirm.
318 indexvec.push_back(4);
319 indexvec.push_back(5);
320 indexvec.push_back(7);
322 ind.index = indexvec;
323 ind.nsend[nzone + 1] = 3;
324 ind.nrecv[nzone + 1] = 3;
325 indvec->push_back(ind);
// dd keeps its own copy of the two-pulse list.
327 dd->comm->cd[dimIndex].ind = *indvec;
330 /*! \brief Define a 2D halo with 1 pulse in each dimension
332 * \param [in] dd Domain decomposition object
333 * \param [in] indvec Vector of index vectors
// Describes a halo with one pulse (atom indices 1 and 3) in every decomposition
// dimension of dd.
// NOTE(review): the statements that set `rank`, `nzone` and dd->ndim (and any
// clearing of indexvec / *indvec inside the loop) are not visible in this listing -
// confirm them against the complete file.
335 void define2dHaloWith1PulseInEachDim(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
339 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// indexvec collects the atom indices of one pulse; ind is the per-pulse record
// that is appended to *indvec.
341 std::vector<int> indexvec;
342 gmx_domdec_ind_t ind;
// The same pulse description is built for each decomposition dimension.
346 for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
349 // Set up indices involved in halo
353 dd->comm->cd[dimIndex].receiveInPlace = true;
// NOTE(review): every decomposition dimension is given spatial dimension 0 and
// cell index `rank` - looks deliberate for this synthetic setup, but confirm.
354 dd->dim[dimIndex] = 0;
355 dd->ci[dimIndex] = rank;
357 // Single pulse involving (arbitrary) indices 1 and 3
358 indexvec.push_back(1);
359 indexvec.push_back(3);
// Send and receive counts match the two indices pushed above.
361 ind.index = indexvec;
362 ind.nsend[nzone + 1] = 2;
363 ind.nrecv[nzone + 1] = 2;
364 indvec->push_back(ind);
// Each dimension stores a copy of *indvec as it stands at this point.
// NOTE(review): indexvec and *indvec outlive the loop iteration, so the second
// dimension only gets a single two-atom pulse if both are cleared per iteration -
// confirm.
366 dd->comm->cd[dimIndex].ind = *indvec;
372 /*! \brief Define a 2D halo with 2 pulses in the first dimension
374 * \param [in] dd Domain decomposition object
375 * \param [in] indvec Vector of index vectors
// Describes a halo with two pulses in the first decomposition dimension (atom
// indices 1, 3 and then 4, 5, 7) and one pulse (indices 1, 3) in the others.
// NOTE(review): the statements that set `rank`, `nzone` and dd->ndim (and any
// clearing of indexvec / *indvec) are not visible in this listing - confirm them
// against the complete file.
377 void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
381 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// indexvec collects the atom indices of one pulse; ind is the per-pulse record
// that is appended to *indvec.
383 std::vector<int> indexvec;
384 gmx_domdec_ind_t ind;
388 for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
391 // Set up indices involved in halo
395 dd->comm->cd[dimIndex].receiveInPlace = true;
// NOTE(review): every decomposition dimension is given spatial dimension 0 and
// cell index `rank` - looks deliberate for this synthetic setup, but confirm.
396 dd->dim[dimIndex] = 0;
397 dd->ci[dimIndex] = rank;
399 // First pulse involves (arbitrary) indices 1 and 3
400 indexvec.push_back(1);
401 indexvec.push_back(3);
// Send and receive counts match the two indices pushed above.
403 ind.index = indexvec;
404 ind.nsend[nzone + 1] = 2;
405 ind.nrecv[nzone + 1] = 2;
406 indvec->push_back(ind);
// Only decomposition dimension index 0 gets the second pulse.
408 if (dimIndex == 0) // Add another pulse with (arbitrary) indices 4,5,7
// NOTE(review): the counts of 3 below only match if indexvec was emptied before
// these push_backs; that statement is not visible here - confirm.
412 indexvec.push_back(4);
413 indexvec.push_back(5);
414 indexvec.push_back(7);
416 ind.index = indexvec;
417 ind.nsend[nzone + 1] = 3;
418 ind.nrecv[nzone + 1] = 3;
419 indvec->push_back(ind);
// Each dimension stores a copy of *indvec as it stands at this point.
422 dd->comm->cd[dimIndex].ind = *indvec;
428 /*! \brief Check results for above-defined 1D halo with 1 pulse
430 * \param [in] x Atom coordinate data array
431 * \param [in] dd Domain decomposition object
432 * \param [in] numHomeAtoms Number of home atoms
434 void checkResults1dHaloWith1Pulse(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
436 // Check results are expected from values encoded in x data
437 for (int j = 0; j < DIM; j++)
439 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
440 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
441 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
445 /*! \brief Check results for above-defined 1D halo with 2 pulses
447 * \param [in] x Atom coordinate data array
448 * \param [in] dd Domain decomposition object
449 * \param [in] numHomeAtoms Number of home atoms
451 void checkResults1dHaloWith2Pulses(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
453 // Check results are expected from values encoded in x data
454 for (int j = 0; j < DIM; j++)
456 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
457 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
458 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
459 // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
460 EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
461 EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
462 EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
466 /*! \brief Check results for above-defined 2D halo with 1 pulse in each dimension
468 * \param [in] x Atom coordinate data array
469 * \param [in] dd Domain decomposition object
470 * \param [in] numHomeAtoms Number of home atoms
472 void checkResults2dHaloWith1PulseInEachDim(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
474 // Check results are expected from values encoded in x data
475 for (int j = 0; j < DIM; j++)
477 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
478 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
479 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
480 // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
481 EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[1][0], 1, j));
482 EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[1][0], 3, j));
486 /*! \brief Check results for above-defined 2D halo with 2 pulses in the first dimension
488 * \param [in] x Atom coordinate data array
489 * \param [in] dd Domain decomposition object
490 * \param [in] numHomeAtoms Number of home atoms
492 void checkResults2dHaloWith2PulsesInDim1(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
494 // Check results are expected from values encoded in x data
495 for (int j = 0; j < DIM; j++)
497 // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
498 EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
499 EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
500 // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
501 EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
502 EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
503 EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
504 // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
505 EXPECT_EQ(x[numHomeAtoms + 5][j], encodedValue(dd->neighbor[1][0], 1, j));
506 EXPECT_EQ(x[numHomeAtoms + 6][j], encodedValue(dd->neighbor[1][0], 3, j));
// Coordinate halo exchange for the 1D topology with a single pulse: runs dd_move_x()
// and checks the halo, then repeats through gpuHalo() when the GPU condition holds.
// NOTE(review): the construction of `dd`, the assignment of dd.comm and the body of
// the early-return branch are not visible in this listing - confirm them against
// the complete file.
510 TEST(HaloExchangeTest, Coordinates1dHaloWith1Pulse)
// 10 home atoms plus 2 halo atoms, matching the two indices of the single pulse.
515 const int numHomeAtoms = 10;
516 const int numHaloAtoms = 2;
517 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
518 HostVector<RVec> h_x;
519 h_x.resize(numAtomsTotal);
// Home atoms get encoded values, halo atoms get -1.
521 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// Minimal domain decomposition state for the exchange.
526 dd.mpi_comm_all = MPI_COMM_WORLD;
527 gmx_domdec_comm_t comm;
529 dd.unitCellInfo.haveScrewPBC = false;
// Record where the home atoms end.
531 DDAtomRanges atomRanges;
532 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
533 dd.comm->atomRanges = atomRanges;
535 define1dRankTopology(&dd);
537 std::vector<gmx_domdec_ind_t> indvec;
538 define1dHaloWith1Pulse(&dd, &indvec);
540 // Perform halo exchange on the host with a zero box
541 matrix box = { { 0., 0., 0. } };
542 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
// Check the halo entries written by the host exchange.
545 checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
547 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
549 // early return if no devices are available.
550 if (getTestHardwareEnvironment()->getTestDeviceList().empty())
555 // Re-initialize input so the halo entries are back at -1
556 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
558 // Perform GPU halo exchange
559 gpuHalo(&dd, box, &h_x, numAtomsTotal);
// Same expectations as for the host exchange.
562 checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
// Coordinate halo exchange for the 1D topology with two pulses: runs dd_move_x()
// and checks the halo, then repeats through gpuHalo() when the GPU condition holds.
// NOTE(review): the construction of `dd`, the assignment of dd.comm and the body of
// the early-return branch are not visible in this listing - confirm them against
// the complete file.
566 TEST(HaloExchangeTest, Coordinates1dHaloWith2Pulses)
// 10 home atoms plus 5 halo atoms: 2 from the first pulse and 3 from the second.
571 const int numHomeAtoms = 10;
572 const int numHaloAtoms = 5;
573 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
574 HostVector<RVec> h_x;
575 h_x.resize(numAtomsTotal);
// Home atoms get encoded values, halo atoms get -1.
577 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// Minimal domain decomposition state for the exchange.
582 dd.mpi_comm_all = MPI_COMM_WORLD;
583 gmx_domdec_comm_t comm;
585 dd.unitCellInfo.haveScrewPBC = false;
// Record where the home atoms end.
587 DDAtomRanges atomRanges;
588 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
589 dd.comm->atomRanges = atomRanges;
591 define1dRankTopology(&dd);
593 std::vector<gmx_domdec_ind_t> indvec;
594 define1dHaloWith2Pulses(&dd, &indvec);
596 // Perform halo exchange on the host with a zero box
597 matrix box = { { 0., 0., 0. } };
598 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
// Check the halo entries written by the host exchange.
601 checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
603 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
605 // early return if no devices are available.
606 if (getTestHardwareEnvironment()->getTestDeviceList().empty())
611 // Re-initialize input so the halo entries are back at -1
612 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
614 // Perform GPU halo exchange
615 gpuHalo(&dd, box, &h_x, numAtomsTotal);
// Same expectations as for the host exchange.
618 checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
// Coordinate halo exchange for the 2D topology with one pulse per dimension: runs
// dd_move_x() and checks the halo, then repeats through gpuHalo() when the GPU
// condition holds.
// NOTE(review): the construction of `dd`, the assignment of dd.comm and the body of
// the early-return branch are not visible in this listing - confirm them against
// the complete file.
623 TEST(HaloExchangeTest, Coordinates2dHaloWith1PulseInEachDim)
// 10 home atoms plus 4 halo atoms: 2 per dimension.
628 const int numHomeAtoms = 10;
629 const int numHaloAtoms = 4;
630 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
631 HostVector<RVec> h_x;
632 h_x.resize(numAtomsTotal);
// Home atoms get encoded values, halo atoms get -1.
634 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// Minimal domain decomposition state for the exchange.
639 dd.mpi_comm_all = MPI_COMM_WORLD;
640 gmx_domdec_comm_t comm;
642 dd.unitCellInfo.haveScrewPBC = false;
// Record where the home atoms end.
644 DDAtomRanges atomRanges;
645 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
646 dd.comm->atomRanges = atomRanges;
648 define2dRankTopology(&dd);
650 std::vector<gmx_domdec_ind_t> indvec;
651 define2dHaloWith1PulseInEachDim(&dd, &indvec);
653 // Perform halo exchange on the host with a zero box
654 matrix box = { { 0., 0., 0. } };
655 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
// Check the halo entries written by the host exchange.
658 checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
660 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
662 // early return if no devices are available.
663 if (getTestHardwareEnvironment()->getTestDeviceList().empty())
668 // Re-initialize input so the halo entries are back at -1
669 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
671 // Perform GPU halo exchange
672 gpuHalo(&dd, box, &h_x, numAtomsTotal);
// Same expectations as for the host exchange.
675 checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
// Coordinate halo exchange for the 2D topology with two pulses in the first
// dimension: runs dd_move_x() and checks the halo, then repeats through gpuHalo()
// when the GPU condition holds.
// NOTE(review): the construction of `dd`, the assignment of dd.comm and the body of
// the early-return branch are not visible in this listing - confirm them against
// the complete file.
679 TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
// 10 home atoms plus 7 halo atoms: 2 + 3 from the first dimension, 2 from the second.
684 const int numHomeAtoms = 10;
685 const int numHaloAtoms = 7;
686 const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
687 HostVector<RVec> h_x;
688 h_x.resize(numAtomsTotal);
// Home atoms get encoded values, halo atoms get -1.
690 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// Minimal domain decomposition state for the exchange.
695 dd.mpi_comm_all = MPI_COMM_WORLD;
696 gmx_domdec_comm_t comm;
698 dd.unitCellInfo.haveScrewPBC = false;
// Record where the home atoms end.
700 DDAtomRanges atomRanges;
701 atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
702 dd.comm->atomRanges = atomRanges;
704 define2dRankTopology(&dd);
706 std::vector<gmx_domdec_ind_t> indvec;
707 define2dHaloWith2PulsesInDim1(&dd, &indvec);
709 // Perform halo exchange on the host with a zero box
710 matrix box = { { 0., 0., 0. } };
711 dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
// Check the halo entries written by the host exchange.
714 checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
716 if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
718 // early return if no devices are available.
719 if (getTestHardwareEnvironment()->getTestDeviceList().empty())
724 // Re-initialize input so the halo entries are back at -1
725 initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
727 // Perform GPU halo exchange
728 gpuHalo(&dd, box, &h_x, numAtomsTotal);
// Same expectations as for the host exchange.
731 checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);