/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2008-2019,2020,2021, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines functions for (mostly) the domdec module
 * to use MPI functionality
 *
 * \author Berk Hess <hess@kth.se>
 *
 * \ingroup module_domdec
 */
#include "gmxpre.h"

#include "domdec_network.h"

#include "config.h"

#include <cstring>

#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/utility/gmxmpi.h"

#include "domdec_internal.h"
/*! \brief Returns the MPI rank of the domain decomposition master rank */
#define DDMASTERRANK(dd) ((dd)->masterrank)
/*! \brief Move data of type \p T in the communication region one cell along
 * the domain decomposition
 *
 * Moves in the dimension indexed by ddDimensionIndex, either forward
 * (direction=dddirForward) or backward (direction=dddirBackward).
 */
template<typename T>
void ddSendrecv(const struct gmx_domdec_t* dd,
                int                        ddDimensionIndex,
                int                        direction,
                T*                         sendBuffer,
                int                        numElementsToSend,
                T*                         receiveBuffer,
                int                        numElementsToReceive)
{
#if GMX_MPI
    int sendRank    = dd->neighbor[ddDimensionIndex][direction == dddirForward ? 0 : 1];
    int receiveRank = dd->neighbor[ddDimensionIndex][direction == dddirForward ? 1 : 0];

    constexpr int mpiTag = 0;
    MPI_Status    mpiStatus;
    if (numElementsToSend > 0 && numElementsToReceive > 0)
    {
        MPI_Sendrecv(sendBuffer,
                     numElementsToSend * sizeof(T),
                     MPI_BYTE,
                     sendRank,
                     mpiTag,
                     receiveBuffer,
                     numElementsToReceive * sizeof(T),
                     MPI_BYTE,
                     receiveRank,
                     mpiTag,
                     dd->mpi_comm_all,
                     &mpiStatus);
    }
    else if (numElementsToSend > 0)
    {
        MPI_Send(sendBuffer, numElementsToSend * sizeof(T), MPI_BYTE, sendRank, mpiTag, dd->mpi_comm_all);
    }
    else if (numElementsToReceive > 0)
    {
        MPI_Recv(receiveBuffer, numElementsToReceive * sizeof(T), MPI_BYTE, receiveRank, mpiTag, dd->mpi_comm_all, &mpiStatus);
    }
#else  // GMX_MPI
    GMX_UNUSED_VALUE(dd);
    GMX_UNUSED_VALUE(ddDimensionIndex);
    GMX_UNUSED_VALUE(direction);
    GMX_UNUSED_VALUE(sendBuffer);
    GMX_UNUSED_VALUE(numElementsToSend);
    GMX_UNUSED_VALUE(receiveBuffer);
    GMX_UNUSED_VALUE(numElementsToReceive);
#endif // GMX_MPI
}
//! Specialization of extern template for int
template void ddSendrecv(const gmx_domdec_t*, int, int, int*, int, int*, int);
//! Specialization of extern template for real
template void ddSendrecv(const gmx_domdec_t*, int, int, real*, int, real*, int);
//! Specialization of extern template for rvec
template void ddSendrecv(const gmx_domdec_t*, int, int, rvec*, int, rvec*, int);
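
/* Usage sketch (illustrative only, not part of this module): moving a halo
 * of coordinates one cell forward in DD dimension 0. The buffer names and
 * counts are hypothetical; real callers take them from the domain-
 * decomposition setup, which guarantees that the send count on one rank
 * matches the receive count on its neighbor.
 *
 *   std::vector<gmx::RVec> xToSend(numAtomsToShip);
 *   std::vector<gmx::RVec> xToReceive(numAtomsExpected);
 *   ddSendrecv(dd, 0, dddirForward,
 *              as_rvec_array(xToSend.data()), xToSend.size(),
 *              as_rvec_array(xToReceive.data()), xToReceive.size());
 */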
template<typename T>
void ddSendrecv(const gmx_domdec_t* dd,
                int                 ddDimensionIndex,
                int                 direction,
                gmx::ArrayRef<T>    sendBuffer,
                gmx::ArrayRef<T>    receiveBuffer)
{
    ddSendrecv(dd,
               ddDimensionIndex,
               direction,
               sendBuffer.data(),
               sendBuffer.size(),
               receiveBuffer.data(),
               receiveBuffer.size());
}

//! Specialization of extern template for int
template void ddSendrecv(const gmx_domdec_t*, int, int, gmx::ArrayRef<int>, gmx::ArrayRef<int>);
//! Specialization of extern template for real
template void ddSendrecv(const gmx_domdec_t*, int, int, gmx::ArrayRef<real>, gmx::ArrayRef<real>);
//! Specialization of extern template for gmx::RVec
template void ddSendrecv(const gmx_domdec_t*, int, int, gmx::ArrayRef<gmx::RVec>, gmx::ArrayRef<gmx::RVec>);
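
/* Usage sketch (illustrative): with the ArrayRef overload the element counts
 * come from the views themselves, which removes one class of size-mismatch
 * bugs. The vector names are hypothetical.
 *
 *   std::vector<real> chargesToSend    = makeSendBuffer();
 *   std::vector<real> chargesToReceive(numExpected);
 *   ddSendrecv<real>(dd, 1, dddirBackward, chargesToSend, chargesToReceive);
 */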
void dd_sendrecv2_rvec(const struct gmx_domdec_t gmx_unused* dd,
                       int gmx_unused ddimind,
                       rvec gmx_unused* buf_s_fw,
                       int gmx_unused n_s_fw,
                       rvec gmx_unused* buf_r_fw,
                       int gmx_unused n_r_fw,
                       rvec gmx_unused* buf_s_bw,
                       int gmx_unused n_s_bw,
                       rvec gmx_unused* buf_r_bw,
                       int gmx_unused n_r_bw)
{
#if GMX_MPI
    MPI_Request req[4];
    MPI_Status  stat[4];

    int rank_fw = dd->neighbor[ddimind][0];
    int rank_bw = dd->neighbor[ddimind][1];

    if (!dd->comm->ddSettings.useSendRecv2)
    {
        /* Try to send and receive in two directions simultaneously.
         * Should be faster, especially on machines
         * with full 3D communication networks.
         * However, it could be that communication libraries are
         * optimized for MPI_Sendrecv and non-blocking MPI calls
         * are slower.
         * SendRecv2 can be turned on with the env.var. GMX_DD_SENDRECV2
         */
        int nreq = 0;
        if (n_r_fw)
        {
            MPI_Irecv(buf_r_fw[0], n_r_fw * sizeof(rvec), MPI_BYTE, rank_bw, 0, dd->mpi_comm_all, &req[nreq++]);
        }
        if (n_r_bw)
        {
            MPI_Irecv(buf_r_bw[0], n_r_bw * sizeof(rvec), MPI_BYTE, rank_fw, 1, dd->mpi_comm_all, &req[nreq++]);
        }
        if (n_s_fw)
        {
            MPI_Isend(buf_s_fw[0], n_s_fw * sizeof(rvec), MPI_BYTE, rank_fw, 0, dd->mpi_comm_all, &req[nreq++]);
        }
        if (n_s_bw)
        {
            MPI_Isend(buf_s_bw[0], n_s_bw * sizeof(rvec), MPI_BYTE, rank_bw, 1, dd->mpi_comm_all, &req[nreq++]);
        }
        if (nreq)
        {
            MPI_Waitall(nreq, req, stat); //NOLINT(clang-analyzer-optin.mpi.MPI-Checker)
        }
    }
    else
    {
        /* Communicate in two ordered phases.
         * This is slower, even on a dual-core Opteron cluster
         * with a single full-duplex network connection per machine.
         */
        /* Forward */
        MPI_Sendrecv(buf_s_fw[0], n_s_fw * sizeof(rvec), MPI_BYTE, rank_fw, 0,
                     buf_r_fw[0], n_r_fw * sizeof(rvec), MPI_BYTE, rank_bw, 0,
                     dd->mpi_comm_all, &stat[0]);
        /* Backward */
        MPI_Sendrecv(buf_s_bw[0], n_s_bw * sizeof(rvec), MPI_BYTE, rank_bw, 0,
                     buf_r_bw[0], n_r_bw * sizeof(rvec), MPI_BYTE, rank_fw, 0,
                     dd->mpi_comm_all, &stat[0]);
    }
#endif
}
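
/* Usage sketch (illustrative): exchanging coordinate halos with both
 * neighbors along one DD dimension in a single call. Buffer names and
 * counts are hypothetical; real callers size them from the DD zone setup
 * so that each send count matches the paired receive count on the
 * neighboring rank.
 *
 *   dd_sendrecv2_rvec(dd, dimIndex,
 *                     xSendFw, nSendFw, xRecvFw, nRecvFw,
 *                     xSendBw, nSendBw, xRecvBw, nRecvBw);
 */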
void dd_bcast(const gmx_domdec_t gmx_unused* dd, int gmx_unused nbytes, void gmx_unused* data)
{
#if GMX_MPI
    if (dd->nnodes > 1)
    {
        MPI_Bcast(data, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
    }
#endif
}
void dd_bcastc(const gmx_domdec_t* dd, int nbytes, void* src, void* dest)
{
    if (DDMASTER(dd) || dd->nnodes == 1)
    {
        memcpy(dest, src, nbytes);
    }
#if GMX_MPI
    if (dd->nnodes > 1)
    {
        MPI_Bcast(dest, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
    }
#endif
}
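
/* Usage sketch (illustrative): distributing a small, fixed-size record that
 * only the master rank has filled in; the type name is hypothetical.
 *
 *   GlobalSettings settings;   // contents valid on the master rank only
 *   GlobalSettings myCopy;
 *   dd_bcastc(dd, sizeof(settings), &settings, &myCopy);
 *   // afterwards every rank's myCopy holds the master's settings
 */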
void dd_scatter(const gmx_domdec_t gmx_unused* dd, int gmx_unused nbytes, const void gmx_unused* src, void* dest)
{
#if GMX_MPI
    if (dd->nnodes > 1)
    {
        /* Some MPI implementations don't specify const */
        MPI_Scatter(const_cast<void*>(src), nbytes, MPI_BYTE, dest, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
    }
    else
#endif
    {
        /* 1 rank, either we copy everything, or dest=src: nothing to do */
        if (dest != src)
        {
            memcpy(dest, src, nbytes);
        }
    }
}
void dd_gather(const gmx_domdec_t gmx_unused* dd,
               int gmx_unused nbytes,
               const void gmx_unused* src,
               void gmx_unused* dest)
{
#if GMX_MPI
    /* Some MPI implementations don't specify const */
    MPI_Gather(const_cast<void*>(src), nbytes, MPI_BYTE, dest, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
#endif
}
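
/* Usage sketch (illustrative): a fixed-size per-rank record scattered from
 * the master and gathered back after each rank updates its copy. Names are
 * hypothetical; allRecords needs room for dd->nnodes records and only has
 * to be valid on the master rank.
 *
 *   PerRankRecord myRecord;
 *   dd_scatter(dd, sizeof(myRecord), allRecords, &myRecord);
 *   // ... each rank modifies myRecord ...
 *   dd_gather(dd, sizeof(myRecord), &myRecord, allRecords);
 */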
void dd_scatterv(const gmx_domdec_t gmx_unused* dd,
                 int gmx_unused* scounts,
                 int gmx_unused* disps,
                 const void gmx_unused* sbuf,
                 int rcount,
                 void* rbuf)
{
#if GMX_MPI
    int dum;

    if (dd->nnodes > 1)
    {
        if (rcount == 0)
        {
            /* MPI does not allow NULL pointers */
            rbuf = &dum;
        }
        /* Some MPI implementations don't specify const */
        MPI_Scatterv(
                const_cast<void*>(sbuf), scounts, disps, MPI_BYTE, rbuf, rcount, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
    }
    else
#endif
    {
        /* 1 rank, either we copy everything, or rbuf=sbuf: nothing to do */
        if (rbuf != sbuf)
        {
            memcpy(rbuf, sbuf, rcount);
        }
    }
}
void dd_gatherv(const gmx_domdec_t gmx_unused* dd,
                int gmx_unused scount,
                const void gmx_unused* sbuf,
                int gmx_unused* rcounts,
                int gmx_unused* disps,
                void gmx_unused* rbuf)
{
#if GMX_MPI
    int dum;

    if (scount == 0)
    {
        /* MPI does not allow NULL pointers */
        sbuf = &dum;
    }
    /* Some MPI implementations don't specify const */
    MPI_Gatherv(
            const_cast<void*>(sbuf), scount, MPI_BYTE, rbuf, rcounts, disps, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
#endif
}
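
/* Usage sketch (illustrative): variable-sized per-rank blocks. The byte
 * counts and displacements are hypothetical and only need valid contents on
 * the master rank; myCount is each rank's own block size in bytes.
 *
 *   dd_scatterv(dd, scounts, disps, masterBuffer, myCount, myBlock);
 *   // ... each rank works on its myCount bytes in myBlock ...
 *   dd_gatherv(dd, myCount, myBlock, scounts, disps, masterBuffer);
 */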