/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file contains function definitions for redistributing
 * atoms over the PME domains
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_ewald
 */
#include "gmxpre.h"

#include "pme_redistribute.h"

#include "config.h"

#include <algorithm>

#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"

#include "pme_internal.h"
//! Calculate the slab indices and store in \p atc, store counts in \p count
static void pme_calc_pidx(int                            start,
                          int                            end,
                          const matrix                   recipbox,
                          gmx::ArrayRef<const gmx::RVec> x,
                          PmeAtomComm*                   atc,
                          int*                           count)
{
    const int   nslab = atc->nslab;
    int*        pd    = atc->pd.data();
    int         si;
    const real* xptr;
    real        s;
    real        rxx, ryx, rzx, ryy, rzy;

    /* Calculate PME task index (pidx) for each grid index.
     * Here we always assign equally sized slabs to each node
     * for load balancing reasons (the PME grid spacing is not used).
     */

    /* Reset the count */
    for (int i = 0; i < nslab; i++)
    {
        count[i] = 0;
    }

    if (atc->dimind == 0)
    {
        rxx = recipbox[XX][XX];
        ryx = recipbox[YY][XX];
        rzx = recipbox[ZZ][XX];
        /* Calculate the node index in x-dimension */
        for (int i = start; i < end; i++)
        {
            xptr = x[i];
            /* Fractional coordinates along box vectors */
            s     = nslab * (xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx);
            si    = static_cast<int>(s + 2 * nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
    else
    {
        ryy = recipbox[YY][YY];
        rzy = recipbox[ZZ][YY];
        /* Calculate the node index in y-dimension */
        for (int i = start; i < end; i++)
        {
            xptr = x[i];
            /* Fractional coordinates along box vectors */
            s     = nslab * (xptr[YY] * ryy + xptr[ZZ] * rzy);
            si    = static_cast<int>(s + 2 * nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
}
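/* Note on the slab-index calculation above (illustrative numbers, not taken from a run):
 * multiplying the Cartesian coordinate by the corresponding column of recipbox gives the
 * fractional coordinate along the decomposed box vector, so s = nslab * fraction.
 * Adding 2 * nslab before truncating and taking the modulo wraps atoms that sit slightly
 * outside the unit cell back into a valid slab index in [0, nslab). For example, with
 * nslab = 4 and fraction = -0.05: s = -0.2 and static_cast<int>(-0.2 + 8) % 4 = 7 % 4 = 3,
 * i.e. the atom is assigned to the last slab.
 */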
//! Wrapper function for calculating slab indices, stored in \p atc
static void pme_calc_pidx_wrapper(gmx::ArrayRef<const gmx::RVec> x, const matrix recipbox, PmeAtomComm* atc)
{
    int nthread = atc->nthread;

#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)
    {
        try
        {
            const int natoms = x.ssize();
            pme_calc_pidx(natoms * thread / nthread,
                          natoms * (thread + 1) / nthread,
                          recipbox,
                          x,
                          atc,
                          atc->count_thread[thread].data());
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
    /* Non-parallel reduction, since nslab is small */
    for (int thread = 1; thread < nthread; thread++)
    {
        for (int slab = 0; slab < atc->nslab; slab++)
        {
            atc->count_thread[0][slab] += atc->count_thread[thread][slab];
        }
    }
}
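/* The static split above gives each thread a contiguous, near-equal range of atoms.
 * As an illustrative example (not from the code): with natoms = 10 and nthread = 3 the
 * ranges are [0,3), [3,6) and [6,10), so the integer division guarantees full,
 * non-overlapping coverage of all atoms. Each thread writes to its own count_thread
 * buffer, which is then reduced serially because nslab is small.
 */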
void SplineCoefficients::realloc(const int nalloc)
{
    const int padding = 4;

    bufferX_.resize(nalloc);
    coefficients[XX] = bufferX_.data();
    bufferY_.resize(nalloc);
    coefficients[YY] = bufferY_.data();
    /* In z we add padding; this is only required for the aligned 4-wide SIMD code */
    bufferZ_.resize(nalloc + 2 * padding);
    coefficients[ZZ] = bufferZ_.data() + padding;
}
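/* The symmetric padding above is presumably there so that aligned 4-wide SIMD loads and
 * stores on the z coefficients may safely touch up to 'padding' elements before and
 * after the range that is actually in use, without reading or writing outside the
 * allocated buffer.
 */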
//! Reallocates all buffers in \p spline to fit atoms in \p atc
static void pme_realloc_splinedata(splinedata_t* spline, const PmeAtomComm* atc)
{
    if (spline->nalloc >= atc->x.ssize() && spline->nalloc >= atc->numAtoms())
    {
        return;
    }

    spline->nalloc = std::max(atc->x.capacity(), static_cast<size_t>(atc->numAtoms()));
    spline->ind.resize(spline->nalloc);
    /* Initialize the index to identity so it works without threads */
    for (int i = 0; i < spline->nalloc; i++)
    {
        spline->ind[i] = i;
    }

    spline->theta.realloc(atc->pme_order * spline->nalloc);
    spline->dtheta.realloc(atc->pme_order * spline->nalloc);
}
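/* A small observation on the reallocation policy above: nalloc is taken from the
 * capacity of atc->x rather than its current size, so the spline buffers grow together
 * with the coordinate buffer and do not have to be reallocated every time the local
 * atom count fluctuates during domain decomposition.
 */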
void PmeAtomComm::setNumAtoms(const int numAtoms)
{
    numAtoms_ = numAtoms;

    if (nslab > 1)
    {
        /* We have to avoid a NULL pointer for atc->x to avoid
         * possible fatal errors in MPI routines.
         */
        xBuffer.resize(numAtoms_);
        if (xBuffer.capacity() == 0)
        {
            xBuffer.reserve(1);
        }
        x = xBuffer;
        coefficientBuffer.resize(numAtoms_);
        if (coefficientBuffer.capacity() == 0)
        {
            coefficientBuffer.reserve(1);
        }
        coefficient = coefficientBuffer;
        const int nalloc_old = fBuffer.size();
        fBuffer.resize(numAtoms_);
        for (int i = nalloc_old; i < numAtoms_; i++)
        {
            clear_rvec(fBuffer[i]);
        }
        f = fBuffer;
    }
    if (bSpread)
    {
        fractx.resize(numAtoms_);
        idx.resize(numAtoms_);
        if (nthread > 1)
        {
            thread_idx.resize(numAtoms_);
        }
        for (int i = 0; i < nthread; i++)
        {
            pme_realloc_splinedata(&spline[i], this);
        }
    }
}
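/* The reserve(1) calls above keep the buffers from being completely unallocated, so that
 * .data() returns a usable, non-null pointer even when this rank currently holds zero
 * PME atoms; this is the situation the NULL-pointer comment at the top of the branch
 * refers to.
 */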
//! Communicates buffers between ranks separated by \p shift slabs
static void pme_dd_sendrecv(PmeAtomComm gmx_unused* atc,
                            gmx_bool gmx_unused bBackward,
                            int gmx_unused shift,
                            void gmx_unused* buf_s,
                            int gmx_unused nbyte_s,
                            void gmx_unused* buf_r,
                            int gmx_unused nbyte_r)
{
#if GMX_MPI
    int        dest, src;
    MPI_Status stat;

    if (!bBackward)
    {
        dest = atc->slabCommSetup[shift].node_dest;
        src  = atc->slabCommSetup[shift].node_src;
    }
    else
    {
        dest = atc->slabCommSetup[shift].node_src;
        src  = atc->slabCommSetup[shift].node_dest;
    }

    if (nbyte_s > 0 && nbyte_r > 0)
    {
        MPI_Sendrecv(
                buf_s, nbyte_s, MPI_BYTE, dest, shift, buf_r, nbyte_r, MPI_BYTE, src, shift, atc->mpi_comm, &stat);
    }
    else if (nbyte_s > 0)
    {
        MPI_Send(buf_s, nbyte_s, MPI_BYTE, dest, shift, atc->mpi_comm);
    }
    else if (nbyte_r > 0)
    {
        MPI_Recv(buf_r, nbyte_r, MPI_BYTE, src, shift, atc->mpi_comm, &stat);
    }
#endif
}
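/* A short note on the communication pattern above: for the forward redistribution
 * (bBackward == FALSE) data is sent towards node_dest and received from node_src; for
 * the force redistribution the direction is reversed by swapping the two. Using a
 * single MPI_Sendrecv when both byte counts are non-zero lets MPI pair the send and
 * receive, avoiding the deadlock that two blocking MPI_Send calls on partner ranks
 * could otherwise cause.
 */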
//! Redistributes \p data and optionally coordinates between MPI ranks
static void dd_pmeredist_pos_coeffs(gmx_pme_t*                     pme,
                                    const gmx_bool                 bX,
                                    gmx::ArrayRef<const gmx::RVec> x,
                                    const real*                    data,
                                    PmeAtomComm*                   atc)
{
    int nnodes_comm, i, local_pos, buf_pos, node;

    nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);

    auto sendCount = atc->sendCount();
    int  nsend     = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        const int commnode                     = atc->slabCommSetup[i].node_dest;
        atc->slabCommSetup[commnode].buf_index = nsend;
        nsend += sendCount[commnode];
    }
    if (bX)
    {
        if (sendCount[atc->nodeid] + nsend != x.ssize())
        {
            gmx_fatal(FARGS,
                      "%zd particles communicated to PME rank %d are more than 2/3 times the cut-off "
                      "out of the domain decomposition cell of their charge group in dimension %c.\n"
                      "This usually means that your system is not well equilibrated.",
                      x.ssize() - (sendCount[atc->nodeid] + nsend),
                      pme->nodeid,
                      'x' + atc->dimind);
        }

        if (nsend > pme->buf_nalloc)
        {
            pme->buf_nalloc = over_alloc_dd(nsend);
            srenew(pme->bufv, pme->buf_nalloc);
            srenew(pme->bufr, pme->buf_nalloc);
        }

        int numAtoms = sendCount[atc->nodeid];
        for (i = 0; i < nnodes_comm; i++)
        {
            const int commnode = atc->slabCommSetup[i].node_dest;
            int       scount   = sendCount[commnode];
            /* Communicate the count */
            if (debug)
            {
                fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n", atc->dimind, atc->nodeid, commnode, scount);
            }
            pme_dd_sendrecv(
                    atc, FALSE, i, &scount, sizeof(int), &atc->slabCommSetup[i].rcount, sizeof(int));
            numAtoms += atc->slabCommSetup[i].rcount;
        }

        atc->setNumAtoms(numAtoms);
    }

    local_pos = 0;
    for (gmx::index i = 0; i < x.ssize(); i++)
    {
        node = atc->pd[i];
        if (node == atc->nodeid)
        {
            /* Copy direct to the receive buffer */
            if (bX)
            {
                copy_rvec(x[i], atc->xBuffer[local_pos]);
            }
            atc->coefficientBuffer[local_pos] = data[i];
            local_pos++;
        }
        else
        {
            /* Copy to the send buffer */
            int& buf_index = atc->slabCommSetup[node].buf_index;
            if (bX)
            {
                copy_rvec(x[i], pme->bufv[buf_index]);
            }
            pme->bufr[buf_index] = data[i];
            buf_index++;
        }
    }

    buf_pos = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        const int scount = atc->sendCount()[atc->slabCommSetup[i].node_dest];
        const int rcount = atc->slabCommSetup[i].rcount;
        if (scount > 0 || rcount > 0)
        {
            if (bX)
            {
                /* Communicate the coordinates */
                pme_dd_sendrecv(atc,
                                FALSE,
                                i,
                                pme->bufv[buf_pos],
                                scount * sizeof(rvec),
                                atc->xBuffer.data() + local_pos,
                                rcount * sizeof(rvec));
            }
            /* Communicate the coefficients */
            pme_dd_sendrecv(atc,
                            FALSE,
                            i,
                            pme->bufr + buf_pos,
                            scount * sizeof(real),
                            atc->coefficientBuffer.data() + local_pos,
                            rcount * sizeof(real));
            buf_pos += scount;
            local_pos += atc->slabCommSetup[i].rcount;
        }
    }
    GMX_ASSERT(local_pos == atc->numAtoms(), "After receiving we should have numAtoms coordinates");
}
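/* Reading guide for the function above (a summary, not additional behaviour): the
 * redistribution proceeds in three phases. First the per-slab send counts are exchanged
 * with each communication partner so the receiving side can size its buffers via
 * setNumAtoms(). Then every local atom is packed either directly into the receive
 * buffers (when it stays on this rank) or into the send buffers at the offset reserved
 * for its destination slab. Finally one sendrecv per partner moves the coordinates
 * (only when bX is set) and the coefficients.
 */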
void dd_pmeredist_f(struct gmx_pme_t* pme, PmeAtomComm* atc, gmx::ArrayRef<gmx::RVec> f, gmx_bool bAddF)
{
    int nnodes_comm, local_pos, buf_pos, i, node;

    nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);

    local_pos = atc->sendCount()[atc->nodeid];
    buf_pos   = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        const int commnode = atc->slabCommSetup[i].node_dest;
        const int scount   = atc->slabCommSetup[i].rcount;
        const int rcount   = atc->sendCount()[commnode];
        if (scount > 0 || rcount > 0)
        {
            /* Communicate the forces */
            pme_dd_sendrecv(atc,
                            TRUE,
                            i,
                            atc->f.data() + local_pos,
                            scount * sizeof(rvec),
                            pme->bufv + buf_pos,
                            rcount * sizeof(rvec));
            local_pos += scount;
        }
        atc->slabCommSetup[commnode].buf_index = buf_pos;
        buf_pos += rcount;
    }

    local_pos = 0;
    if (bAddF)
    {
        for (gmx::index i = 0; i < f.ssize(); i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Add from the local force array */
                rvec_inc(f[i], atc->f[local_pos]);
                local_pos++;
            }
            else
            {
                /* Add from the receive buffer */
                rvec_inc(f[i], pme->bufv[atc->slabCommSetup[node].buf_index]);
                atc->slabCommSetup[node].buf_index++;
            }
        }
    }
    else
    {
        for (gmx::index i = 0; i < f.ssize(); i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Copy from the local force array */
                copy_rvec(atc->f[local_pos], f[i]);
                local_pos++;
            }
            else
            {
                /* Copy from the receive buffer */
                copy_rvec(pme->bufv[atc->slabCommSetup[node].buf_index], f[i]);
                atc->slabCommSetup[node].buf_index++;
            }
        }
    }
}
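/* The function above is the reverse of dd_pmeredist_pos_coeffs: the roles of the send
 * and receive counts are swapped and pme_dd_sendrecv is called with bBackward == TRUE,
 * so the forces flow back along the same slab pairs that were used for the coordinates.
 * bAddF selects whether the incoming forces are accumulated into f or overwrite it.
 */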
void do_redist_pos_coeffs(struct gmx_pme_t*              pme,
                          const t_commrec*               cr,
                          gmx_bool                       bFirst,
                          gmx::ArrayRef<const gmx::RVec> x,
                          const real*                    data)
{
    for (int d = pme->ndecompdim - 1; d >= 0; d--)
    {
        gmx::ArrayRef<const gmx::RVec> xRef;
        const real*                    param_d;
        if (d == pme->ndecompdim - 1)
        {
            /* Start out with the local coordinates and charges */
            xRef    = x;
            param_d = data;
        }
        else
        {
            /* Redistribute the data collected along the previous dimension */
            const PmeAtomComm& atc = pme->atc[d + 1];
            xRef                   = atc.x;
            param_d                = atc.coefficient.data();
        }
        PmeAtomComm& atc = pme->atc[d];
        atc.pd.resize(xRef.size());
        pme_calc_pidx_wrapper(xRef, pme->recipbox, &atc);
        /* Redistribute x (only once) and qA/c6A or qB/c6B */
        if (DOMAINDECOMP(cr))
        {
            dd_pmeredist_pos_coeffs(pme, bFirst, xRef, param_d, &atc);
        }
    }
}
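/* Note on the dimension loop above: the decomposition dimensions are processed from the
 * innermost one backwards, so with a 2D PME decomposition atoms are first binned and
 * exchanged along the second dimension, and the result of that exchange is then
 * redistributed along the first. Following the comment "Redistribute x (only once)",
 * coordinates appear to be communicated only when bFirst is set; later calls (for
 * example for B-state charges or LJ-PME C6 coefficients) exchange only the coefficients.
 */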