*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
int recv_index0, recv_nindex;
MPI_Status stat;
int i, j, k, ix, iy, iz, icnt;
- int ipulse, send_id, recv_id, datasize;
+ int send_id, recv_id, datasize;
real *p;
real *sendptr, *recvptr;
/* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
overlap = &pme->overlap[1];
- for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
+ for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
{
/* Since we have already (un)wrapped the overlap in the z-dimension,
* we only have to communicate 0 to nkz (not pmegrid_nz).
*/
if (direction == GMX_SUM_GRID_FORWARD)
{
- send_id = overlap->send_id[ipulse];
- recv_id = overlap->recv_id[ipulse];
+ send_id = overlap->comm_data[ipulse].send_id;
+ recv_id = overlap->comm_data[ipulse].recv_id;
send_index0 = overlap->comm_data[ipulse].send_index0;
send_nindex = overlap->comm_data[ipulse].send_nindex;
recv_index0 = overlap->comm_data[ipulse].recv_index0;
}
else
{
- send_id = overlap->recv_id[ipulse];
- recv_id = overlap->send_id[ipulse];
+ send_id = overlap->comm_data[ipulse].recv_id;
+ recv_id = overlap->comm_data[ipulse].send_id;
send_index0 = overlap->comm_data[ipulse].recv_index0;
send_nindex = overlap->comm_data[ipulse].recv_nindex;
recv_index0 = overlap->comm_data[ipulse].send_index0;
datasize = pme->pmegrid_nx * pme->nkz;
- MPI_Sendrecv(overlap->sendbuf, send_nindex*datasize, GMX_MPI_REAL,
+ MPI_Sendrecv(overlap->sendbuf.data(), send_nindex*datasize, GMX_MPI_REAL,
send_id, ipulse,
- overlap->recvbuf, recv_nindex*datasize, GMX_MPI_REAL,
+ overlap->recvbuf.data(), recv_nindex*datasize, GMX_MPI_REAL,
recv_id, ipulse,
overlap->mpi_comm, &stat);
*/
overlap = &pme->overlap[0];
- for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
+ for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
{
if (direction == GMX_SUM_GRID_FORWARD)
{
- send_id = overlap->send_id[ipulse];
- recv_id = overlap->recv_id[ipulse];
+ send_id = overlap->comm_data[ipulse].send_id;
+ recv_id = overlap->comm_data[ipulse].recv_id;
send_index0 = overlap->comm_data[ipulse].send_index0;
send_nindex = overlap->comm_data[ipulse].send_nindex;
recv_index0 = overlap->comm_data[ipulse].recv_index0;
recv_nindex = overlap->comm_data[ipulse].recv_nindex;
- recvptr = overlap->recvbuf;
+ recvptr = overlap->recvbuf.data();
}
else
{
- send_id = overlap->recv_id[ipulse];
- recv_id = overlap->send_id[ipulse];
+ send_id = overlap->comm_data[ipulse].recv_id;
+ recv_id = overlap->comm_data[ipulse].send_id;
send_index0 = overlap->comm_data[ipulse].recv_index0;
send_nindex = overlap->comm_data[ipulse].recv_nindex;
recv_index0 = overlap->comm_data[ipulse].send_index0;
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
//! @cond Doxygen_Suppress
/*! \brief Data structure for grid communication */
-typedef struct {
+struct pme_grid_comm_t
+{
+ int send_id; //!< Destination rank id
int send_index0;
int send_nindex;
+ int recv_id; //!< Source rank id
int recv_index0;
int recv_nindex;
- int recv_size; /* Receive buffer width, used with OpenMP */
-} pme_grid_comm_t;
+ int recv_size = 0; //!< Receive buffer width, used with OpenMP
+};
-/*! \brief Data structure for grid overlap communication */
-typedef struct {
+/*! \brief Data structure for grid overlap communication in a single dimension */
+struct pme_overlap_t
+{
#if GMX_MPI
- MPI_Comm mpi_comm;
+ MPI_Comm mpi_comm; //!< MPI communicator
#endif
- int nnodes, nodeid;
- int *s2g0;
- int *s2g1;
- int noverlap_nodes;
- int *send_id, *recv_id;
- int send_size; /* Send buffer width, used with OpenMP */
- pme_grid_comm_t *comm_data;
- real *sendbuf;
- real *recvbuf;
-} pme_overlap_t;
+ int nnodes; //!< Number of ranks
+ int nodeid; //!< Unique rank identifier
+ std::vector<int> s2g0; //!< The local interpolation grid start for each rank
+ std::vector<int> s2g1; //!< The local interpolation grid end for each rank
+ int send_size; //!< Send buffer width, used with OpenMP
+ std::vector<pme_grid_comm_t> comm_data; //!< The communication setup for each overlap pulse
+ std::vector<real> sendbuf; //!< Shared buffer for sending
+ std::vector<real> recvbuf; //!< Shared buffer for receiving
+};
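
A minimal sketch (not part of the patch) of the traversal pattern the hunks above converge on, assuming a populated non-const pme_overlap_t *overlap, an int datasize and a GMX_MPI build:

    MPI_Status stat;
    for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
    {
        /* Rank ids and index ranges for this pulse now live in one struct,
         * replacing the old parallel send_id[]/recv_id[] arrays. */
        const pme_grid_comm_t &pulse = overlap->comm_data[ipulse];
        MPI_Sendrecv(overlap->sendbuf.data(), pulse.send_nindex*datasize, GMX_MPI_REAL,
                     pulse.send_id, ipulse,
                     overlap->recvbuf.data(), pulse.recv_nindex*datasize, GMX_MPI_REAL,
                     pulse.recv_id, ipulse,
                     overlap->mpi_comm, &stat);
    }
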
/*! \brief Data structure for organizing particle allocation to threads */
typedef struct {
MPI_Status stat;
#endif
int recv_size_y;
- int ipulse, size_yx;
- real *sendptr, *recvptr;
+ int size_yx;
int x, y, z, indg, indb;
/* Note that this routine is only used for forward communication.
int send_size_y = overlap->send_size;
#endif
- for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
+ for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
{
send_index0 =
overlap->comm_data[ipulse].send_index0 -
recv_nindex = overlap->comm_data[ipulse].recv_nindex;
recv_size_y = overlap->comm_data[ipulse].recv_size;
- sendptr = overlap->sendbuf + send_index0*local_fft_ndata[ZZ];
- recvptr = overlap->recvbuf;
+ auto *sendptr = const_cast<real *>(overlap->sendbuf.data()) + send_index0 * local_fft_ndata[ZZ];
+ auto *recvptr = const_cast<real *>(overlap->recvbuf.data());
if (debug != nullptr)
{
}
#if GMX_MPI
- int send_id = overlap->send_id[ipulse];
- int recv_id = overlap->recv_id[ipulse];
+ int send_id = overlap->comm_data[ipulse].send_id;
+ int recv_id = overlap->comm_data[ipulse].recv_id;
MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
send_id, ipulse,
recvptr, recv_size_y*datasize, GMX_MPI_REAL,
if (pme->nnodes_major > 1)
{
/* Copy from the received buffer to the send buffer for dim 0 */
- sendptr = pme->overlap[0].sendbuf;
+ sendptr = const_cast<real *>(pme->overlap[0].sendbuf.data());
for (x = 0; x < size_yx; x++)
{
for (y = 0; y < recv_nindex; y++)
/* Major dimension */
const pme_overlap_t *overlap = &pme->overlap[0];
- ipulse = 0;
+ size_t ipulse = 0;
send_nindex = overlap->comm_data[ipulse].send_nindex;
/* We don't use recv_index0, as we always receive starting at 0 */
recv_nindex = overlap->comm_data[ipulse].recv_nindex;
- recvptr = overlap->recvbuf;
-
if (debug != nullptr)
{
fprintf(debug, "PME fftgrid comm x %2d x %2d x %2d\n",
}
#if GMX_MPI
- int datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
- int send_id = overlap->send_id[ipulse];
- int recv_id = overlap->recv_id[ipulse];
- sendptr = overlap->sendbuf;
+ int datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
+ int send_id = overlap->comm_data[ipulse].send_id;
+ int recv_id = overlap->comm_data[ipulse].recv_id;
+ auto *sendptr = const_cast<real *>(overlap->sendbuf.data());
+ auto *recvptr = const_cast<real *>(overlap->recvbuf.data());
MPI_Sendrecv(sendptr, send_nindex*datasize, GMX_MPI_REAL,
send_id, ipulse,
recvptr, recv_nindex*datasize, GMX_MPI_REAL,
indb = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
for (z = 0; z < local_fft_ndata[ZZ]; z++)
{
- fftgrid[indg+z] += recvptr[indb+z];
+ fftgrid[indg + z] += overlap->recvbuf[indb + z];
}
}
}
{
reduce_threadgrid_overlap(pme, grids, thread,
fftgrid,
- pme->overlap[0].sendbuf,
- pme->overlap[1].sendbuf,
+ const_cast<real *>(pme->overlap[0].sendbuf.data()),
+ const_cast<real *>(pme->overlap[1].sendbuf.data()),
grid_index);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
int ndata,
int commplainsize)
{
- int b, i;
- pme_grid_comm_t *pgc;
gmx_bool bCont;
- int fft_start, fft_end, send_index1, recv_index1;
#if GMX_MPI
MPI_Status stat;
* that belong to higher nodes (modulo nnodes)
*/
- snew(ol->s2g0, ol->nnodes+1);
- snew(ol->s2g1, ol->nnodes);
+ ol->s2g0.resize(ol->nnodes + 1);
+ ol->s2g1.resize(ol->nnodes);
if (debug)
{
fprintf(debug, "PME slab boundaries:");
}
- for (i = 0; i < nnodes; i++)
+ for (int i = 0; i < nnodes; i++)
{
/* s2g0 the local interpolation grid start.
* s2g1 the local interpolation grid end.
* spatially uniform along dimension x or y, we need to round
* s2g0 down and s2g1 up.
*/
- ol->s2g0[i] = ( i *ndata + 0 )/nnodes;
- ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
+ ol->s2g0[i] = (i * ndata + 0) / nnodes;
+ ol->s2g1[i] = ((i + 1) * ndata + nnodes - 1) / nnodes + norder - 1;
if (debug)
{
}
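
To make the rounding above concrete, an illustrative case (not taken from the patch) with ndata = 32, nnodes = 3 and norder = 4 gives

    s2g0 = { 0, 10, 21 }   /* (i*32)/3, rounded down                 */
    s2g1 = { 14, 25, 35 }  /* (i+1)*32/3 rounded up, plus norder - 1 */

so each rank's interpolation region reaches past the start of the next slab (s2g1[0] = 14 > s2g0[1] = 10) but not past the slab after that (14 < s2g0[2] = 21), and the pulse search below therefore ends with a single pulse, i.e. comm_data.size() == 1.
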
/* Determine with how many nodes we need to communicate the grid overlap */
- b = 0;
+ int testRankCount = 0;
do
{
- b++;
+ testRankCount++;
bCont = FALSE;
- for (i = 0; i < nnodes; i++)
+ for (int i = 0; i < nnodes; i++)
{
- if ((i+b < nnodes && ol->s2g1[i] > ol->s2g0[i+b]) ||
- (i+b >= nnodes && ol->s2g1[i] > ol->s2g0[i+b-nnodes] + ndata))
+ if ((i + testRankCount < nnodes && ol->s2g1[i] > ol->s2g0[i + testRankCount]) ||
+ (i + testRankCount >= nnodes && ol->s2g1[i] > ol->s2g0[i + testRankCount - nnodes] + ndata))
{
bCont = TRUE;
}
}
}
- while (bCont && b < nnodes);
- ol->noverlap_nodes = b - 1;
-
- snew(ol->send_id, ol->noverlap_nodes);
- snew(ol->recv_id, ol->noverlap_nodes);
- for (b = 0; b < ol->noverlap_nodes; b++)
- {
- ol->send_id[b] = (ol->nodeid + (b + 1)) % ol->nnodes;
- ol->recv_id[b] = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
- }
- snew(ol->comm_data, ol->noverlap_nodes);
+ while (bCont && testRankCount < nnodes);
+ ol->comm_data.resize(testRankCount - 1);
ol->send_size = 0;
- for (b = 0; b < ol->noverlap_nodes; b++)
+
+ for (size_t b = 0; b < ol->comm_data.size(); b++)
{
- pgc = &ol->comm_data[b];
+ pme_grid_comm_t *pgc = &ol->comm_data[b];
+
/* Send */
- fft_start = ol->s2g0[ol->send_id[b]];
- fft_end = ol->s2g0[ol->send_id[b]+1];
- if (ol->send_id[b] < nodeid)
+ pgc->send_id = (ol->nodeid + (b + 1)) % ol->nnodes;
+ int fft_start = ol->s2g0[pgc->send_id];
+ int fft_end = ol->s2g0[pgc->send_id + 1];
+ if (pgc->send_id < nodeid)
{
fft_start += ndata;
fft_end += ndata;
}
- send_index1 = ol->s2g1[nodeid];
- send_index1 = std::min(send_index1, fft_end);
- pgc->send_index0 = fft_start;
- pgc->send_nindex = std::max(0, send_index1 - pgc->send_index0);
- ol->send_size += pgc->send_nindex;
+ int send_index1 = ol->s2g1[nodeid];
+ send_index1 = std::min(send_index1, fft_end);
+ pgc->send_index0 = fft_start;
+ pgc->send_nindex = std::max(0, send_index1 - pgc->send_index0);
+ ol->send_size += pgc->send_nindex;
/* We always start receiving to the first index of our slab */
+ pgc->recv_id = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
fft_start = ol->s2g0[ol->nodeid];
- fft_end = ol->s2g0[ol->nodeid+1];
- recv_index1 = ol->s2g1[ol->recv_id[b]];
- if (ol->recv_id[b] > nodeid)
+ fft_end = ol->s2g0[ol->nodeid + 1];
+ int recv_index1 = ol->s2g1[pgc->recv_id];
+ if (pgc->recv_id > nodeid)
{
recv_index1 -= ndata;
}
#if GMX_MPI
/* Communicate the buffer sizes to receive */
- for (b = 0; b < ol->noverlap_nodes; b++)
+ for (size_t b = 0; b < ol->comm_data.size(); b++)
{
- MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->send_id[b], b,
- &ol->comm_data[b].recv_size, 1, MPI_INT, ol->recv_id[b], b,
+ MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->comm_data[b].send_id, b,
+ &ol->comm_data[b].recv_size, 1, MPI_INT, ol->comm_data[b].recv_id, b,
ol->mpi_comm, &stat);
}
#endif
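
Continuing the same illustrative numbers for rank 0 and its single pulse (b = 0): send_id = (0 + 1) % 3 = 1, fft_start = s2g0[1] = 10, fft_end = s2g0[2] = 21 and send_index1 = min(s2g1[0], fft_end) = 14, so send_index0 = 10 and send_nindex = 4; rank 0 sends grid lines 10..13 of its overlap region to rank 1 and adds 4 to send_size.
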
/* For non-divisible grid we need pme_order instead of pme_order-1 */
- snew(ol->sendbuf, norder*commplainsize);
- snew(ol->recvbuf, norder*commplainsize);
-}
-
-/*! \brief Destroy data structure for communication */
-static void
-destroy_overlap_comm(const pme_overlap_t *ol)
-{
- sfree(ol->s2g0);
- sfree(ol->s2g1);
- sfree(ol->send_id);
- sfree(ol->recv_id);
- sfree(ol->comm_data);
- sfree(ol->sendbuf);
- sfree(ol->recvbuf);
+ ol->sendbuf.resize(norder * commplainsize);
+ ol->recvbuf.resize(norder * commplainsize);
}
int minimalPmeGridSize(int pmeOrder)
/* Double-check for a limitation of the (current) sum_fftgrid_dd code.
* Note that gmx_pme_check_restrictions checked for this already.
*/
- if (pme->bUseThreads && pme->overlap[0].noverlap_nodes > 1)
+ if (pme->bUseThreads && (pme->overlap[0].comm_data.size() > 1))
{
gmx_incons("More than one communication pulse required for grid overlap communication along the major dimension while using threads");
}
sfree(pme->bsp_mod[i]);
}
- destroy_overlap_comm(&pme->overlap[0]);
- destroy_overlap_comm(&pme->overlap[1]);
-
sfree(pme->lb_buf1);
sfree(pme->lb_buf2);