PME energies and forces could be incorrect with combined MPI+OpenMP
parallelization. This would, only, happen when
pmegrids->nthread_comm[YY] >= 2, which can only occur with high OpenMP
thread count with multiple prime factors that are large wrt the grid.
It's unlikely that this issue affected production runs.
This bug was fixed in
27189bba, but
6ba80a26 broke it again.
Fixes #1572.
Change-Id: Ic01bed4193062f8ca885fcb6bf347f2ef0de909f
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team,
- * check out http://www.gromacs.org for more information.
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
- * David van der Spoel, Berk Hess, Erik Lindahl, and including many
- * others, as listed in the AUTHORS file in the top-level source
- * directory and at http://www.gromacs.org.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
*
* GROMACS is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
*
* GROMACS is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
int fft_my, fft_mz;
int buf_my = -1;
int nsx, nsy, nsz;
int fft_my, fft_mz;
int buf_my = -1;
int nsx, nsy, nsz;
+ ivec localcopy_end, commcopy_end;
int offx, offy, offz, x, y, z, i0, i0t;
int sx, sy, sz, fx, fy, fz, tx1, ty1, tz1, ox, oy, oz;
gmx_bool bClearBufX, bClearBufY, bClearBufXY, bClearBuf;
int offx, offy, offz, x, y, z, i0, i0t;
int sx, sy, sz, fx, fy, fz, tx1, ty1, tz1, ox, oy, oz;
gmx_bool bClearBufX, bClearBufY, bClearBufXY, bClearBuf;
* not beyond the local FFT grid.
*/
localcopy_end[d] =
* not beyond the local FFT grid.
*/
localcopy_end[d] =
- min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
+ min(pmegrid->offset[d] + pmegrid->n[d] - (pmegrid->order - 1),
+
+ /* Determine up to where our thread needs to copy from the
+ * thread-local charge spreading grid to the communication buffer.
+ * Note: only relevant with communication, ignored otherwise.
+ */
+ commcopy_end[d] = localcopy_end[d];
+ if (pmegrid->ci[d] == pmegrids->nc[d] - 1)
+ {
+ /* The last thread should copy up to the last pme grid line.
+ * When the rank-local FFT grid is narrower than pme-order,
+ * we need the max below to ensure copying of all data.
+ */
+ commcopy_end[d] = max(commcopy_end[d], pme->pme_order);
+ }
}
offx = pmegrid->offset[XX];
}
offx = pmegrid->offset[XX];
}
pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
ox += pmegrid_g->offset[XX];
}
pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
ox += pmegrid_g->offset[XX];
- /* Determine the end of our part of the source grid */
- if (!bCommX)
- {
- /* Use our thread local source grid and target grid part */
- tx1 = min(ox + pmegrid_g->n[XX], localcopy_end[XX]);
- }
- else
- {
- /* Use our thread local source grid and the spreading range */
- tx1 = min(ox + pmegrid_g->n[XX], pme->pme_order);
- }
+ /* Determine the end of our part of the source grid.
+ * Use our thread local source grid and target grid part
+ */
+ tx1 = min(ox + pmegrid_g->n[XX],
+ !bCommX ? localcopy_end[XX] : commcopy_end[XX]);
for (sy = 0; sy >= -pmegrids->nthread_comm[YY]; sy--)
{
for (sy = 0; sy >= -pmegrids->nthread_comm[YY]; sy--)
{
}
pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
oy += pmegrid_g->offset[YY];
}
pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
oy += pmegrid_g->offset[YY];
- /* Determine the end of our part of the source grid */
- if (!bCommY)
- {
- /* Use our thread local source grid and target grid part */
- ty1 = min(oy + pmegrid_g->n[YY], localcopy_end[YY]);
- }
- else
- {
- /* Use our thread local source grid and the spreading range */
- ty1 = min(oy + pmegrid_g->n[YY], pme->pme_order);
- }
+ /* Determine the end of our part of the source grid.
+ * Use our thread local source grid and target grid part
+ */
+ ty1 = min(oy + pmegrid_g->n[YY],
+ !bCommY ? localcopy_end[YY] : commcopy_end[YY]);
for (sz = 0; sz >= -pmegrids->nthread_comm[ZZ]; sz--)
{
for (sz = 0; sz >= -pmegrids->nthread_comm[ZZ]; sz--)
{