/* Abstract type for PME <-> PP communication */
typedef struct gmx_pme_pp *gmx_pme_pp_t;
+GMX_LIBMD_EXPORT
+void gmx_pme_check_restrictions(int pme_order,
+ int nkx, int nky, int nkz,
+ int nnodes_major,
+ int nnodes_minor,
+ gmx_bool bUseThreads,
+ gmx_bool bFatal,
+ gmx_bool *bValidSettings);
+/* Check restrictions on pme_order and the PME grid nkx,nky,nkz.
+ * With bFatal=TRUE, a fatal error is generated on violation and
+ * bValidSettings=NULL can be passed.
+ * With bFatal=FALSE, *bValidSettings reports the validity of the settings.
+ * bUseThreads tells if any MPI rank doing PME uses more than 1 thread.
+ * If bUseThreads is unknown at the time of calling, pass TRUE for
+ * conservative checking.
+ */
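+/* Usage sketch (illustrative only; the order, grid and node counts
+ * below are made-up example values):
+ *
+ *   gmx_bool bValid;
+ *
+ *   // non-fatal probe: validity is reported in bValid
+ *   gmx_pme_check_restrictions(4, 48, 48, 48, 2, 1, TRUE, FALSE, &bValid);
+ *
+ *   // fatal mode: aborts on violation, so bValidSettings may be NULL
+ *   gmx_pme_check_restrictions(4, 48, 48, 48, 2, 1, TRUE, TRUE, NULL);
+ */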
+
gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr);
/* Initialize the PME-only side of the PME <-> PP communication */
#define PME_LB_ACCEL_TOL 1.02
enum {
- epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimNR
+ epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimPMEGRID, epmelblimNR
};
const char *pmelblim_str[epmelblimNR] =
-{ "no", "box size", "domain decompostion" };
+{ "no", "box size", "domain decompostion", "PME grid restriction" };
struct pme_load_balancing {
int nstage; /* the current maximum number of stages */
};
static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
- int pme_order)
+ int pme_order,
+ const gmx_domdec_t *dd)
{
pme_setup_t *set;
+ int npmenodes_x, npmenodes_y;
real fac, sp;
real tmpr_coulomb, tmpr_vdw;
int d;
+ gmx_bool grid_ok;
/* Try to add a new setup with next larger cut-off to the list */
pme_lb->n++;
set = &pme_lb->setup[pme_lb->n-1];
set->pmedata = NULL;
+ get_pme_nnodes(dd, &npmenodes_x, &npmenodes_y);
+
fac = 1;
do
{
+ /* Avoid an infinite while loop, which can occur at the minimum grid size.
+ * Note that in practice load balancing will stop before this point.
+ * The factor 2.1 allows for the extreme case in which only grids
+ * of powers of 2 are allowed (the current code supports more grids).
+ */
+ if (fac > 2.1)
+ {
+ pme_lb->n--;
+
+ return FALSE;
+ }
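+ /* Worked example of the 2.1 bound above (made-up numbers): if only
+ * power-of-2 grids existed, moving from a 64-line grid to the next
+ * coarser 32-line grid doubles the requested spacing, so fac never
+ * needs to exceed 2; 2.1 leaves a small margin beyond that worst case.
+ */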
+
fac *= 1.01;
clear_ivec(set->grid);
sp = calc_grid(NULL, pme_lb->box_start,
fac*pme_lb->setup[pme_lb->cur].spacing,
&set->grid[XX],
&set->grid[YY],
&set->grid[ZZ]);
- /* In parallel we can't have grids smaller than 2*pme_order,
- * and we would anyhow not gain much speed at these grid sizes.
+ /* As we can't easily check here whether any of the PME nodes
+ * uses threading, we do a conservative grid check.
+ * This means we can't use pme_order or fewer grid lines
+ * per PME node along x, which is not a strong restriction.
*/
- for (d = 0; d < DIM; d++)
- {
- if (set->grid[d] <= 2*pme_order)
- {
- pme_lb->n--;
-
- return FALSE;
- }
- }
+ gmx_pme_check_restrictions(pme_order,
+ set->grid[XX], set->grid[YY], set->grid[ZZ],
+ npmenodes_x, npmenodes_y,
+ TRUE,
+ FALSE,
+ &grid_ok);
}
- while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing);
+ while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing || !grid_ok);
set->rcut_coulomb = pme_lb->cut_spacing*sp;
{
char buf[STRLEN], sbuf[22];
- sprintf(buf, "step %4s: the %s limited the PME load balancing to a coulomb cut-off of %.3f",
+ sprintf(buf, "step %4s: the %s limits the PME load balancing to a coulomb cut-off of %.3f",
gmx_step_str(step, sbuf),
pmelblim_str[pme_lb->elimited],
pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut_coulomb);
else
{
/* Find the next setup */
- OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order);
+ OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order, cr->dd);
+
+ if (!OK)
+ {
+ pme_lb->elimited = epmelblimPMEGRID;
+ }
}
if (OK && ir->ePBC != epbcNONE)
return work;
}
-static void
-gmx_pme_check_grid_restrictions(FILE *fplog, char dim, int nnodes, int *nk)
+void gmx_pme_check_restrictions(int pme_order,
+ int nkx, int nky, int nkz,
+ int nnodes_major,
+ int nnodes_minor,
+ gmx_bool bUseThreads,
+ gmx_bool bFatal,
+ gmx_bool *bValidSettings)
{
- int nk_new;
-
- if (*nk % nnodes != 0)
+ if (pme_order > PME_ORDER_MAX)
{
- nk_new = nnodes*(*nk/nnodes + 1);
+ if (!bFatal)
+ {
+ *bValidSettings = FALSE;
+ return;
+ }
+ gmx_fatal(FARGS, "pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
+ pme_order, PME_ORDER_MAX);
+ }
- if (2*nk_new >= 3*(*nk))
+ if (nkx <= pme_order*(nnodes_major > 1 ? 2 : 1) ||
+ nky <= pme_order*(nnodes_minor > 1 ? 2 : 1) ||
+ nkz <= pme_order)
+ {
+ if (!bFatal)
{
- gmx_fatal(FARGS, "The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). The grid size would have to be increased by more than 50%% to make the grid divisible. Change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).",
- dim, *nk, dim, nnodes, dim);
+ *bValidSettings = FALSE;
+ return;
}
+ gmx_fatal(FARGS, "The PME grid sizes need to be larger than pme_order (%d) and for dimensions with domain decomposition larger than 2*pme_order",
+ pme_order);
+ }
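+ /* Worked example of the bound above (made-up numbers): with
+ * pme_order=4, nkz must be at least 5, and any dimension that is
+ * domain decomposed (nnodes_major or nnodes_minor > 1) needs at
+ * least 9 grid lines.
+ */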
- if (fplog != NULL)
+ /* Check for a limitation of the (current) sum_fftgrid_dd code.
+ * We only allow multiple communication pulses in dim 1, not in dim 0.
+ */
+ if (bUseThreads && (nkx < nnodes_major*pme_order &&
+ nkx != nnodes_major*(pme_order - 1)))
+ {
+ if (!bFatal)
{
- fprintf(fplog, "\nNOTE: The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). Increasing the PME grid size in dim %c to %d. This will increase the accuracy and will not decrease the performance significantly on this number of nodes. For optimal performance change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).\n\n",
- dim, *nk, dim, nnodes, dim, nk_new, dim);
+ *bValidSettings = FALSE;
+ return;
}
+ gmx_fatal(FARGS, "The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x should be >= pme_order (%d) or = pmeorder-1. To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
+ nkx/(double)nnodes_major, pme_order);
+ }
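+ /* Worked example of the check above (made-up numbers): with
+ * pme_order=4 and nnodes_major=4, a threaded run requires
+ * nkx >= 16, with nkx == 12 (= 4*(4-1)) as the single allowed
+ * exception; e.g. nkx = 14 would be rejected here.
+ */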
- *nk = nk_new;
+ if (bValidSettings != NULL)
+ {
+ *bValidSettings = TRUE;
}
+
+ return;
}
int gmx_pme_init(gmx_pme_t * pmedata,
pme->pme_order = ir->pme_order;
pme->epsilon_r = ir->epsilon_r;
- if (pme->pme_order > PME_ORDER_MAX)
- {
- gmx_fatal(FARGS, "pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
- pme->pme_order, PME_ORDER_MAX);
- }
-
- /* Currently pme.c supports only the fft5d FFT code.
- * Therefore the grid always needs to be divisible by nnodes.
- * When the old 1D code is also supported again, change this check.
- *
- * This check should be done before calling gmx_pme_init
- * and fplog should be passed iso stderr.
- *
- if (pme->ndecompdim >= 2)
- */
- if (pme->ndecompdim >= 1)
- {
- /*
- gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
- 'x',nnodes_major,&pme->nkx);
- gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
- 'y',nnodes_minor,&pme->nky);
- */
- }
-
- if (pme->nkx <= pme->pme_order*(pme->nnodes_major > 1 ? 2 : 1) ||
- pme->nky <= pme->pme_order*(pme->nnodes_minor > 1 ? 2 : 1) ||
- pme->nkz <= pme->pme_order)
- {
- gmx_fatal(FARGS, "The PME grid sizes need to be larger than pme_order (%d) and for dimensions with domain decomposition larger than 2*pme_order", pme->pme_order);
- }
+ /* If we violate restrictions, generate a fatal error here */
+ gmx_pme_check_restrictions(pme->pme_order,
+ pme->nkx, pme->nky, pme->nkz,
+ pme->nnodes_major,
+ pme->nnodes_minor,
+ pme->bUseThreads,
+ TRUE,
+ NULL);
if (pme->nnodes > 1)
{
pme->nky,
(div_round_up(pme->nkx, pme->nnodes_major)+pme->pme_order+1)*pme->nkz);
- /* Check for a limitation of the (current) sum_fftgrid_dd code.
- * We only allow multiple communication pulses in dim 1, not in dim 0.
+ /* Double-check for a limitation of the (current) sum_fftgrid_dd code.
+ * Note that gmx_pme_check_restrictions checked for this already.
*/
- if (pme->bUseThreads && (pme->overlap[0].noverlap_nodes > 1 ||
- pme->nkx < pme->nnodes_major*pme->pme_order))
+ if (pme->bUseThreads && pme->overlap[0].noverlap_nodes > 1)
{
- gmx_fatal(FARGS, "The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x and should be >= pme_order (%d). To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
- pme->nkx/(double)pme->nnodes_major, pme->pme_order);
+ gmx_incons("More than one communication pulse required for grid overlap communication along the major dimension while using threads");
}
snew(pme->bsp_mod[XX], pme->nkx);