PME load balancing now checks for PME grid restrictions
author     Berk Hess <hess@kth.se>
           Thu, 21 Mar 2013 11:30:22 +0000 (12:30 +0100)
committer  Berk Hess <hess@kth.se>
           Fri, 26 Apr 2013 16:52:47 +0000 (18:52 +0200)
To enable this, the PME restriction checks have been moved
to a separate function called gmx_pme_check_restrictions.
Also allowed nkx==nnodes_major*(order-1) again with threading.
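
As an illustration (a minimal sketch, not part of the change itself;
the local variable names are made up), the non-fatal mode that the
PME load balancer now uses looks like:

    gmx_bool grid_ok;

    /* Non-fatal mode: no fatal error is raised; instead *bValidSettings
     * reports whether the settings are usable. Passing bUseThreads=TRUE
     * gives the conservative check.
     */
    gmx_pme_check_restrictions(pme_order,
                               nkx, nky, nkz,
                               npmenodes_x, npmenodes_y,
                               TRUE,   /* bUseThreads (conservative) */
                               FALSE,  /* bFatal */
                               &grid_ok);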

Change-Id: I50b6f8cfaeba5e360c534424c80c320515812e43

include/domdec.h
include/pme.h
src/kernel/pme_loadbal.c
src/mdlib/domdec.c
src/mdlib/pme.c

diff --git a/include/domdec.h b/include/domdec.h
index eff1f302393602e92b053a3f09c6d9363d848937..19a3a78e44baa17c9d0da7f4101e17526f3660f0 100644
@@ -79,6 +79,11 @@ real dd_cutoff_mbody(gmx_domdec_t *dd);
 
 real dd_cutoff_twobody(gmx_domdec_t *dd);
 
+GMX_LIBMD_EXPORT
+void get_pme_nnodes(const gmx_domdec_t *dd,
+                    int *npmenodes_x, int *npmenodes_y);
+/* Get the number of PME nodes along x and y; can be called with dd=NULL */
+
 gmx_bool gmx_pmeonlynode(t_commrec *cr, int nodeid);
 /* Return if nodeid in cr->mpi_comm_mysim is a PME-only node */
 
diff --git a/include/pme.h b/include/pme.h
index 59e5874c532199389c95e46af784d54b69fe8bf1..352118bfdf4ea14c5bfbf2c395b445c32bb6ab5b 100644
@@ -122,6 +122,23 @@ void gmx_pme_calc_energy(gmx_pme_t pme, int n, rvec *x, real *q, real *V);
 /* Abstract type for PME <-> PP communication */
 typedef struct gmx_pme_pp *gmx_pme_pp_t;
 
+GMX_LIBMD_EXPORT
+void gmx_pme_check_restrictions(int pme_order,
+                                int nkx, int nky, int nkz,
+                                int nnodes_major,
+                                int nnodes_minor,
+                                gmx_bool bUseThreads,
+                                gmx_bool bFatal,
+                                gmx_bool *bValidSettings);
+/* Check restrictions on pme_order and the PME grid nkx,nky,nkz.
+ * With bFatal=TRUE, a fatal error is generated on violation and
+ * bValidSettings=NULL can be passed.
+ * With bFatal=FALSE, *bValidSettings reports the validity of the settings.
+ * bUseThreads tells if any MPI rank doing PME uses more than 1 thread.
+ * If bUseThreads is unknown at the time of calling, pass TRUE for
+ * conservative checking.
+ */
+
 gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr);
 /* Initialize the PME-only side of the PME <-> PP communication */
 
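
For contrast, a sketch of the fatal mode used at initialization time
(mirroring the gmx_pme_check_restrictions call added to gmx_pme_init in
src/mdlib/pme.c below): a fatal error aborts the run on any violation,
so bValidSettings may be NULL.

    gmx_pme_check_restrictions(pme->pme_order,
                               pme->nkx, pme->nky, pme->nkz,
                               pme->nnodes_major,
                               pme->nnodes_minor,
                               pme->bUseThreads,
                               TRUE,    /* bFatal */
                               NULL);   /* bValidSettings not needed */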
diff --git a/src/kernel/pme_loadbal.c b/src/kernel/pme_loadbal.c
index 5e57d1c4e151f9cd12c3c35fad045069fd4e3c51..d1a2f8efee5e71e259e1c37c0bf41f1a7975ca9b 100644
@@ -80,11 +80,11 @@ typedef struct {
 #define PME_LB_ACCEL_TOL 1.02
 
 enum {
-    epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimNR
+    epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimPMEGRID, epmelblimNR
 };
 
 const char *pmelblim_str[epmelblimNR] =
-{ "no", "box size", "domain decompostion" };
+{ "no", "box size", "domain decompostion", "PME grid restriction" };
 
 struct pme_load_balancing {
     int          nstage;             /* the current maximum number of stages */
@@ -205,12 +205,15 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
 }
 
 static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
-                                            int                  pme_order)
+                                            int                  pme_order,
+                                            const gmx_domdec_t   *dd)
 {
     pme_setup_t *set;
+    int          npmenodes_x, npmenodes_y;
     real         fac, sp;
     real         tmpr_coulomb, tmpr_vdw;
     int          d;
+    gmx_bool     grid_ok;
 
     /* Try to add a new setup with next larger cut-off to the list */
     pme_lb->n++;
@@ -218,9 +221,23 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
     set          = &pme_lb->setup[pme_lb->n-1];
     set->pmedata = NULL;
 
+    get_pme_nnodes(dd, &npmenodes_x, &npmenodes_y);
+
     fac = 1;
     do
     {
+        /* Avoid infinite while loop, which can occur at the minimum grid size.
+         * Note that in practice load balancing will stop before this point.
+         * The factor 2.1 allows for the extreme case in which only grids
+         * of powers of 2 are allowed (the current code supports more grids).
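+         * Since fac grows by 1% per iteration, that bound is reached
+         * after at most ceil(ln(2.1)/ln(1.01)) = 75 iterations.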
+         */
+        if (fac > 2.1)
+        {
+            pme_lb->n--;
+
+            return FALSE;
+        }
+
         fac *= 1.01;
         clear_ivec(set->grid);
         sp = calc_grid(NULL, pme_lb->box_start,
@@ -229,20 +246,19 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
                        &set->grid[YY],
                        &set->grid[ZZ]);
 
-        /* In parallel we can't have grids smaller than 2*pme_order,
-         * and we would anyhow not gain much speed at these grid sizes.
+        /* As here we can't easily check if one of the PME nodes
+         * uses threading, we do a conservative grid check.
+         * This means we can't use pme_order or fewer grid lines
+         * per PME node along x, which is not a strong restriction.
          */
-        for (d = 0; d < DIM; d++)
-        {
-            if (set->grid[d] <= 2*pme_order)
-            {
-                pme_lb->n--;
-
-                return FALSE;
-            }
-        }
+        gmx_pme_check_restrictions(pme_order,
+                                   set->grid[XX], set->grid[YY], set->grid[ZZ],
+                                   npmenodes_x, npmenodes_y,
+                                   TRUE,
+                                   FALSE,
+                                   &grid_ok);
     }
-    while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing);
+    while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing || !grid_ok);
 
     set->rcut_coulomb = pme_lb->cut_spacing*sp;
 
@@ -363,7 +379,7 @@ static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
 {
     char buf[STRLEN], sbuf[22];
 
-    sprintf(buf, "step %4s: the %s limited the PME load balancing to a coulomb cut-off of %.3f",
+    sprintf(buf, "step %4s: the %s limits the PME load balancing to a coulomb cut-off of %.3f",
             gmx_step_str(step, sbuf),
             pmelblim_str[pme_lb->elimited],
             pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut_coulomb);
@@ -531,7 +547,12 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
             else
             {
                 /* Find the next setup */
-                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order);
+                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order, cr->dd);
+
+                if (!OK)
+                {
+                    pme_lb->elimited = epmelblimPMEGRID;
+                }
             }
 
             if (OK && ir->ePBC != epbcNONE)
diff --git a/src/mdlib/domdec.c b/src/mdlib/domdec.c
index 56eb38d25f5b09b2337912e570ad426de2b6452a..ad5097d853240e2056be34f10daeef417b40cd70 100644
@@ -2317,6 +2317,21 @@ static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
     return pmenode;
 }
 
+void get_pme_nnodes(const gmx_domdec_t *dd,
+                    int *npmenodes_x, int *npmenodes_y)
+{
+    if (dd != NULL)
+    {
+        *npmenodes_x = dd->comm->npmenodes_x;
+        *npmenodes_y = dd->comm->npmenodes_y;
+    }
+    else
+    {
+        *npmenodes_x = 1;
+        *npmenodes_y = 1;
+    }
+}
+
 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
 {
     gmx_bool bPMEOnlyNode;
diff --git a/src/mdlib/pme.c b/src/mdlib/pme.c
index d5113abdbe69ef9eddf9b5a10836049590837296..520aa7368aa98d3c4cbf5455783e1afdb97b61ac 100644
@@ -3032,29 +3032,59 @@ static pme_spline_work_t *make_pme_spline_work(int order)
     return work;
 }
 
-static void
-gmx_pme_check_grid_restrictions(FILE *fplog, char dim, int nnodes, int *nk)
+void gmx_pme_check_restrictions(int pme_order,
+                                int nkx, int nky, int nkz,
+                                int nnodes_major,
+                                int nnodes_minor,
+                                gmx_bool bUseThreads,
+                                gmx_bool bFatal,
+                                gmx_bool *bValidSettings)
 {
-    int nk_new;
-
-    if (*nk % nnodes != 0)
+    if (pme_order > PME_ORDER_MAX)
     {
-        nk_new = nnodes*(*nk/nnodes + 1);
+        if (!bFatal)
+        {
+            *bValidSettings = FALSE;
+            return;
+        }
+        gmx_fatal(FARGS, "pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
+                  pme_order, PME_ORDER_MAX);
+    }
 
-        if (2*nk_new >= 3*(*nk))
+    if (nkx <= pme_order*(nnodes_major > 1 ? 2 : 1) ||
+        nky <= pme_order*(nnodes_minor > 1 ? 2 : 1) ||
+        nkz <= pme_order)
+    {
+        if (!bFatal)
         {
-            gmx_fatal(FARGS, "The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). The grid size would have to be increased by more than 50%% to make the grid divisible. Change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).",
-                      dim, *nk, dim, nnodes, dim);
+            *bValidSettings = FALSE;
+            return;
         }
+        gmx_fatal(FARGS, "The PME grid sizes need to be larger than pme_order (%d) and for dimensions with domain decomposition larger than 2*pme_order",
+                  pme_order);
+    }
 
-        if (fplog != NULL)
+    /* Check for a limitation of the (current) sum_fftgrid_dd code.
+     * We only allow multiple communication pulses in dim 1, not in dim 0.
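+     * (Assumption: nkx == nnodes_major*(pme_order - 1) is accepted again
+     * because the pme_order - 1 overlapping grid lines then fit within a
+     * single neighboring node, so one communication pulse suffices.)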
+     */
+    if (bUseThreads && (nkx < nnodes_major*pme_order &&
+                        nkx != nnodes_major*(pme_order - 1)))
+    {
+        if (!bFatal)
         {
-            fprintf(fplog, "\nNOTE: The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). Increasing the PME grid size in dim %c to %d. This will increase the accuracy and will not decrease the performance significantly on this number of nodes. For optimal performance change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).\n\n",
-                    dim, *nk, dim, nnodes, dim, nk_new, dim);
+            *bValidSettings = FALSE;
+            return;
         }
+        gmx_fatal(FARGS, "The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x should be >= pme_order (%d) or = pmeorder-1. To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
+                  nkx/(double)nnodes_major, pme_order);
+    }
 
-        *nk = nk_new;
+    if (bValidSettings != NULL)
+    {
+        *bValidSettings = TRUE;
     }
+
+    return;
 }
 
 int gmx_pme_init(gmx_pme_t *         pmedata,
@@ -3197,37 +3227,14 @@ int gmx_pme_init(gmx_pme_t *         pmedata,
     pme->pme_order   = ir->pme_order;
     pme->epsilon_r   = ir->epsilon_r;
 
-    if (pme->pme_order > PME_ORDER_MAX)
-    {
-        gmx_fatal(FARGS, "pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
-                  pme->pme_order, PME_ORDER_MAX);
-    }
-
-    /* Currently pme.c supports only the fft5d FFT code.
-     * Therefore the grid always needs to be divisible by nnodes.
-     * When the old 1D code is also supported again, change this check.
-     *
-     * This check should be done before calling gmx_pme_init
-     * and fplog should be passed iso stderr.
-     *
-       if (pme->ndecompdim >= 2)
-     */
-    if (pme->ndecompdim >= 1)
-    {
-        /*
-           gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
-                                        'x',nnodes_major,&pme->nkx);
-           gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
-                                        'y',nnodes_minor,&pme->nky);
-         */
-    }
-
-    if (pme->nkx <= pme->pme_order*(pme->nnodes_major > 1 ? 2 : 1) ||
-        pme->nky <= pme->pme_order*(pme->nnodes_minor > 1 ? 2 : 1) ||
-        pme->nkz <= pme->pme_order)
-    {
-        gmx_fatal(FARGS, "The PME grid sizes need to be larger than pme_order (%d) and for dimensions with domain decomposition larger than 2*pme_order", pme->pme_order);
-    }
+    /* If we violate restrictions, generate a fatal error here */
+    gmx_pme_check_restrictions(pme->pme_order,
+                               pme->nkx, pme->nky, pme->nkz,
+                               pme->nnodes_major,
+                               pme->nnodes_minor,
+                               pme->bUseThreads,
+                               TRUE,
+                               NULL);
 
     if (pme->nnodes > 1)
     {
@@ -3285,14 +3292,12 @@ int gmx_pme_init(gmx_pme_t *         pmedata,
                       pme->nky,
                       (div_round_up(pme->nkx, pme->nnodes_major)+pme->pme_order+1)*pme->nkz);
 
-    /* Check for a limitation of the (current) sum_fftgrid_dd code.
-     * We only allow multiple communication pulses in dim 1, not in dim 0.
+    /* Double-check for a limitation of the (current) sum_fftgrid_dd code.
+     * Note that gmx_pme_check_restrictions checked for this already.
      */
-    if (pme->bUseThreads && (pme->overlap[0].noverlap_nodes > 1 ||
-                             pme->nkx < pme->nnodes_major*pme->pme_order))
+    if (pme->bUseThreads && pme->overlap[0].noverlap_nodes > 1)
     {
-        gmx_fatal(FARGS, "The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x and should be >= pme_order (%d). To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
-                  pme->nkx/(double)pme->nnodes_major, pme->pme_order);
+        gmx_incons("More than one communication pulse required for grid overlap communication along the major dimension while using threads");
     }
 
     snew(pme->bsp_mod[XX], pme->nkx);