PME load balancing now checks for PME grid restrictions
[alexxy/gromacs.git] / src / kernel / pme_loadbal.c
index 5e57d1c4e151f9cd12c3c35fad045069fd4e3c51..d1a2f8efee5e71e259e1c37c0bf41f1a7975ca9b 100644 (file)
@@ -80,11 +80,11 @@ typedef struct {
 #define PME_LB_ACCEL_TOL 1.02
 
 enum {
-    epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimNR
+    epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimPMEGRID, epmelblimNR
 };
 
 const char *pmelblim_str[epmelblimNR] =
-{ "no", "box size", "domain decompostion" };
+{ "no", "box size", "domain decompostion", "PME grid restriction" };
 
 struct pme_load_balancing {
     int          nstage;             /* the current maximum number of stages */
@@ -205,12 +205,15 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
 }
 
 static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
-                                            int                  pme_order)
+                                            int                  pme_order,
+                                            const gmx_domdec_t   *dd)
 {
     pme_setup_t *set;
+    int          npmenodes_x, npmenodes_y;
     real         fac, sp;
     real         tmpr_coulomb, tmpr_vdw;
     int          d;
+    gmx_bool     grid_ok;
 
     /* Try to add a new setup with next larger cut-off to the list */
     pme_lb->n++;
@@ -218,9 +221,23 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
     set          = &pme_lb->setup[pme_lb->n-1];
     set->pmedata = NULL;
 
+    get_pme_nnodes(dd, &npmenodes_x, &npmenodes_y);
+
     fac = 1;
     do
     {
+        /* Avoid infinite while loop, which can occur at the minimum grid size.
+         * Note that in practice load balancing will stop before this point.
+         * The factor 2.1 allows for the extreme case in which only grids
+         * of powers of 2 are allowed (the current code supports more grids).
+         */
+        if (fac > 2.1)
+        {
+            pme_lb->n--;
+
+            return FALSE;
+        }
+
         fac *= 1.01;
         clear_ivec(set->grid);
         sp = calc_grid(NULL, pme_lb->box_start,
@@ -229,20 +246,19 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
                        &set->grid[YY],
                        &set->grid[ZZ]);
 
-        /* In parallel we can't have grids smaller than 2*pme_order,
-         * and we would anyhow not gain much speed at these grid sizes.
+        /* As here we can't easily check if one of the PME nodes
+         * uses threading, we do a conservative grid check.
+         * This means we can't use pme_order or less grid lines
+         * per PME node along x, which is not a strong restriction.
          */
-        for (d = 0; d < DIM; d++)
-        {
-            if (set->grid[d] <= 2*pme_order)
-            {
-                pme_lb->n--;
-
-                return FALSE;
-            }
-        }
+        gmx_pme_check_restrictions(pme_order,
+                                   set->grid[XX], set->grid[YY], set->grid[ZZ],
+                                   npmenodes_x, npmenodes_y,
+                                   TRUE,
+                                   FALSE,
+                                   &grid_ok);
     }
-    while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing);
+    while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing || !grid_ok);
 
     set->rcut_coulomb = pme_lb->cut_spacing*sp;
 
@@ -363,7 +379,7 @@ static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
 {
     char buf[STRLEN], sbuf[22];
 
-    sprintf(buf, "step %4s: the %s limited the PME load balancing to a coulomb cut-off of %.3f",
+    sprintf(buf, "step %4s: the %s limits the PME load balancing to a coulomb cut-off of %.3f",
             gmx_step_str(step, sbuf),
             pmelblim_str[pme_lb->elimited],
             pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut_coulomb);
@@ -531,7 +547,12 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
             else
             {
                 /* Find the next setup */
-                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order);
+                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order, cr->dd);
+
+                if (!OK)
+                {
+                    pme_lb->elimited = epmelblimPMEGRID;
+                }
             }
 
             if (OK && ir->ePBC != epbcNONE)