PME-only counters are now reset with -maxh -resethway
author: Berk Hess <hess@kth.se>
Wed, 30 Jan 2013 09:57:51 +0000 (10:57 +0100)
committer: Gerrit Code Review <gerrit@gerrit.gromacs.org>
Fri, 1 Feb 2013 09:22:45 +0000 (10:22 +0100)
Fixes #1125
For clarity, gmx_pme_recv_q_x now returns an enum
and gmx_pme_send_switch is renamed to gmx_pme_send_switchgrid.

Change-Id: I21952780f0b719378d947a79324c844ccde7160a

include/pme.h
src/kernel/md.c
src/kernel/pme_loadbal.c
src/mdlib/pme.c
src/mdlib/pme_pp.c

index 2d84bfdc79e24d8ca365be7d87d93bc8fa865d08..59e5874c532199389c95e46af784d54b69fe8bf1 100644 (file)
@@ -141,16 +141,29 @@ void gmx_pme_send_finish(t_commrec *cr);
 /* Tell our PME-only node to finish */
 
 GMX_LIBMD_EXPORT
-void gmx_pme_send_switch(t_commrec *cr, ivec grid_size, real ewaldcoeff);
+void gmx_pme_send_switchgrid(t_commrec *cr, ivec grid_size, real ewaldcoeff);
 /* Tell our PME-only node to switch to a new grid size */
 
+GMX_LIBMD_EXPORT
+void gmx_pme_send_resetcounters(t_commrec *cr, gmx_large_int_t step);
+/* Tell our PME-only node to reset all cycle and flop counters */
+
 void gmx_pme_receive_f(t_commrec *cr,
                        rvec f[], matrix vir,
                        real *energy, real *dvdlambda,
                        float *pme_cycles);
 /* PP nodes receive the long range forces from the PME nodes */
 
+/* Return values for gmx_pme_recv_q_x */
+enum {
+    pmerecvqxX,            /* calculate PME mesh interactions for new x    */
+    pmerecvqxFINISH,       /* the simulation should finish, we should quit */
+    pmerecvqxSWITCHGRID,   /* change the PME grid size                     */
+    pmerecvqxRESETCOUNTERS /* reset the cycle and flop counters            */
+};
+
 int gmx_pme_recv_q_x(gmx_pme_pp_t pme_pp,
+                     int *natoms,
                      real **chargeA, real **chargeB,
                      matrix box, rvec **x, rvec **f,
                      int *maxshift_x, int *maxshift_y,
@@ -159,10 +172,11 @@ int gmx_pme_recv_q_x(gmx_pme_pp_t pme_pp,
                      gmx_large_int_t *step,
                      ivec grid_size, real *ewaldcoeff);
 ;
-/* Receive charges and/or coordinates from the PP-only nodes.
- * Returns the number of atoms, or -1 when the run is finished.
- * In the special case of a PME grid size switch request, -2 is returned
- * and grid_size and *ewaldcoeff are set, which are otherwise not set.
+/* With return value:
+ * pmerecvqxX:             all parameters set, chargeA and chargeB can be NULL
+ * pmerecvqxFINISH:        no parameters set
+ * pmerecvqxSWITCHGRID:    only grid_size and *ewaldcoeff are set
+ * pmerecvqxRESETCOUNTERS: *step is set
  */
 
 void gmx_pme_send_force_vir_ener(gmx_pme_pp_t pme_pp,
index 6bfab428f85238148f67ee46d52e47f4c7a5f234..ffbcef6ea60c8a50b1a2091f83ff447105fb1774 100644 (file)
@@ -2172,6 +2172,11 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
             reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, runtime,
                                fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
             wcycle_set_reset_counters(wcycle, -1);
+            if (!(cr->duty & DUTY_PME))
+            {
+                /* Tell our PME node to reset its counters */
+                gmx_pme_send_resetcounters(cr, step);
+            }
             /* Correct max_hours for the elapsed time */
             max_hours                -= run_time/(60.0*60.0);
             bResetCountersHalfMaxH    = FALSE;
index 7e6c2b397166a7e42ec585cfb4c0e44cc6af14d4..c6c11bc13265785a609a7c2b60ed1d4345989c65 100644 (file)
@@ -676,7 +676,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
     else
     {
         /* Tell our PME-only node to switch grid */
-        gmx_pme_send_switch(cr, set->grid, set->ewaldcoeff);
+        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff);
     }
 
     if (debug)
index 54f003f6e2568a5231d3686b9ebde4dbe5a42493..128f5e36021172511ff146482b5742b62074692a 100644 (file)
@@ -4108,14 +4108,19 @@ void gmx_pme_calc_energy(gmx_pme_t pme, int n, rvec *x, real *q, real *V)
 
 
 static void reset_pmeonly_counters(t_commrec *cr, gmx_wallcycle_t wcycle,
-                                   t_nrnb *nrnb, t_inputrec *ir, gmx_large_int_t step_rel)
+                                   t_nrnb *nrnb, t_inputrec *ir,
+                                   gmx_large_int_t step)
 {
     /* Reset all the counters related to performance over the run */
     wallcycle_stop(wcycle, ewcRUN);
     wallcycle_reset_all(wcycle);
     init_nrnb(nrnb);
-    ir->init_step += step_rel;
-    ir->nsteps    -= step_rel;
+    if (ir->nsteps >= 0)
+    {
+        /* ir->nsteps is not used here, but we update it for consistency */
+        ir->nsteps -= step - ir->init_step;
+    }
+    ir->init_step = step;
     wallcycle_start(wcycle, ewcRUN);
 }
 
@@ -4163,6 +4168,7 @@ int gmx_pmeonly(gmx_pme_t pme,
     int npmedata;
     gmx_pme_t *pmedata;
     gmx_pme_pp_t pme_pp;
+    int  ret;
     int  natoms;
     matrix box;
     rvec *x_pp      = NULL, *f_pp = NULL;
@@ -4193,23 +4199,30 @@ int gmx_pmeonly(gmx_pme_t pme,
         do
         {
             /* Domain decomposition */
-            natoms = gmx_pme_recv_q_x(pme_pp,
-                                      &chargeA, &chargeB, box, &x_pp, &f_pp,
-                                      &maxshift_x, &maxshift_y,
-                                      &pme->bFEP, &lambda,
-                                      &bEnerVir,
-                                      &step,
-                                      grid_switch, &ewaldcoeff);
-
-            if (natoms == -2)
+            ret = gmx_pme_recv_q_x(pme_pp,
+                                   &natoms,
+                                   &chargeA, &chargeB, box, &x_pp, &f_pp,
+                                   &maxshift_x, &maxshift_y,
+                                   &pme->bFEP, &lambda,
+                                   &bEnerVir,
+                                   &step,
+                                   grid_switch, &ewaldcoeff);
+
+            if (ret == pmerecvqxSWITCHGRID)
             {
                 /* Switch the PME grid to grid_switch */
                 gmx_pmeonly_switch(&npmedata, &pmedata, grid_switch, cr, ir, &pme);
             }
+
+            if (ret == pmerecvqxRESETCOUNTERS)
+            {
+                /* Reset the cycle and flop counters */
+                reset_pmeonly_counters(cr, wcycle, nrnb, ir, step);
+            }
         }
-        while (natoms == -2);
+        while (ret == pmerecvqxSWITCHGRID || ret == pmerecvqxRESETCOUNTERS);
 
-        if (natoms == -1)
+        if (ret == pmerecvqxFINISH)
         {
             /* We should stop: break out of the loop */
             break;
@@ -4238,14 +4251,6 @@ int gmx_pmeonly(gmx_pme_t pme,
                                     cycles);
 
         count++;
-
-        if (step_rel == wcycle_get_reset_counters(wcycle))
-        {
-            /* Reset all the counters related to performance over the run */
-            reset_pmeonly_counters(cr, wcycle, nrnb, ir, step_rel);
-            wcycle_set_reset_counters(wcycle, 0);
-        }
-
     } /***** end of quasi-loop, we stop with the break above */
     while (TRUE);
 
index 1e791bef6388962cb9583ed4b5d9c1b6789ab4a3..8a760e4e2e706a8cc6c473cee0fb290e8f7867ef 100644 (file)
 
 #include "mpelogging.h"
 
-#define PP_PME_CHARGE   (1<<0)
-#define PP_PME_CHARGEB  (1<<1)
-#define PP_PME_COORD    (1<<2)
-#define PP_PME_FEP      (1<<3)
-#define PP_PME_ENER_VIR (1<<4)
-#define PP_PME_FINISH   (1<<5)
-#define PP_PME_SWITCH   (1<<6)
+#define PP_PME_CHARGE         (1<<0)
+#define PP_PME_CHARGEB        (1<<1)
+#define PP_PME_COORD          (1<<2)
+#define PP_PME_FEP            (1<<3)
+#define PP_PME_ENER_VIR       (1<<4)
+#define PP_PME_FINISH         (1<<5)
+#define PP_PME_SWITCHGRID     (1<<6)
+#define PP_PME_RESETCOUNTERS  (1<<7)
+
 
 #define PME_PP_SIGSTOP     (1<<0)
 #define PME_PP_SIGSTOPNSS     (1<<1)
@@ -291,14 +293,15 @@ void gmx_pme_send_finish(t_commrec *cr)
     gmx_pme_send_q_x(cr, flags, NULL, NULL, NULL, NULL, 0, 0, 0, -1);
 }
 
-void gmx_pme_send_switch(t_commrec *cr, ivec grid_size, real ewaldcoeff)
+void gmx_pme_send_switchgrid(t_commrec *cr, ivec grid_size, real ewaldcoeff)
 {
 #ifdef GMX_MPI
     gmx_pme_comm_n_box_t cnb;
 
+    /* Only let one PP node signal each PME node */
     if (cr->dd->pme_receive_vir_ener)
     {
-        cnb.flags = PP_PME_SWITCH;
+        cnb.flags = PP_PME_SWITCHGRID;
         copy_ivec(grid_size, cnb.grid_size);
         cnb.ewaldcoeff = ewaldcoeff;
 
@@ -309,7 +312,26 @@ void gmx_pme_send_switch(t_commrec *cr, ivec grid_size, real ewaldcoeff)
 #endif
 }
 
+void gmx_pme_send_resetcounters(t_commrec *cr, gmx_large_int_t step)
+{
+#ifdef GMX_MPI
+    gmx_pme_comm_n_box_t cnb;
+
+    /* Only let one PP node signal each PME node */
+    if (cr->dd->pme_receive_vir_ener)
+    {
+        cnb.flags = PP_PME_RESETCOUNTERS;
+        cnb.step  = step;
+
+        /* We send this, uncommon, message blocking to simplify the code */
+        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
+                 cr->dd->pme_nodeid, 0, cr->mpi_comm_mysim);
+    }
+#endif
+}
+
 int gmx_pme_recv_q_x(struct gmx_pme_pp *pme_pp,
+                     int *natoms,
                      real **chargeA, real **chargeB,
                      matrix box, rvec **x, rvec **f,
                      int *maxshift_x, int *maxshift_y,
@@ -336,20 +358,29 @@ int gmx_pme_recv_q_x(struct gmx_pme_pp *pme_pp,
 
         if (debug)
         {
-            fprintf(debug, "PME only node receiving:%s%s%s%s\n",
-                    (cnb.flags & PP_PME_CHARGE) ? " charges" : "",
-                    (cnb.flags & PP_PME_COORD ) ? " coordinates" : "",
-                    (cnb.flags & PP_PME_FINISH) ? " finish" : "",
-                    (cnb.flags & PP_PME_SWITCH) ? " switch" : "");
+            fprintf(debug, "PME only node receiving:%s%s%s%s%s\n",
+                    (cnb.flags & PP_PME_CHARGE)        ? " charges" : "",
+                    (cnb.flags & PP_PME_COORD )        ? " coordinates" : "",
+                    (cnb.flags & PP_PME_FINISH)        ? " finish" : "",
+                    (cnb.flags & PP_PME_SWITCHGRID)    ? " switch grid" : "",
+                    (cnb.flags & PP_PME_RESETCOUNTERS) ? " reset counters" : "");
         }
 
-        if (cnb.flags & PP_PME_SWITCH)
+        if (cnb.flags & PP_PME_SWITCHGRID)
         {
             /* Special case, receive the new parameters and return */
             copy_ivec(cnb.grid_size, grid_size);
             *ewaldcoeff = cnb.ewaldcoeff;
 
-            return -2;
+            return pmerecvqxSWITCHGRID;
+        }
+
+        if (cnb.flags & PP_PME_RESETCOUNTERS)
+        {
+            /* Special case, receive the step and return */
+            *step = cnb.step;
+
+            return pmerecvqxRESETCOUNTERS;
         }
 
         if (cnb.flags & PP_PME_CHARGE)
@@ -481,13 +512,13 @@ int gmx_pme_recv_q_x(struct gmx_pme_pp *pme_pp,
     *step = cnb.step;
 #endif
 
+    *natoms  = nat;
     *chargeA = pme_pp->chargeA;
     *chargeB = pme_pp->chargeB;
     *x       = pme_pp->x;
     *f       = pme_pp->f;
 
-
-    return ((cnb.flags & PP_PME_FINISH) ? -1 : nat);
+    return ((cnb.flags & PP_PME_FINISH) ? pmerecvqxFINISH : pmerecvqxX);
 }
 
 static void receive_virial_energy(t_commrec *cr,