Cleaned up run time measurement
authorMark Abraham <mark.j.abraham@gmail.com>
Wed, 25 Sep 2013 22:25:20 +0000 (00:25 +0200)
committerGerrit Code Review <gerrit@gerrit.gromacs.org>
Sat, 26 Oct 2013 05:44:58 +0000 (07:44 +0200)
Removed all use of clock(), in favour of using clock_gettime for
per-node timer and (where available) per-thread timer. This is better
than using gettimeofday(), which is deprecated by POSIX. clock() has
no real advantages, and requires us to keep bumping our own counter if
we use it (in particular, on any remaining 32-bit
machines). Eliminated related dead code.

Cleaned up struct gmx_runtime_t and related code by
* renaming variables more descriptively
* removing variables that should be local variables in a function
* eliminating variables associated with using clock()

The behaviour of print_perf is preserved (tested real MPI, thread MPI;
serial looks like it should work also); namely that the "(%)" should
be a crude reflection of machine load.

Change-Id: Icdf506150045bee88cf04200b25cd010a9bb5419

CMakeLists.txt
src/gromacs/gmxlib/nrnb.c
src/gromacs/legacyheaders/sim_util.h
src/gromacs/mdlib/expanded.c
src/gromacs/mdlib/sim_util.c
src/programs/mdrun/md.c
src/programs/mdrun/runner.c

index 0ee52bf377d1463ed9e66621b34c528ffac2a4b5..be9a1f04f4c8cd96a446527003ea4e08f51e797a 100644 (file)
@@ -335,6 +335,7 @@ include(CheckIncludeFileCXX)
 check_include_files(unistd.h     HAVE_UNISTD_H)
 check_include_files(pwd.h        HAVE_PWD_H)
 check_include_files(dirent.h     HAVE_DIRENT_H)
+check_include_files(time.h       HAVE_TIME_H)
 check_include_files(sys/time.h   HAVE_SYS_TIME_H)
 check_include_files(io.h         HAVE_IO_H)
 check_include_files(sched.h      HAVE_SCHED_H)
@@ -363,6 +364,7 @@ check_function_exists(sqrtf             HAVE_SQRTF)
 
 include(CheckLibraryExists)
 check_library_exists(m sqrt "" HAVE_LIBM)
+check_library_exists(rt clock_gettime "" HAVE_LIBRT)
 
 
 include(CheckTypeSize)
@@ -1058,6 +1060,11 @@ if(GMX_LOAD_PLUGINS)
 endif(GMX_LOAD_PLUGINS)
 set(VMD_QUIETLY TRUE CACHE INTERNAL "")
 
+# Link real-time library for POSIX timers
+if(HAVE_TIME_H AND HAVE_UNISTD_H AND HAVE_LIBRT)
+    list(APPEND GMX_EXTRA_LIBRARIES rt)
+endif()
+
 # Math and thread libraries must often come after all others when linking...
 if(HAVE_LIBM)
     list(APPEND GMX_EXTRA_LIBRARIES m)
index fa8dd0886bcf845c89cbdc2decde0501902dd0e4..9c4917e821c80e075bf81ed4d93eb13c5b2a066b 100644 (file)
@@ -383,7 +383,7 @@ void print_flop(FILE *out, t_nrnb *nrnb, double *nbfs, double *mflop)
     }
 }
 
-void print_perf(FILE *out, double nodetime, double realtime,
+void print_perf(FILE *out, double time_per_thread, double time_per_node,
                 gmx_large_int_t nsteps, real delta_t,
                 double nbfs, double mflop)
 {
@@ -391,20 +391,20 @@ void print_perf(FILE *out, double nodetime, double realtime,
 
     fprintf(out, "\n");
 
-    if (realtime > 0)
+    if (time_per_node > 0)
     {
         fprintf(out, "%12s %12s %12s %10s\n", "", "Core t (s)", "Wall t (s)", "(%)");
         fprintf(out, "%12s %12.3f %12.3f %10.1f\n", "Time:",
-                nodetime, realtime, 100.0*nodetime/realtime);
-        /* only print day-hour-sec format if realtime is more than 30 min */
-        if (realtime > 30*60)
+                time_per_thread, time_per_node, 100.0*time_per_thread/time_per_node);
+        /* only print day-hour-sec format if time_per_node is more than 30 min */
+        if (time_per_node > 30*60)
         {
             fprintf(out, "%12s %12s", "", "");
-            pr_difftime(out, realtime);
+            pr_difftime(out, time_per_node);
         }
         if (delta_t > 0)
         {
-            mflop   = mflop/realtime;
+            mflop   = mflop/time_per_node;
             runtime = nsteps*delta_t;
 
             if (getenv("GMX_DETAILED_PERF_STATS") == NULL)
@@ -412,7 +412,7 @@ void print_perf(FILE *out, double nodetime, double realtime,
                 fprintf(out, "%12s %12s %12s\n",
                         "", "(ns/day)", "(hour/ns)");
                 fprintf(out, "%12s %12.3f %12.3f\n", "Performance:",
-                        runtime*24*3.6/realtime, 1000*realtime/(3600*runtime));
+                        runtime*24*3.6/time_per_node, 1000*time_per_node/(3600*runtime));
             }
             else
             {
@@ -420,8 +420,8 @@ void print_perf(FILE *out, double nodetime, double realtime,
                         "", "(Mnbf/s)", (mflop > 1000) ? "(GFlops)" : "(MFlops)",
                         "(ns/day)", "(hour/ns)");
                 fprintf(out, "%12s %12.3f %12.3f %12.3f %12.3f\n", "Performance:",
-                        nbfs/realtime, (mflop > 1000) ? (mflop/1000) : mflop,
-                        runtime*24*3.6/realtime, 1000*realtime/(3600*runtime));
+                        nbfs/time_per_node, (mflop > 1000) ? (mflop/1000) : mflop,
+                        runtime*24*3.6/time_per_node, 1000*time_per_node/(3600*runtime));
             }
         }
         else
@@ -431,7 +431,7 @@ void print_perf(FILE *out, double nodetime, double realtime,
                 fprintf(out, "%12s %14s\n",
                         "", "(steps/hour)");
                 fprintf(out, "%12s %14.1f\n", "Performance:",
-                        nsteps*3600.0/realtime);
+                        nsteps*3600.0/time_per_node);
             }
             else
             {
@@ -439,8 +439,8 @@ void print_perf(FILE *out, double nodetime, double realtime,
                         "", "(Mnbf/s)", (mflop > 1000) ? "(GFlops)" : "(MFlops)",
                         "(steps/hour)");
                 fprintf(out, "%12s %12.3f %12.3f %14.1f\n", "Performance:",
-                        nbfs/realtime, (mflop > 1000) ? (mflop/1000) : mflop,
-                        nsteps*3600.0/realtime);
+                        nbfs/time_per_node, (mflop > 1000) ? (mflop/1000) : mflop,
+                        nsteps*3600.0/time_per_node);
             }
         }
     }
index 8e51d1bb8c3df9ace21380d88f97199cc15a42f6..c38c2f183a9f85a51a41753bf54f1353cc58bf07 100644 (file)
@@ -66,21 +66,11 @@ typedef struct gmx_global_stat *gmx_global_stat_t;
 
 /*! /brief Manages measuring wall clock times for simulations */
 typedef struct {
-    double          real;     //!< Seconds since the epoch recorded at the start of the simulation
-    // TODO rename real
-#ifdef GMX_CRAY_XT3
-    double          proc;     //!< Seconds since the start start of the simulation
-#else
-    clock_t         proc;     //!< Seconds since the start start of the simulation
-#endif
-    double          realtime; //!< Total seconds elapsed over the simulation
-    // TODO rename realtime
-    double          proctime; //!< Total seconds elapsed over the simulation
-    // TODO eliminate use of proc and proctime - also eliminates
-    // dependency on clock() and dclock()
-    double          time_per_step; //!< Local variable TODO kill this
-    double          last;          //!< Local variable TODO kill this
-    gmx_large_int_t nsteps_done;   //!< Used by integrators to report the amount of work they did
+    double          start_time_stamp;            //!< Seconds since the epoch recorded at the start of the simulation
+    double          start_time_stamp_per_thread; //!< Seconds since the epoch recorded at the start of the simulation for this thread
+    double          elapsed_run_time;            //!< Total seconds elapsed over the simulation
+    double          elapsed_run_time_per_thread; //!< Total seconds elapsed over the simulation running this thread
+    gmx_large_int_t nsteps_done;                 //!< Used by integrators to report the amount of work they did
 } gmx_runtime_t;
 
 
@@ -155,10 +145,7 @@ void runtime_start(gmx_runtime_t *runtime);
 
 void runtime_end(gmx_runtime_t *runtime);
 
-void runtime_upd_proc(gmx_runtime_t *runtime);
-/* The processor time should be updated every once in a while,
- * since on 32-bit manchines it loops after 72 minutes.
- */
+double runtime_get_elapsed_time(gmx_runtime_t *runtime);
 
 void print_date_and_time(FILE *log, int pid, const char *title,
                          const gmx_runtime_t *runtime);
index 7c86774c1c35b4ed08c63afd78b4ba095abc351b..91465b0963f47c29e23cd3c27275233fe3c26594 100644 (file)
 #include <config.h>
 #endif
 
-#ifdef GMX_CRAY_XT3
-#include <catamount/dclock.h>
-#endif
-
-
 #include <stdio.h>
-#include <time.h>
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
 #include <math.h>
 #include "typedefs.h"
 #include "string2.h"
index 2b35fb6f9a488cd5f6571ee556a830d6af5c7ec3..03347e3831c14dbfcdcd72b74550531df1115d78 100644 (file)
 #include <config.h>
 #endif
 
-#ifdef GMX_CRAY_XT3
-#include <catamount/dclock.h>
-#endif
-
-
 #include <stdio.h>
 #include <time.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 #ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
 #endif
@@ -59,7 +57,6 @@
 #include "pbc.h"
 #include "chargegroup.h"
 #include "vec.h"
-#include <time.h>
 #include "nrnb.h"
 #include "mshift.h"
 #include "mdrun.h"
 double
 gmx_gettime()
 {
-#ifdef HAVE_GETTIMEOFDAY
+#if _POSIX_TIMERS > 0
+    /* Mac and Windows do not support this */
+    struct timespec t;
+    double          seconds;
+
+    clock_gettime(CLOCK_REALTIME, &t);
+    seconds = (double) t.tv_sec + 1e-9*(double)t.tv_nsec;
+    return seconds;
+#elif defined HAVE_GETTIMEOFDAY
     struct timeval t;
     double         seconds;
 
-    // TODO later: gettimeofday() is deprecated by POSIX. We could use
-    // clock_gettime in POSIX (which also offers nanosecond resolution
-    // if the hardware supports it), but that requires linking with
-    // -lrt. Maybe a better option will come along before we have to
-    // really change from gettimeofday().
     gettimeofday(&t, NULL);
-
     seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
 
     return seconds;
@@ -127,15 +126,27 @@ gmx_gettime()
 #endif
 }
 
-// TODO Remove this. gmx_gettime returns double, so this is now useless
-#define difftime(end, start) ((double)(end)-(double)(start))
+double
+gmx_gettime_per_thread()
+{
+#if _POSIX_THREAD_CPUTIME > 0
+    struct timespec t;
+    double          seconds;
+
+    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t);
+    seconds = (double) t.tv_sec + 1e-9*(double)t.tv_nsec;
+    return seconds;
+#else
+    return gmx_gettime();
+#endif
+}
 
 void print_time(FILE *out, gmx_runtime_t *runtime, gmx_large_int_t step,
                 t_inputrec *ir, t_commrec gmx_unused *cr)
 {
     time_t finish;
     char   timebuf[STRLEN];
-    double dt;
+    double dt, time_per_step;
     char   buf[48];
 
 #ifndef GMX_THREAD_MPI
@@ -147,17 +158,17 @@ void print_time(FILE *out, gmx_runtime_t *runtime, gmx_large_int_t step,
     fprintf(out, "step %s", gmx_step_str(step, buf));
     if ((step >= ir->nstlist))
     {
-        runtime->last          = gmx_gettime();
-        dt                     = difftime(runtime->last, runtime->real);
-        runtime->time_per_step = dt/(step - ir->init_step + 1);
+        double seconds_since_epoch = gmx_gettime();
+        dt            = seconds_since_epoch - runtime->start_time_stamp;
+        time_per_step = dt/(step - ir->init_step + 1);
 
-        dt = (ir->nsteps + ir->init_step - step)*runtime->time_per_step;
+        dt = (ir->nsteps + ir->init_step - step) * time_per_step;
 
         if (ir->nsteps >= 0)
         {
             if (dt >= 300)
             {
-                finish = (time_t) (runtime->last + dt);
+                finish = (time_t) (seconds_since_epoch + dt);
                 gmx_ctime_r(&finish, timebuf, STRLEN);
                 sprintf(buf, "%s", timebuf);
                 buf[strlen(buf)-1] = '\0';
@@ -171,7 +182,7 @@ void print_time(FILE *out, gmx_runtime_t *runtime, gmx_large_int_t step,
         else
         {
             fprintf(out, " performance: %.1f ns/day    ",
-                    ir->delta_t/1000*24*60*60/runtime->time_per_step);
+                    ir->delta_t/1000*24*60*60/time_per_step);
         }
     }
 #ifndef GMX_THREAD_MPI
@@ -184,63 +195,27 @@ void print_time(FILE *out, gmx_runtime_t *runtime, gmx_large_int_t step,
     fflush(out);
 }
 
-// TODO eliminate this
-#ifdef NO_CLOCK
-#define clock() -1
-#endif
-
-static double set_proctime(gmx_runtime_t *runtime)
-{
-    double diff;
-#ifdef GMX_CRAY_XT3
-    double prev;
-
-    prev          = runtime->proc;
-    runtime->proc = dclock();
-
-    diff = runtime->proc - prev;
-#else
-    clock_t prev;
-
-    prev          = runtime->proc;
-    runtime->proc = clock();
-
-    diff = (double)(runtime->proc - prev)/(double)CLOCKS_PER_SEC;
-#endif
-    if (diff < 0)
-    {
-        /* The counter has probably looped, ignore this data */
-        diff = 0;
-    }
-
-    return diff;
-}
-
 void runtime_start(gmx_runtime_t *runtime)
 {
-    runtime->real          = gmx_gettime();
-    runtime->proc          = 0;
-    set_proctime(runtime);
-    runtime->realtime      = 0;
-    runtime->proctime      = 0;
-    runtime->last          = 0;
-    runtime->time_per_step = 0;
+    runtime->start_time_stamp            = gmx_gettime();
+    runtime->start_time_stamp_per_thread = gmx_gettime_per_thread();
+    runtime->elapsed_run_time            = 0;
 }
 
 void runtime_end(gmx_runtime_t *runtime)
 {
-    double now;
+    double now, now_per_thread;
 
-    now = gmx_gettime();
+    now            = gmx_gettime();
+    now_per_thread = gmx_gettime_per_thread();
 
-    runtime->proctime += set_proctime(runtime);
-    runtime->realtime  = now - runtime->real;
-    runtime->real      = now;
+    runtime->elapsed_run_time            = now - runtime->start_time_stamp;
+    runtime->elapsed_run_time_per_thread = now_per_thread - runtime->start_time_stamp_per_thread;
 }
 
-void runtime_upd_proc(gmx_runtime_t *runtime)
+double runtime_get_elapsed_time(gmx_runtime_t *runtime)
 {
-    runtime->proctime += set_proctime(runtime);
+    return gmx_gettime() - runtime->start_time_stamp;
 }
 
 void print_date_and_time(FILE *fplog, int nodeid, const char *title,
@@ -255,7 +230,7 @@ void print_date_and_time(FILE *fplog, int nodeid, const char *title,
     {
         if (runtime != NULL)
         {
-            tmptime = (time_t) runtime->real;
+            tmptime = (time_t) runtime->start_time_stamp;
             gmx_ctime_r(&tmptime, timebuf, STRLEN);
         }
         else
@@ -599,7 +574,7 @@ static void do_nb_verlet(t_forcerec *fr,
     int                        nnbl, kernel_type, enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj;
     char                      *env;
     nonbonded_verlet_group_t  *nbvg;
-    gmx_bool                  bCUDA;
+    gmx_bool                   bCUDA;
 
     if (!(flags & GMX_FORCE_NONBONDED))
     {
@@ -2476,7 +2451,8 @@ void finish_run(FILE *fplog, t_commrec *cr,
     t_nrnb *nrnb_tot = NULL;
     real    delta_t;
     double  nbfs, mflop;
-
+    double  elapsed_run_time_over_all_ranks            = 0;
+    double  elapsed_run_time_per_thread_over_all_ranks = 0;
     wallcycle_sum(cr, wcycle);
 
     if (cr->nnodes > 1)
@@ -2492,16 +2468,27 @@ void finish_run(FILE *fplog, t_commrec *cr,
         nrnb_tot = nrnb;
     }
 
-#if defined(GMX_MPI) && !defined(GMX_THREAD_MPI)
+#ifdef GMX_MPI
     if (cr->nnodes > 1)
     {
-        /* reduce nodetime over all MPI processes in the current simulation */
-        double sum;
-        MPI_Allreduce(&runtime->proctime, &sum, 1, MPI_DOUBLE, MPI_SUM,
+        /* reduce elapsed_run_time over all MPI ranks in the current simulation */
+        MPI_Allreduce(&runtime->elapsed_run_time,
+                      &elapsed_run_time_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+        elapsed_run_time_over_all_ranks /= cr->nnodes;
+        /* reduce elapsed_run_time_per_thread over all MPI ranks in the current simulation */
+        MPI_Allreduce(&runtime->elapsed_run_time_per_thread,
+                      &elapsed_run_time_per_thread_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
                       cr->mpi_comm_mysim);
-        runtime->proctime = sum;
     }
+    else
 #endif
+    {
+        elapsed_run_time_over_all_ranks            = runtime->elapsed_run_time;
+        elapsed_run_time_per_thread_over_all_ranks = runtime->elapsed_run_time_per_thread;
+    }
 
     if (SIMMASTER(cr))
     {
@@ -2546,7 +2533,7 @@ void finish_run(FILE *fplog, t_commrec *cr,
 
     if (SIMMASTER(cr))
     {
-        wallcycle_print(fplog, cr->nnodes, cr->npmenodes, runtime->realtime,
+        wallcycle_print(fplog, cr->nnodes, cr->npmenodes, runtime->elapsed_run_time,
                         wcycle, gputimes);
 
         if (EI_DYNAMICS(inputrec->eI))
@@ -2560,12 +2547,14 @@ void finish_run(FILE *fplog, t_commrec *cr,
 
         if (fplog)
         {
-            print_perf(fplog, runtime->proctime, runtime->realtime,
+            print_perf(fplog, elapsed_run_time_per_thread_over_all_ranks,
+                       elapsed_run_time_over_all_ranks,
                        runtime->nsteps_done, delta_t, nbfs, mflop);
         }
         if (bWriteStat)
         {
-            print_perf(stderr, runtime->proctime, runtime->realtime,
+            print_perf(stderr, elapsed_run_time_per_thread_over_all_ranks,
+                       elapsed_run_time_over_all_ranks,
                        runtime->nsteps_done, delta_t, nbfs, mflop);
         }
     }
index 3cea354ac8b28a5d241a20d78e7dade95410851a..86a0dc5176a5d44e55a1870c7dac4426bbde1caa 100644 (file)
@@ -146,7 +146,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
 {
     gmx_mdoutf_t   *outf;
     gmx_large_int_t step, step_rel;
-    double          run_time;
+    double          elapsed_run_time;
     double          t, t0, lam0[efptNR];
     gmx_bool        bGStatEveryStep, bGStat, bCalcVir, bCalcEner;
     gmx_bool        bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE,
@@ -1337,8 +1337,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
             copy_mat(state->fvir_prev, force_vir);
         }
 
-        /* Determine the wallclock run time up till now */
-        run_time = gmx_gettime() - (double)runtime->real;
+        elapsed_run_time = runtime_get_elapsed_time(runtime);
 
         /* Check whether everything is still allright */
         if (((int)gmx_get_stop_condition() > handled_stop_condition)
@@ -1375,7 +1374,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
             handled_stop_condition = (int)gmx_get_stop_condition();
         }
         else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
-                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
+                 (max_hours > 0 && elapsed_run_time > max_hours*60.0*60.0*0.99) &&
                  gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
         {
             /* Signal to terminate the run */
@@ -1388,7 +1387,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
         }
 
         if (bResetCountersHalfMaxH && MASTER(cr) &&
-            run_time > max_hours*60.0*60.0*0.495)
+            elapsed_run_time > max_hours*60.0*60.0*0.495)
         {
             gs.sig[eglsRESETCOUNTERS] = 1;
         }
@@ -1425,7 +1424,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
         if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
                            cpt_period >= 0 &&
                            (cpt_period == 0 ||
-                            run_time >= nchkpt*cpt_period*60.0)) &&
+                            elapsed_run_time >= nchkpt*cpt_period*60.0)) &&
             gs.set[eglsCHKPT] == 0)
         {
             gs.sig[eglsCHKPT] = 1;
@@ -1731,12 +1730,6 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
         }
         /* #########  END PREPARING EDR OUTPUT  ###########  */
 
-        /* Time for performance */
-        if (((step % stepout) == 0) || bLastStep)
-        {
-            runtime_upd_proc(runtime);
-        }
-
         /* Output stuff */
         if (MASTER(cr))
         {
@@ -1933,7 +1926,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
                 gmx_pme_send_resetcounters(cr, step);
             }
             /* Correct max_hours for the elapsed time */
-            max_hours                -= run_time/(60.0*60.0);
+            max_hours                -= elapsed_run_time/(60.0*60.0);
             bResetCountersHalfMaxH    = FALSE;
             gs.set[eglsRESETCOUNTERS] = 0;
         }
index e3f6da4d68bac302d5266348251fc675e856ec13..266f018fcd8e319faa1b97959d4dd1c00c401643 100644 (file)
@@ -1603,24 +1603,6 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
         gmx_pmeonly(*pmedata, cr, nrnb, wcycle, &runtime, ewaldcoeff, inputrec);
     }
 
-    // TODO remove this, nobody reads runtime.real or runtime.proc after
-    // this point - runtime_end() has already been called!
-    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
-    {
-        /* Some timing stats */
-        if (SIMMASTER(cr))
-        {
-            if (runtime.proc == 0)
-            {
-                runtime.proc = runtime.real;
-            }
-        }
-        else
-        {
-            runtime.real = 0;
-        }
-    }
-
     wallcycle_stop(wcycle, ewcRUN);
 
     /* Finish up, write some stuff