/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *                This source code is part of
 *
 *          GROningen MAchine for Chemical Simulations
 *
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2008, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 */

#include <stdlib.h>           /* getenv() */

#include "gmx_wallcycle.h"
#include "gmx_cyclecounter.h"
#include "smalloc.h"          /* snew(), sfree() */
#include "gmx_fatal.h"

/* One counter: number of start/stop pairs, accumulated cycles, last start */
typedef struct { int n; gmx_cycles_t c, start; } wallcc_t;

typedef struct gmx_wallcycle
{
    wallcc_t        *wcc;        /* one counter per ewc region            */
    /* variables for testing/debugging */
    gmx_bool         wc_barrier; /* MPI_Barrier before each start/stop?   */
    wallcc_t        *wcc_all;    /* region-pair counters (GMX_CYCLE_ALL)  */
    int              wc_depth, ewc_prev;
    gmx_cycles_t     cycle_prev;
    gmx_large_int_t  reset_counters;
    MPI_Comm         mpi_comm_mygroup;
    int              omp_nthreads;
} gmx_wallcycle_t_t;

/* Each name should not exceed 19 characters; the order must match
 * the ewc enum in gmx_wallcycle.h. */
static const char *wcn[ewcNR] =
{
    "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
    "DD comm. bounds", "Vsite constr.", "Send X to PME", "Comm. coord.",
    "Neighbor search", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
    "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.",
    "PME solve", "Wait + Comm. X/F", "Wait + Recv. PME F", "Vsite spread",
    "Write traj.", "Update", "Constraints", "Comm. energies",
    "Enforced rotation", "Add rot. forces", "Test"
};

gmx_bool wallcycle_have_counter(void)
{
    return gmx_cycles_have_counter();
}

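/* Typical call pattern, as a minimal sketch (the real call sites live in
 * mdrun and may differ in detail); region indices such as ewcRUN and
 * ewcFORCE come from gmx_wallcycle.h:
 *
 *     double          cycles[ewcNR];
 *     gmx_wallcycle_t wc = wallcycle_init(fplog, 0, cr, 1);
 *
 *     wallcycle_start(wc, ewcRUN);
 *     wallcycle_start(wc, ewcFORCE);
 *     ... compute forces ...
 *     wallcycle_stop(wc, ewcFORCE);
 *     wallcycle_stop(wc, ewcRUN);
 *
 *     wallcycle_sum(cr, wc, cycles);
 *     wallcycle_print(fplog, cr->nnodes, cr->npmenodes, realtime, wc, cycles);
 *
 * wallcycle_init() returns NULL when no cycle counter is available; the
 * start/stop calls are then no-ops.
 */
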
gmx_wallcycle_t wallcycle_init(FILE *fplog, int resetstep, t_commrec *cr, int omp_nthreads)
{
    gmx_wallcycle_t wc;

    if (!wallcycle_have_counter())
    {
        return NULL;
    }

    snew(wc, 1);
    snew(wc->wcc, ewcNR);
    wc->wc_barrier     = FALSE;
    wc->wcc_all        = NULL;
    wc->reset_counters = resetstep;
    wc->omp_nthreads   = omp_nthreads;

    if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog, "\nWill call MPI_Barrier before each cycle start/stop call\n\n");
        }
        wc->wc_barrier       = TRUE;
        wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
    }

    if (getenv("GMX_CYCLE_ALL") != NULL)
    {
#ifndef GMX_THREAD_MPI
        if (fplog)
        {
            fprintf(fplog, "\nWill time all the code during the run\n\n");
        }
        snew(wc->wcc_all, ewcNR*ewcNR);
#else
        gmx_fatal(FARGS, "GMX_CYCLE_ALL is incompatible with threaded code");
#endif
    }

    return wc;
}

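/* Both behaviours are opt-in through environment variables; only the
 * presence of the variable is tested, so any value works.  Hypothetical
 * command lines, for illustration only:
 *
 *     GMX_CYCLE_BARRIER=1 mpirun -np 8 mdrun ...
 *     GMX_CYCLE_ALL=1 mdrun ...
 */
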
void wallcycle_destroy(gmx_wallcycle_t wc)
{
    if (wc->wcc_all != NULL)
    {
        sfree(wc->wcc_all);
    }
    sfree(wc->wcc);
    sfree(wc);
}

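/* With GMX_CYCLE_ALL, wcc_all is used as an ewcNR x ewcNR matrix: entry
 * [prev*ewcNR + ewc] accumulates the cycles spent in region ewc while it was
 * entered from region prev, so each inner counter can later be attributed to
 * the outer region that contained it. */
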
static void wallcycle_all_start(gmx_wallcycle_t wc, int ewc, gmx_cycles_t cycle)
{
    wc->ewc_prev   = ewc;
    wc->cycle_prev = cycle;
}

static void wallcycle_all_stop(gmx_wallcycle_t wc, int ewc, gmx_cycles_t cycle)
{
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].n += 1;
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].c += cycle - wc->cycle_prev;
}

void wallcycle_start(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle;

    if (wc == NULL)
    {
        return;
    }
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }

    cycle = gmx_cycles_read();
    wc->wcc[ewc].start = cycle;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth++;
        if (ewc == ewcRUN)
        {
            wallcycle_all_start(wc, ewc, cycle);
        }
        else if (wc->wc_depth == 3)
        {
            wallcycle_all_stop(wc, ewc, cycle);
        }
    }
}

double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle, last;

    if (wc == NULL)
    {
        return 0;
    }
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }

    cycle = gmx_cycles_read();
    last  = cycle - wc->wcc[ewc].start;
    wc->wcc[ewc].c += last;
    wc->wcc[ewc].n++;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth--;
        if (ewc == ewcRUN)
        {
            wallcycle_all_stop(wc, ewc, cycle);
        }
        else if (wc->wc_depth == 2)
        {
            wallcycle_all_start(wc, ewc, cycle);
        }
    }

    return last;
}

void wallcycle_reset_all(gmx_wallcycle_t wc)
{
    int i;

    for(i=0; i<ewcNR; i++)
    {
        wc->wcc[i].n     = 0;
        wc->wcc[i].c     = 0;
        wc->wcc[i].start = 0;
    }
}

static gmx_bool pme_subdivision(int ewc)
{
    return (ewc >= ewcPME_REDISTXF && ewc <= ewcPME_SOLVE);
}

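/* The counters selected by pme_subdivision() (PME redistribution,
 * spread/gather, 3D-FFT, FFT communication and solve) all run inside
 * ewcPMEMESH; wallcycle_print() therefore leaves them out of the main table
 * and reports them in a separate block, so their time is not counted twice. */
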
void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc, double cycles[])
{
    wallcc_t *wcc;
    double   cycles_n[ewcNR], buf[ewcNR], *cyc_all, *buf_all;
    int      i;

    wcc = wc->wcc;

    /* The PME(-mesh) counters run with omp_nthreads threads per rank */
    if (wc->omp_nthreads > 1)
    {
        for(i=0; i<ewcNR; i++)
        {
            if (pme_subdivision(i) || i==ewcPMEMESH ||
                (i==ewcRUN && cr->duty == DUTY_PME))
            {
                wcc[i].c *= wc->omp_nthreads;
            }
        }
    }

    /* Subtract counters that are nested inside other counters */
    if (wcc[ewcDDCOMMLOAD].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMLOAD].c;
    }
    if (wcc[ewcDDCOMMBOUND].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMBOUND].c;
    }
    if (wcc[ewcPME_FFTCOMM].n > 0)
    {
        wcc[ewcPME_FFT].c -= wcc[ewcPME_FFTCOMM].c;
    }

    if (cr->npmenodes == 0)
    {
        /* All nodes do PME (or no PME at all) */
        if (wcc[ewcPMEMESH].n > 0)
        {
            wcc[ewcFORCE].c -= wcc[ewcPMEMESH].c;
        }
    }
    else
    {
        /* There are PME-only nodes */
        if (wcc[ewcPMEMESH].n > 0)
        {
            /* This must be a PME-only node, calculate the Wait + Comm. time */
            wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH].c;
        }
    }

    /* Store the cycles in a double buffer for summing */
    for(i=0; i<ewcNR; i++)
    {
        cycles_n[i] = (double)wcc[i].n;
        cycles[i]   = (double)wcc[i].c;
    }

    MPI_Allreduce(cycles_n, buf, ewcNR, MPI_DOUBLE, MPI_MAX,
                  cr->mpi_comm_mysim);
    for(i=0; i<ewcNR; i++)
    {
        wcc[i].n = (int)(buf[i] + 0.5);
    }
    MPI_Allreduce(cycles, buf, ewcNR, MPI_DOUBLE, MPI_SUM,
                  cr->mpi_comm_mysim);
    for(i=0; i<ewcNR; i++)
    {
        cycles[i] = buf[i];
    }

    if (wc->wcc_all != NULL)
    {
        snew(cyc_all, ewcNR*ewcNR);
        snew(buf_all, ewcNR*ewcNR);
        for(i=0; i<ewcNR*ewcNR; i++)
        {
            cyc_all[i] = wc->wcc_all[i].c;
        }
        MPI_Allreduce(cyc_all, buf_all, ewcNR*ewcNR, MPI_DOUBLE, MPI_SUM,
                      cr->mpi_comm_mysim);
        for(i=0; i<ewcNR*ewcNR; i++)
        {
            wc->wcc_all[i].c = buf_all[i];
        }
        sfree(buf_all);
        sfree(cyc_all);
    }
}

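/* Note on the two reductions in wallcycle_sum(): the call counts are
 * combined with MPI_MAX, so a counter that only ran on a subset of the ranks
 * still reports its per-rank call count, while the cycle counts are combined
 * with MPI_SUM, i.e. cycles[] holds totals aggregated over all ranks. */
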
static void print_cycles(FILE *fplog, double c2t, const char *name, int nnodes,
                         int n, double c, double tot)
{
    char num[11];

    if (c <= 0)
    {
        return;
    }
    /* Aggregate rows such as "Rest" and "Total" pass n == 0:
     * leave the count column blank for those. */
    sprintf(num, n > 0 ? "%10d" : "          ", n);
    fprintf(fplog, " %-19s %4d %10s %12.3f %10.1f %5.1f\n",
            name, nnodes, num, c*1e-9, c*c2t, 100*c/tot);
}

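/* Cycle-to-time conversion used by wallcycle_print() below: after
 * wallcycle_sum(), tot is the counted cycles summed over all ranks (with the
 * PME counters already scaled by omp_nthreads), so
 *
 *     c2t = (npp + npme*omp_nthreads) * realtime / tot
 *
 * maps an aggregate cycle count c onto the "Seconds" column as c*c2t; by
 * construction the "Total" row then works out to
 * (npp + npme*omp_nthreads)*realtime, i.e. aggregate thread-seconds rather
 * than wall-clock time.  The "G-Cycles" column is simply c*1e-9. */
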
void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
                     gmx_wallcycle_t wc, double cycles[])
{
    double     c2t, tot, sum;
    int        i, j, npp;
    char       buf[STRLEN];
    const char *myline = "-----------------------------------------------------------------------";

    npp = nnodes - npme;   /* number of particle-particle (non-PME) nodes */
    tot = cycles[ewcRUN];
    /* PME part has to be multiplied with number of threads */
    tot += cycles[ewcPMEMESH]*(wc->omp_nthreads-1);
    /* Conversion factor from cycles to seconds */
    c2t = (npp + npme*wc->omp_nthreads)*realtime/tot;

    fprintf(fplog, "\n     R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G\n\n");
    fprintf(fplog, " Computing:         Nodes     Number     G-Cycles    Seconds     %c\n", '%');
    fprintf(fplog, "%s\n", myline);

    sum = 0;
    for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
    {
        if (!pme_subdivision(i))
        {
            print_cycles(fplog, c2t, wcn[i],
                         (i==ewcPMEMESH || i==ewcPMEWAITCOMM) ? npme : npp,
                         wc->wcc[i].n, cycles[i], tot);
            sum += cycles[i];
        }
    }

    if (wc->wcc_all != NULL)
    {
        for(i=0; i<ewcNR; i++)
        {
            for(j=0; j<ewcNR; j++)
            {
                /* Label each row with the "outer inner" region pair */
                sprintf(buf, "%-9s", wcn[i]);
                buf[9] = ' ';
                sprintf(buf+10, "%-9s", wcn[j]);
                buf[19] = '\0';
                print_cycles(fplog, c2t, buf,
                             (i==ewcPMEMESH || i==ewcPMEWAITCOMM) ? npme : npp,
                             wc->wcc_all[i*ewcNR+j].n,
                             wc->wcc_all[i*ewcNR+j].c,
                             tot);
            }
        }
    }

    print_cycles(fplog, c2t, "Rest", npp, 0, tot-sum, tot);
    fprintf(fplog, "%s\n", myline);
    print_cycles(fplog, c2t, "Total", nnodes, 0, tot, tot);
    fprintf(fplog, "%s\n", myline);

    if (wc->wcc[ewcPMEMESH].n > 0)
    {
        fprintf(fplog, "%s\n", myline);
        for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
        {
            if (pme_subdivision(i))
            {
                print_cycles(fplog, c2t, wcn[i],
                             (i >= ewcPMEMESH && i <= ewcPME_SOLVE) ? npme : npp,
                             wc->wcc[i].n, cycles[i], tot);
            }
        }
        fprintf(fplog, "%s\n", myline);
    }

    if (cycles[ewcMoveE] > tot*0.05)
    {
        sprintf(buf,
                "NOTE: %d %% of the run time was spent communicating energies,\n"
                "      you might want to use the -gcom option of mdrun\n",
                (int)(100*cycles[ewcMoveE]/tot+0.5));
        if (fplog)
        {
            fprintf(fplog, "\n%s\n", buf);
        }
        /* Only the sim master calls this function, so always print to stderr */
        fprintf(stderr, "\n%s\n", buf);
    }
}

extern gmx_large_int_t wcycle_get_reset_counters(gmx_wallcycle_t wc)
{
    return wc->reset_counters;
}

extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, gmx_large_int_t reset_counters)
{
    wc->reset_counters = reset_counters;
}