2 This source code file is part of thread_mpi.
3 Written by Sander Pronk, Erik Lindahl, and possibly others.
5 Copyright (c) 2009, Sander Pronk, Erik Lindahl.
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10 1) Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 2) Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15 3) Neither the name of the copyright holders nor the
16 names of its contributors may be used to endorse or promote products
17 derived from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
20 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
23 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 If you want to redistribute modifications, please consider that
31 scientific software is very special. Version control is crucial -
32 bugs must be traceable. We will be happy to consider code for
33 inclusion in the official distribution, but derived work should not
34 be called official thread_mpi. Details are found in the README & COPYING
39 /* the profiling functions */
41 #ifdef HAVE_TMPI_CONFIG_H
42 #include "tmpi_config.h"
57 #if !(defined( _WIN32 ) || defined( _WIN64 ) )
60 /* windows doesn't do standard C */
61 #define snprintf sprintf_s
/* Global flag for whether profiling is currently active: starts at 0,
   is set to 1 at the end of tMPI_Profile_init() and cleared again by
   tMPI_Profile_stop(). */
71 int tMPI_Profile_started = 0;
74 /* this must match the tmpi_functions enum: */
75 const char *tmpi_function_names[] =
106 /* this must match the tmpi_wait_functions enum: */
107 const char *tmpi_waitfn_names[] =
119 /* we intentionally only do the ifdef here; this suppresses warnings at the link
120 stage about empty object files */
/* Reset every counter in *prof and mark profiling as started.
   prof: profile structure to initialise; all fields touched here are
         overwritten.
   The return statement is not visible in this listing, so the returned
   value is not documented here. */
123 int tMPI_Profile_init(struct tmpi_profile *prof)
/* zero the per-function call counters */
128 for (i = 0; i < TMPIFN_Nfunctions; i++)
130 prof->mpifn_calls[i] = 0;
/* cycle accounting only exists when TMPI_CYCLE_COUNT is defined */
132 #ifdef TMPI_CYCLE_COUNT
133 for (i = 0; i < TMPIFN_Nfunctions; i++)
135 prof->mpifn_cycles[i] = 0;
137 for (i = 0; i < TMPIWAIT_N; i++)
139 prof->wait_cycles[i] = 0;
/* global_start is sampled now; global_stop is filled in later by
   tMPI_Profile_stop() */
141 prof->global_start = tMPI_Cycles_read();
142 prof->global_stop = 0;
143 prof->wait_start = 0;
/* transfer counters: buffered and total, for point-to-point and
   collective transfers; tMPI_Profiles_summarize() reports buffered/total */
146 prof->buffered_p2p_xfers = 0;
147 prof->buffered_coll_xfers = 0;
148 prof->total_p2p_xfers = 0;
149 prof->total_coll_xfers = 0;
/* flip the global flag last, after all counters are in a known state */
150 tMPI_Profile_started = 1;
157 void tMPI_Profile_destroy(struct tmpi_profile *prof)
/* Stop profiling for *prof: when TMPI_CYCLE_COUNT is defined, sample the
   cycle counter into prof->global_stop, then clear the global
   tMPI_Profile_started flag.
   NOTE(review): the #endif closing the conditional is not visible in this
   listing; presumably it sits before the flag reset, so the reset happens
   in both configurations - confirm against the full file. */
164 void tMPI_Profile_stop(struct tmpi_profile *prof)
166 #ifdef TMPI_CYCLE_COUNT
167 prof->global_stop = tMPI_Cycles_read();
169 tMPI_Profile_started = 0;
172 /* output functions */
/* Print a profiling report for all threads to stdout.
   Nthreads: number of entries in threads[] to report on.
   threads:  per-thread data; threads[j].profile is read, and when
             TMPI_CYCLE_COUNT is defined threads[j].profile.totals is also
             overwritten as a scratch accumulator.
   The report has one column per thread plus a "Total" column, and contains:
   call counts per function, fraction of buffered transfers, and (only with
   TMPI_CYCLE_COUNT) call and wait times as a fraction of run time followed
   by the total run time.
   NOTE(review): several short lines (declarations, accumulations such as
   the updates of total/tot_buf/tot_time, braces) are not visible in this
   listing; comments below describe only what the visible lines show. */
173 void tMPI_Profiles_summarize(int Nthreads, struct tmpi_thread *threads)
177 printf("\nTMPI PROFILE:\n");
/* header row: one right-aligned "Thread j" label per thread, then "Total" */
180 for (j = 0; j < Nthreads; j++)
183 snprintf(thrn, sizeof(thrn), "Thread %d", j);
184 printf(" %10s", thrn);
187 printf(" %10s\n", "Total");
/* loop over len; both len's definition and the loop body are outside the
   visible lines - presumably a separator line, TODO confirm */
191 for (i = 0; i < len; i++)
/* table 1: number of calls per tMPI function, per thread */
197 for (i = 0; i < TMPIFN_Nfunctions; i++)
199 long unsigned int total = 0;
201 printf("%11s", tmpi_function_names[i]);
202 for (j = 0; j < Nthreads; j++)
204 long unsigned int count = threads[j].profile.mpifn_calls[i];
/* NOTE(review): count and total are long unsigned int but are cast to long
   and printed with %ld; a value above LONG_MAX would print as negative.
   Printing with %lu and no cast would match the declared type. */
207 printf(" %10ld", (long)count);
209 printf(" %10ld\n", (long)total);
/* table 2: buffered/total transfer ratio, first for point-to-point */
212 printf("\nFraction of buffered transfers:\n");
214 long unsigned int tot_buf = 0;
215 long unsigned int tot_count = 0;
216 printf("%11s", "P2p");
217 for (j = 0; j < Nthreads; j++)
219 long unsigned int buf = threads[j].profile.buffered_p2p_xfers;
220 long unsigned int count = threads[j].profile.total_p2p_xfers;
/* NOTE(review): no guard against count == 0 is visible here; a thread with
   no transfers would produce 0.0/0.0 - confirm whether that is intended */
225 printf(" %10.5f", (double)buf/(double)count);
227 printf(" %10.5f\n", (double)tot_buf/(double)tot_count);
/* same ratio for collective transfers */
231 printf("%11s", "Collective");
232 for (j = 0; j < Nthreads; j++)
234 long unsigned int buf = threads[j].profile.buffered_coll_xfers;
235 long unsigned int count = threads[j].profile.total_coll_xfers;
240 printf(" %10.5f", (double)buf/(double)count);
242 printf(" %10.5f\n", (double)tot_buf/(double)tot_count);
/* everything below depends on cycle counters */
246 #ifdef TMPI_CYCLE_COUNT
247 printf("\nCall times as fraction of total run time:\n");
/* profile.totals is reused as a per-thread running sum for this table */
248 for (j = 0; j < Nthreads; j++)
250 threads[j].profile.totals = 0.;
/* table 3: cycles spent in each function divided by the thread's
   global_stop - global_start interval */
252 for (i = 0; i < TMPIFN_Nfunctions; i++)
254 double tot_time = 0.;
255 double tot_diff = 0.;
257 printf("%11s", tmpi_function_names[i]);
258 for (j = 0; j < Nthreads; j++)
260 double time = (double)(threads[j].profile.global_stop -
261 threads[j].profile.global_start );
262 double diff = ((double)threads[j].profile.mpifn_cycles[i]);
265 threads[j].profile.totals += diff;
266 printf(" %10.5f", diff/time);
268 printf(" %10.5f\n", tot_diff/tot_time);
/* summary row: per-thread sum over all functions, as accumulated in
   profile.totals above */
271 double tot_time = 0.;
272 double tot_diff = 0.;
274 printf("%11s", "Total");
275 for (j = 0; j < Nthreads; j++)
277 double time = (double)(threads[j].profile.global_stop -
278 threads[j].profile.global_start );
279 double diff = threads[j].profile.totals;
283 printf(" %10.5f", diff/time );
285 printf(" %10.5f\n", tot_diff/tot_time);
/* table 4: same layout as the call-time table, for the wait counters */
289 printf("\nWait times as fraction of total run time:\n");
/* reset the scratch accumulator before reusing it for wait cycles */
290 for (j = 0; j < Nthreads; j++)
292 threads[j].profile.totals = 0.;
295 for (i = 0; i < TMPIWAIT_N; i++)
297 double tot_time = 0.;
298 double tot_diff = 0.;
300 printf("%11s", tmpi_waitfn_names[i]);
301 for (j = 0; j < Nthreads; j++)
303 double time = (double)(threads[j].profile.global_stop -
304 threads[j].profile.global_start );
305 double diff = ((double)threads[j].profile.wait_cycles[i]);
308 threads[j].profile.totals += diff;
309 printf(" %10.5f", diff/time);
311 printf(" %10.5f\n", tot_diff/tot_time);
/* summary row for the wait table */
315 double tot_time = 0.;
316 double tot_diff = 0.;
318 printf("%11s", "Total");
319 for (j = 0; j < Nthreads; j++)
321 double time = (double)(threads[j].profile.global_stop -
322 threads[j].profile.global_start );
323 double diff = threads[j].profile.totals;
327 printf(" %10.5f", diff/time );
329 printf(" %10.5f\n", tot_diff/tot_time);
/* second loop over len; body not visible - presumably another separator */
333 for (i = 0; i < len; i++)
339 /* here we make use of the fact that this is how we calculate tMPI_Wtime */
/* report the wall-clock time with the timer resolution as its uncertainty */
341 double wt = tMPI_Wtime();
342 double wtck = tMPI_Wtick();
343 printf("\nTotal run time: %g +/- %g s.\n", wt, wtck);