2 This source code file is part of thread_mpi.
3 Written by Sander Pronk, Erik Lindahl, and possibly others.
5 Copyright (c) 2009, Sander Pronk, Erik Lindahl.
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10 1) Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 2) Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15 3) Neither the name of the copyright holders nor the
16 names of its contributors may be used to endorse or promote products
17 derived from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
20 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
23 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 If you want to redistribute modifications, please consider that
31 scientific software is very special. Version control is crucial -
32 bugs must be traceable. We will be happy to consider code for
33 inclusion in the official distribution, but derived work should not
34 be called official thread_mpi. Details are found in the README & COPYING
39 /* the profiling functions */
41 #ifdef HAVE_TMPI_CONFIG_H
42 #include "tmpi_config.h"
57 #if !(defined( _WIN32 ) || defined( _WIN64 ) )
60 /* windows doesn't do standard C */
61 #define snprintf sprintf_s
/* Global flag, initially 0: tMPI_Profile_init sets it to 1 as its last
   visible step and tMPI_Profile_stop sets it back to 0. */
71 int tMPI_Profile_started = 0;
74 /* this must match the tmpi_functions enum: */
75 const char *tmpi_function_names[] =
106 /* this must match the tmpi_wait_functions enum: */
107 const char *tmpi_waitfn_names[] =
119 /* we intentionally only do the ifdef here; this suppresses warnings at the link
120 stage about empty object files */
/* Reset every counter in a profiling record and mark profiling as started.

   prof: the profile structure to clear; it is written through, never read. */
123 void tMPI_Profile_init(struct tmpi_profile *prof)
/* clear the call counter of each of the TMPIFN_Nfunctions entries */
128 for (i = 0; i < TMPIFN_Nfunctions; i++)
130 prof->mpifn_calls[i] = 0;
132 #ifdef TMPI_CYCLE_COUNT
/* cycle counting enabled: clear the per-function cycle accumulators ... */
133 for (i = 0; i < TMPIFN_Nfunctions; i++)
135 prof->mpifn_cycles[i] = 0;
/* ... and the TMPIWAIT_N per-wait-type cycle accumulators */
137 for (i = 0; i < TMPIWAIT_N; i++)
139 prof->wait_cycles[i] = 0;
/* the current cycle counter reading becomes the start of the measured
   interval; global_stop stays 0 until tMPI_Profile_stop records it */
141 prof->global_start = tmpi_cycles_read();
142 prof->global_stop = 0;
143 prof->wait_start = 0;
/* clear the buffered and total transfer counters (p2p and collective) */
146 prof->buffered_p2p_xfers = 0;
147 prof->buffered_coll_xfers = 0;
148 prof->total_p2p_xfers = 0;
149 prof->total_coll_xfers = 0;
/* raise the global "profiling started" flag */
150 tMPI_Profile_started = 1;
155 void tMPI_Profile_destroy(struct tmpi_profile *prof)
/* End the measured interval for one profiling record and clear the global
   "profiling started" flag.

   prof: the profile structure whose stop time is recorded. */
162 void tMPI_Profile_stop(struct tmpi_profile *prof)
164 #ifdef TMPI_CYCLE_COUNT
/* cycle counting enabled: store the current cycle counter reading; the
   summary uses global_stop - global_start as the thread's run time */
165 prof->global_stop = tmpi_cycles_read();
167 tMPI_Profile_started = 0;
170 /* output functions */
/* Print a profiling report to stdout as a table with one column per thread
   plus a "Total" column.

   Nthreads: number of entries of threads[] that are reported.
   threads:  per-thread records; only their .profile member is used here.

   Sections printed: call counts per tMPI function, the fraction of buffered
   transfers, and (inside TMPI_CYCLE_COUNT) call times and wait times as a
   fraction of each thread's run time, followed by the total run time.
   The cycle-count sections also overwrite threads[j].profile.totals. */
171 void tMPI_Profiles_summarize(int Nthreads, struct tmpi_thread *threads)
175 printf("\nTMPI PROFILE:\n");
/* column headings: "Thread <j>" for every thread, then "Total" */
178 for (j = 0; j < Nthreads; j++)
181 snprintf(thrn, sizeof(thrn), "Thread %d", j);
182 printf(" %10s", thrn);
185 printf(" %10s\n", "Total");
189 for (i = 0; i < len; i++)
/* one row per tMPI function: its name, each thread's call count, then total */
195 for (i = 0; i < TMPIFN_Nfunctions; i++)
197 long unsigned int total = 0;
199 printf("%11s", tmpi_function_names[i]);
200 for (j = 0; j < Nthreads; j++)
202 long unsigned int count = threads[j].profile.mpifn_calls[i];
/* NOTE(review): the unsigned long counter is cast to long to match %ld;
   a count above LONG_MAX would not print correctly - consider %lu */
205 printf(" %10ld", (long)count);
207 printf(" %10ld\n", (long)total);
210 printf("\nFraction of buffered transfers:\n");
212 long unsigned int tot_buf = 0;
213 long unsigned int tot_count = 0;
/* "P2p" row: buffered_p2p_xfers / total_p2p_xfers for each thread */
214 printf("%11s", "P2p");
215 for (j = 0; j < Nthreads; j++)
217 long unsigned int buf = threads[j].profile.buffered_p2p_xfers;
218 long unsigned int count = threads[j].profile.total_p2p_xfers;
/* NOTE(review): no check for count == 0 is visible before this division;
   a thread without any transfers would give 0/0 - confirm */
223 printf(" %10.5f", (double)buf/(double)count);
225 printf(" %10.5f\n", (double)tot_buf/(double)tot_count);
/* "Collective" row: buffered_coll_xfers / total_coll_xfers for each thread */
229 printf("%11s", "Collective");
230 for (j = 0; j < Nthreads; j++)
232 long unsigned int buf = threads[j].profile.buffered_coll_xfers;
233 long unsigned int count = threads[j].profile.total_coll_xfers;
238 printf(" %10.5f", (double)buf/(double)count);
240 printf(" %10.5f\n", (double)tot_buf/(double)tot_count);
244 #ifdef TMPI_CYCLE_COUNT
245 printf("\nCall times as fraction of total run time:\n");
/* restart the per-thread running sum used for the "Total" row below */
246 for (j = 0; j < Nthreads; j++)
248 threads[j].profile.totals = 0.;
/* one row per tMPI function: cycles spent in it divided by the thread's
   measured interval */
250 for (i = 0; i < TMPIFN_Nfunctions; i++)
252 double tot_time = 0.;
253 double tot_diff = 0.;
255 printf("%11s", tmpi_function_names[i]);
256 for (j = 0; j < Nthreads; j++)
/* measured interval in cycles; global_stop is 0 after tMPI_Profile_init
   until tMPI_Profile_stop records it, so this presumably expects profiling
   to have been stopped first - confirm with callers */
258 double time = (double)(threads[j].profile.global_stop -
259 threads[j].profile.global_start );
260 double diff = ((double)threads[j].profile.mpifn_cycles[i]);
263 threads[j].profile.totals += diff;
264 printf(" %10.5f", diff/time);
266 printf(" %10.5f\n", tot_diff/tot_time);
269 double tot_time = 0.;
270 double tot_diff = 0.;
/* "Total" row: each thread's summed function cycles over its interval */
272 printf("%11s", "Total");
273 for (j = 0; j < Nthreads; j++)
275 double time = (double)(threads[j].profile.global_stop -
276 threads[j].profile.global_start );
277 double diff = threads[j].profile.totals;
281 printf(" %10.5f", diff/time );
283 printf(" %10.5f\n", tot_diff/tot_time);
287 printf("\nWait times as fraction of total run time:\n");
/* reuse profile.totals as the running sum for the wait-time table */
288 for (j = 0; j < Nthreads; j++)
290 threads[j].profile.totals = 0.;
/* one row per wait type: wait cycles divided by the thread's interval */
293 for (i = 0; i < TMPIWAIT_N; i++)
295 double tot_time = 0.;
296 double tot_diff = 0.;
298 printf("%11s", tmpi_waitfn_names[i]);
299 for (j = 0; j < Nthreads; j++)
301 double time = (double)(threads[j].profile.global_stop -
302 threads[j].profile.global_start );
303 double diff = ((double)threads[j].profile.wait_cycles[i]);
306 threads[j].profile.totals += diff;
307 printf(" %10.5f", diff/time);
309 printf(" %10.5f\n", tot_diff/tot_time);
313 double tot_time = 0.;
314 double tot_diff = 0.;
/* "Total" row: each thread's summed wait cycles over its interval */
316 printf("%11s", "Total");
317 for (j = 0; j < Nthreads; j++)
319 double time = (double)(threads[j].profile.global_stop -
320 threads[j].profile.global_start );
321 double diff = threads[j].profile.totals;
325 printf(" %10.5f", diff/time );
327 printf(" %10.5f\n", tot_diff/tot_time);
331 for (i = 0; i < len; i++)
337 /* here we make use of the fact that this is how we calculate tMPI_Wtime */
/* report tMPI_Wtime() as the run time and tMPI_Wtick() as its uncertainty */
339 double wt = tMPI_Wtime();
340 double wtck = tMPI_Wtick();
341 printf("\nTotal run time: %g +/- %g s.\n", wt, wtck);