1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2006
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
20 * High-resolution timestamp or CPU clock cycle counters.
22 * After reading the current value with gmx_cycles_read() you can add or
23 * subtract these numbers as normal integers of type gmx_cycles_t.
25 #ifndef GMX_TIMING_CYCLECOUNTER_H
26 #define GMX_TIMING_CYCLECOUNTER_H
29 * define HAVE_RDTSCP to use the serializing rdtscp instruction instead of rdtsc.
30 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
45 } /* fixes auto-indentation problems */
48 /* Minor implementation note:
50 * I like to use these counters in other programs too, so to avoid making
51 * it dependent on other Gromacs definitions I use the #ifdef's to set
52 * architecture-specific inline macros instead of using gmx_inline from
53 * gmx_types.h /Erik 2005-12-10
56 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
57 (defined(__i386__) || defined(__x86_64__)))
58 /* x86 or x86-64 with GCC inline assembly */
59 typedef unsigned long long
62 #elif defined(_MSC_VER)
67 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
68 /* HP compiler on ia64 */
69 #include <machine/sys/inline.h>
73 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
74 /* Intel compiler on ia64 */
75 #include <ia64intrin.h>
79 #elif defined(__GNUC__) && defined(__ia64__)
80 /* ia64 with GCC inline assembly */
84 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
85 /* HP PA-RISC, inline asm with gcc */
89 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
90 /* HP PA-RISC, instruction when using HP compiler */
91 #include <machine/inline.h>
95 #elif defined(__GNUC__) && defined(__s390__)
96 /* S390, taken from FFTW who got it from James Treacy */
97 typedef unsigned long long
100 #elif defined(__GNUC__) && defined(__alpha__)
101 /* gcc inline assembly on alpha CPUs */
102 typedef unsigned long
105 #elif defined(__GNUC__) && defined(__sparc_v9__)
106 /* gcc inline assembly on sparc v9 */
107 typedef unsigned long
110 #elif defined(__DECC) && defined(__alpha)
111 /* Digital GEM C compiler on alpha */
113 typedef unsigned long
116 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
117 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
118 typedef unsigned long long
121 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
122 /* Solaris high-resolution timers */
126 #elif defined(__xlC__) && defined (_AIX)
128 #include <sys/time.h>
129 #include <sys/systemcfg.h>
130 typedef unsigned long long
133 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
134 ( defined(__powerpc__) || defined(__ppc__) ) )
135 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
136 typedef unsigned long long
139 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
140 /* Metrowerks on macintosh */
141 typedef unsigned long long
144 #elif defined(__sun) && defined(__sparcv9)
146 typedef unsigned long
150 /*! \brief Integer-like datatype for cycle counter values
152 * Depending on your system this will usually be something like long long,
153 * or a special cycle datatype from the system header files. It is NOT
154 * necessarily real processor cycles - many systems count in nanoseconds
155 * or a special external time register at fixed frequency (not the CPU freq.)
157 * You can subtract or add gmx_cycles_t types just as normal integers, and if
158 * you run the calibration routine you can also multiply it with a factor to
159 * translate the cycle data to seconds.
166 /*! \brief Check if high-resolution cycle counters are available
168 * Not all architectures provide any way to read timestamp counters
169 * in the CPU, and on some it is broken. Although we refer to it
170 * as cycle counters, it is not necessarily given in units of
173 * If you notice that your system is missing, implement support for it,
174 * find out how to detect the system during preprocessing, and send us a
177 * \return 1 if cycle counters are available, 0 if not.
179 * \note This function does not need to be in the header for performance
180 * reasons, but it is very important that we get exactly the
181 * same detection as for gmx_cycles_read() routines. If you
182 * compile the library with one compiler, and then use a different
183 * one when later linking to the library it might happen that the
184 * library supports cyclecounters but not the headers, or vice versa.
186 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
187 (defined(__i386__) || defined(__x86_64__)))
188 static __inline__ int gmx_cycles_have_counter(void)
190 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
193 #elif (defined(_MSC_VER))
194 static __inline int gmx_cycles_have_counter(void)
198 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
199 static inline int gmx_cycles_have_counter(void)
201 /* HP compiler on ia64, use special instruction to read ITC */
204 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
205 static __inline__ int gmx_cycles_have_counter(void)
207 /* Intel compiler on ia64, use special instruction to read ITC */
210 #elif defined(__GNUC__) && defined(__ia64__)
211 static __inline__ int gmx_cycles_have_counter(void)
213 /* ia64 with GCC inline assembly - ITC register */
216 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
217 static __inline__ int gmx_cycles_have_counter(void)
219 /* HP PA-RISC, inline asm with gcc */
222 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
223 static inline int gmx_cycles_have_counter(void)
225 /* HP PA-RISC, instruction when using HP compiler */
228 #elif defined(__GNUC__) && defined(__s390__)
229 static __inline__ int gmx_cycles_have_counter(void)
231 /* S390, taken from FFTW who got it from James Treacy */
234 #elif defined(__GNUC__) && defined(__alpha__)
235 static __inline__ int gmx_cycles_have_counter(void)
237 /* gcc inline assembly on alpha CPUs */
240 #elif defined(__GNUC__) && defined(__sparc_v9__)
241 static __inline__ int gmx_cycles_have_counter(void)
243 /* gcc inline assembly on sparc v9 */
246 #elif defined(__DECC) && defined(__alpha)
247 static __inline int gmx_cycles_have_counter(void)
249 /* Digital GEM C compiler on alpha */
252 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
253 static __inline int gmx_cycles_have_counter(void)
255 /* Irix compilers on SGI hardware */
258 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
259 static inline int gmx_cycles_have_counter(void)
261 /* Solaris high-resolution timers */
264 #elif defined(__xlC__) && defined (_AIX)
265 static inline int gmx_cycles_have_counter(void)
270 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
271 ( defined(__powerpc__) || defined(__ppc__) ) )
272 static __inline__ int gmx_cycles_have_counter(void)
274 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
277 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
278 static __inline__ int gmx_cycles_have_counter(void)
280 /* Metrowerks on macintosh */
283 #elif defined(__sun) && defined(__sparcv9)
285 static __inline__ int gmx_cycles_have_counter(void)
287 /* Solaris on SPARC*/
291 static int gmx_cycles_have_counter(void)
293 /* No cycle counter that we know of on this system */
298 /*! \brief Read CPU cycle counter
300 * This routine returns an abstract datatype containing a
301 * cycle counter timestamp.
303 * \return Opaque data corresponding to a cycle reading.
305 * Please note that on most systems it takes several cycles
306 * to read and return the cycle counters. If you are measuring
307 * small intervals, you can compensate for this time by calling
308 * the routine twice and calculating what the difference is.
309 * Subtract this from your other measurements to get an accurate result.
311 * Use gmx_cycles_difference() to get a real number corresponding to
312 * the difference between two gmx_cycles_t values returned from this
315 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
316 (defined(__i386__) || defined(__x86_64__)))
317 static __inline__ gmx_cycles_t gmx_cycles_read(void)
319 /* x86 with GCC inline assembly - pentium TSC register */
324 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
326 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
329 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
333 #elif defined(_MSC_VER)
334 static __inline gmx_cycles_t gmx_cycles_read(void)
338 return __rdtscp(&ui);
343 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
344 static inline gmx_cycles_t gmx_cycles_read(void)
346 /* HP compiler on ia64 */
348 ret = _Asm_mov_from_ar (_AREG_ITC);
351 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
352 static __inline__ gmx_cycles_t gmx_cycles_read(void)
354 /* Intel compiler on ia64 */
355 return __getReg(_IA64_REG_AR_ITC);
357 #elif defined(__GNUC__) && defined(__ia64__)
358 static __inline__ gmx_cycles_t gmx_cycles_read(void)
360 /* ia64 with GCC inline assembly */
362 __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (ret));
365 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
366 static __inline__ gmx_cycles_t gmx_cycles_read(void)
368 /* HP PA-RISC, inline asm with gcc */
370 __asm__ __volatile__("mfctl 16, %0" : "=r" (ret));
371 /* no input, nothing else clobbered */
374 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
375 static inline gmx_cycles_t gmx_cycles_read(void)
377 /* HP PA-RISC, instruction when using HP compiler */
382 #elif defined(__GNUC__) && defined(__s390__)
383 static __inline__ gmx_cycles_t gmx_cycles_read(void)
385 /* S390, taken from FFTW who got it from James Treacy */
387 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
390 #elif defined(__GNUC__) && defined(__alpha__)
391 static __inline__ gmx_cycles_t gmx_cycles_read(void)
393 /* gcc inline assembly on alpha CPUs */
395 __asm__ __volatile__ ("rpcc %0" : "=r" (cycle));
396 return (cycle & 0xFFFFFFFF);
398 #elif defined(__GNUC__) && defined(__sparc_v9__)
399 static __inline__ gmx_cycles_t gmx_cycles_read(void)
401 /* gcc inline assembly on sparc v9 */
403 __asm__("rd %%tick, %0" : "=r" (ret));
406 #elif defined(__DECC) && defined(__alpha)
407 static __inline gmx_cycles_t gmx_cycles_read(void)
409 /* Digital GEM C compiler on alpha */
411 cycle = asm ("rpcc %v0");
412 return (cycle & 0xFFFFFFFF);
414 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
415 static __inline gmx_cycles_t gmx_cycles_read(void)
417 /* Irix compilers on SGI hardware */
419 clock_gettime(CLOCK_SGI_CYCLE, &t);
420 /* Return the number of nanoseconds, so we can subtract/add */
421 return ((unsigned long long)t.tv_sec)*1000000000+
422 (unsigned long long)t.tv_nsec;
424 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
425 static inline gmx_cycles_t gmx_cycles_read(void)
427 /* Solaris high-resolution timers */
430 #elif defined(__xlC__) && defined (_AIX)
431 static inline gmx_cycles_t gmx_cycles_read(void)
433 /* AIX compilers. Inline the calculation instead of using library functions */
435 read_real_time(&t1, TIMEBASE_SZ);
436 /* POWER returns real time (seconds + nanoseconds),
437 * POWER_PC returns high/low 32 bits of a counter.
439 if (t1.flag == RTC_POWER_PC)
441 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
445 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
448 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
449 ( defined(__powerpc__) || defined(__ppc__) ) )
450 static __inline__ gmx_cycles_t gmx_cycles_read(void)
452 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
453 unsigned long low, high1, high2;
456 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
457 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
458 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
460 while (high1 != high2);
462 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
464 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
465 static __inline__ gmx_cycles_t gmx_cycles_read(void)
467 /* Metrowerks on macintosh */
468 unsigned int long low, high1, high2;
471 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
472 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
473 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
475 while (high1 != high2);
477 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
479 #elif defined(__sun) && defined(__sparcv9)
481 static __inline__ gmx_cycles_t gmx_cycles_read(void)
484 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
489 static gmx_cycles_t gmx_cycles_read(void)
495 /*! \brief Calculate number of seconds per cycle tick on host
497 * This routine runs a timer loop to calibrate the number of
498 * seconds per the units returned from gmx_cycles_read().
500 * \param sampletime Minimum real sample time. It takes some trial-and-error
501 * to find the correct delay loop size, so the total runtime of
502 * this routine is about twice this time.
503 * \return Number of seconds per cycle unit. If it is not possible to
504 * calculate on this system (for whatever reason) the return value
505 * will be -1, so check that it is positive before using it.
508 gmx_cycles_calibrate(double sampletime);