2 * This file is part of the GROMACS molecular simulation package.
4 * This file is part of Gromacs Copyright (c) 1991-2006
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
6 * Copyright (c) 2012, by the GROMACS development team, led by
7 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
8 * others, as listed in the AUTHORS file in the top-level source
9 * directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 #ifndef _GMX_CYCLECOUNTER_H_
39 #define _GMX_CYCLECOUNTER_H_
42 * define HAVE_RDTSCP to use the serializing rdtscp instruction instead of rdtsc.
43 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
47 /** @file gmx_cyclecounter.h
49 * @brief High-resolution timestamp or CPU clock cycle counters.
51 * After reading the current value with gmx_cycles_read() you can add or
52 * subtract these numbers as normal integers of type gmx_cycles_t.
64 } /* fixes auto-indentation problems */
69 /* Minor implementation note:
71 * I like to use these counters in other programs too, so to avoid making
72 * it dependent on other Gromacs definitions I use the #ifdef's to set
73 * architecture-specific inline macros instead of using gmx_inline from
74 * gmx_types.h /Erik 2005-12-10
77 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
78 (defined(__i386__) || defined(__x86_64__)))
79 /* x86 or x86-64 with GCC inline assembly */
80 typedef unsigned long long
83 #elif defined(_MSC_VER)
88 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
89 /* HP compiler on ia64 */
90 #include <machine/sys/inline.h>
94 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
95 /* Intel compiler on ia64 */
96 #include <ia64intrin.h>
100 #elif defined(__GNUC__) && defined(__ia64__)
101 /* ia64 with GCC inline assembly */
102 typedef unsigned long
105 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
106 /* HP PA-RISC, inline asm with gcc */
107 typedef unsigned long
110 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
111 /* HP PA-RISC, instruction when using HP compiler */
112 #include <machine/inline.h>
113 typedef unsigned long
116 #elif defined(__GNUC__) && defined(__s390__)
117 /* S390, taken from FFTW who got it from James Treacy */
118 typedef unsigned long long
121 #elif defined(__GNUC__) && defined(__alpha__)
122 /* gcc inline assembly on alpha CPUs */
123 typedef unsigned long
126 #elif defined(__GNUC__) && defined(__sparc_v9__)
127 /* gcc inline assembly on sparc v9 */
128 typedef unsigned long
131 #elif defined(__DECC) && defined(__alpha)
132 /* Digital GEM C compiler on alpha */
134 typedef unsigned long
137 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
138 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
139 typedef unsigned long long
142 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
143 /* Solaris high-resolution timers */
147 #elif defined(__xlC__) && defined (_AIX)
149 #include <sys/time.h>
150 #include <sys/systemcfg.h>
151 typedef unsigned long long
154 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
155 ( defined(__powerpc__) || defined(__ppc__) ) )
156 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
157 typedef unsigned long long
160 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
161 /* Metrowerks on macintosh */
162 typedef unsigned long long
165 #elif defined(__sun) && defined(__sparcv9)
167 typedef unsigned long
171 /*! \brief Integer-like datatype for cycle counter values
173 * Depending on your system this will usually be something like long long,
174 * or a special cycle datatype from the system header files. It is NOT
175 * necessarily real processor cycles - many systems count in nanoseconds
176 * or a special external time register at fixed frequency (not the CPU freq.)
178 * You can subtract or add gmx_cycles_t values just as normal integers, and if
179 * you run the calibration routine you can also multiply it with a factor to
180 * translate the cycle data to seconds.
189 /*! \brief Check if high-resolution cycle counters are available
191 * Not all architectures provide any way to read timestamp counters
192 * in the CPU, and on some it is broken. Although we refer to it
193 * as cycle counters, it is not necessarily given in units of
196 * If you notice that your system is missing, implement support for it,
197 * find out how to detect the system during preprocessing, and send us a
200 * \return 1 if cycle counters are available, 0 if not.
202 * \note This function does not need to be in the header for performance
203 * reasons, but it is very important that we get exactly the
204 * same detection as for gmx_cycles_read() routines. If you
205 * compile the library with one compiler, and then use a different
206 * one when later linking to the library it might happen that the
207 * library supports cyclecounters but not the headers, or vice versa.
209 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
210 (defined(__i386__) || defined(__x86_64__)))
211 static __inline__ int gmx_cycles_have_counter(void)
213 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
216 #elif (defined(_MSC_VER))
217 static __inline int gmx_cycles_have_counter(void)
221 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
222 static inline int gmx_cycles_have_counter(void)
224 /* HP compiler on ia64, use special instruction to read ITC */
227 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
228 static __inline__ int gmx_cycles_have_counter(void)
230 /* Intel compiler on ia64, use special instruction to read ITC */
233 #elif defined(__GNUC__) && defined(__ia64__)
234 static __inline__ int gmx_cycles_have_counter(void)
236 /* ia64 with GCC inline assembly - ITC register */
239 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
240 static __inline__ int gmx_cycles_have_counter(void)
242 /* HP PA-RISC, inline asm with gcc */
245 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
246 static inline int gmx_cycles_have_counter(void)
248 /* HP PA-RISC, instruction when using HP compiler */
251 #elif defined(__GNUC__) && defined(__s390__)
252 static __inline__ int gmx_cycles_have_counter(void)
254 /* S390, taken from FFTW who got it from James Treacy */
257 #elif defined(__GNUC__) && defined(__alpha__)
258 static __inline__ int gmx_cycles_have_counter(void)
260 /* gcc inline assembly on alpha CPUs */
263 #elif defined(__GNUC__) && defined(__sparc_v9__)
264 static __inline__ int gmx_cycles_have_counter(void)
266 /* gcc inline assembly on sparc v9 */
269 #elif defined(__DECC) && defined(__alpha)
270 static __inline int gmx_cycles_have_counter(void)
272 /* Digital GEM C compiler on alpha */
275 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
276 static __inline int gmx_cycles_have_counter(void)
278 /* Irix compilers on SGI hardware */
281 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
282 static inline int gmx_cycles_have_counter(void)
284 /* Solaris high-resolution timers */
287 #elif defined(__xlC__) && defined (_AIX)
288 static inline int gmx_cycles_have_counter(void)
293 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
294 ( defined(__powerpc__) || defined(__ppc__) ) )
295 static __inline__ int gmx_cycles_have_counter(void)
297 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
300 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
301 static __inline__ int gmx_cycles_have_counter(void)
303 /* Metrowerks on macintosh */
306 #elif defined(__sun) && defined(__sparcv9)
308 static __inline__ int gmx_cycles_have_counter(void)
310 /* Solaris on SPARC*/
314 static int gmx_cycles_have_counter(void)
316 /* No cycle counter that we know of on this system */
326 /*! \brief Read CPU cycle counter
328 * This routine returns an abstract datatype containing a
329 * cycle counter timestamp.
331 * \return Opaque data corresponding to a cycle reading.
333 * Please note that on most systems it takes several cycles
334 * to read and return the cycle counters. If you are measuring
335 * small intervals, you can compensate for this time by calling
336 * the routine twice and calculating what the difference is.
337 * Subtract this from your other measurements to get an accurate result.
339 * Use gmx_cycles_difference() to get a real number corresponding to
340 * the difference between two gmx_cycles_t values returned from this
343 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
344 (defined(__i386__) || defined(__x86_64__)))
345 static __inline__ gmx_cycles_t gmx_cycles_read(void)
347 /* x86 with GCC inline assembly - pentium TSC register */
352 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
354 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
357 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
361 #elif defined(_MSC_VER)
362 static __inline gmx_cycles_t gmx_cycles_read(void)
366 return __rdtscp(&ui);
371 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
372 static inline gmx_cycles_t gmx_cycles_read(void)
374 /* HP compiler on ia64 */
376 ret = _Asm_mov_from_ar (_AREG_ITC);
379 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
380 static __inline__ gmx_cycles_t gmx_cycles_read(void)
382 /* Intel compiler on ia64 (icc or ecc): reads the ITC application register. The guard must match the gmx_cycles_t typedef and gmx_cycles_have_counter() branches exactly, otherwise a counter is reported as available but never read. */
383 return __getReg(_IA64_REG_AR_ITC);
385 #elif defined(__GNUC__) && defined(__ia64__)
386 static __inline__ gmx_cycles_t gmx_cycles_read(void)
388 /* ia64 with GCC inline assembly */
390 __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
393 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
394 static __inline__ gmx_cycles_t gmx_cycles_read(void)
396 /* HP PA-RISC, inline asm with gcc */
398 __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
399 /* no input, nothing else clobbered */
402 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
403 static inline gmx_cycles_t gmx_cycles_read(void)
405 /* HP PA-RISC, instruction when using HP compiler */
410 #elif defined(__GNUC__) && defined(__s390__)
411 static __inline__ gmx_cycles_t gmx_cycles_read(void)
413 /* S390, taken from FFTW who got it from James Treacy */
415 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
418 #elif defined(__GNUC__) && defined(__alpha__)
419 static __inline__ gmx_cycles_t gmx_cycles_read(void)
421 /* gcc inline assembly on alpha CPUs */
423 __asm__ __volatile__ ("rpcc %0" : "=r"(cycle));
424 return (cycle & 0xFFFFFFFF);
426 #elif defined(__GNUC__) && defined(__sparc_v9__)
427 static __inline__ gmx_cycles_t gmx_cycles_read(void)
429 /* gcc inline assembly on sparc v9: reads the %tick register. The asm is volatile so that two successive reads are never merged, hoisted or dropped by the optimizer. */
431 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
434 #elif defined(__DECC) && defined(__alpha)
435 static __inline gmx_cycles_t gmx_cycles_read(void)
437 /* Digital GEM C compiler on alpha */
439 cycle = asm("rpcc %v0");
440 return (cycle & 0xFFFFFFFF);
442 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
443 static __inline gmx_cycles_t gmx_cycles_read(void)
445 /* Irix compilers on SGI hardware */
447 clock_gettime(CLOCK_SGI_CYCLE, &t);
448 /* Return the number of nanoseconds, so we can subtract/add */
449 return ((unsigned long long)t.tv_sec)*1000000000+
450 (unsigned long long)t.tv_nsec;
452 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
453 static inline gmx_cycles_t gmx_cycles_read(void)
455 /* Solaris high-resolution timers */
458 #elif defined(__xlC__) && defined (_AIX)
459 static inline gmx_cycles_t gmx_cycles_read(void)
461 /* AIX compilers. Inline the calculation instead of using library functions */
463 read_real_time(&t1, TIMEBASE_SZ);
464 /* POWER returns real time (seconds + nanoseconds),
465 * POWER_PC returns high/low 32 bits of a counter.
467 if(t1.flag==RTC_POWER_PC)
469 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
473 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
476 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
477 ( defined(__powerpc__) || defined(__ppc__) ) )
478 static __inline__ gmx_cycles_t gmx_cycles_read(void)
480 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
481 unsigned long low, high1, high2;
484 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
485 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
486 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
488 while (high1 != high2);
490 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
492 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
493 static __inline__ gmx_cycles_t gmx_cycles_read(void)
495 /* Metrowerks on macintosh */
496 unsigned int long low, high1, high2;
499 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
500 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
501 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
503 while (high1 != high2);
505 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
507 #elif defined(__sun) && defined(__sparcv9)
509 static __inline__ gmx_cycles_t gmx_cycles_read(void)
512 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
517 static gmx_cycles_t gmx_cycles_read(void)
530 /*! \brief Calculate number of seconds per cycle tick on host
532 * This routine runs a timer loop to calibrate the number of
533 * seconds per the units returned from gmx_cycles_difference()
535 * To calculate the time used, call gmx_cycles_read() twice,
536 * and then use this routine to calculate the difference as a double
537 * precision floating-point number.
539 * \param sampletime Minimum number of seconds to sample.
540 * One second should give you a reasonably accurate calibration.
541 * \return Number of seconds per cycle unit. If it is not possible to
542 * calculate on this system (for whatever reason) the return value
543 * will be -1, so check that it is positive before using it.
546 gmx_cycles_calibrate(double sampletime);
555 #endif /* _GMX_CYCLECOUNTER_H_ */