2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
38 * High-resolution timestamp or CPU clock cycle counters.
40 * After reading the current value with gmx_cycles_read() you can add or
41 * subtract these numbers as normal integers of type gmx_cycles_t.
45 #ifndef GMX_TIMING_CYCLECOUNTER_H
46 #define GMX_TIMING_CYCLECOUNTER_H
49 * define HAVE_RDTSCP to use the serializing rdtscp instruction instead of rdtsc.
50 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
63 } /* fixes auto-indentation problems */
66 /* Minor implementation note:
68 * I like to use these counters in other programs too, so to avoid making
69 * it dependent on other Gromacs definitions I use the #ifdef's to set
70 * architecture-specific inline macros instead of using gmx_inline from
71 * gmx_types.h /Erik 2005-12-10
74 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
75 (defined(__i386__) || defined(__x86_64__)))
76 /* x86 or x86-64 with GCC inline assembly */
77 typedef unsigned long long
80 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
81 /* 64-bit ARM cycle counters with GCC inline assembly */
82 typedef unsigned long long
85 #elif defined(_MSC_VER)
90 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
91 /* HP compiler on ia64 */
92 #include <machine/sys/inline.h>
96 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
97 /* Intel compiler on ia64 */
98 #include <ia64intrin.h>
102 #elif defined(__GNUC__) && defined(__ia64__)
103 /* ia64 with GCC inline assembly */
104 typedef unsigned long
107 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
108 /* HP PA-RISC, inline asm with gcc */
109 typedef unsigned long
112 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
113 /* HP PA-RISC, instruction when using HP compiler */
114 #include <machine/inline.h>
115 typedef unsigned long
118 #elif defined(__GNUC__) && defined(__s390__)
119 /* S390, taken from FFTW who got it from James Treacy */
120 typedef unsigned long long
123 #elif defined(__GNUC__) && defined(__alpha__)
124 /* gcc inline assembly on alpha CPUs */
125 typedef unsigned long
128 #elif defined(__GNUC__) && defined(__sparc_v9__)
129 /* gcc inline assembly on sparc v9 */
130 typedef unsigned long
133 #elif defined(__DECC) && defined(__alpha)
134 /* Digital GEM C compiler on alpha */
136 typedef unsigned long
139 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
140 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
141 typedef unsigned long long
144 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
145 /* Solaris high-resolution timers */
149 #elif defined(__xlC__) && defined (_AIX)
151 #include <sys/time.h>
152 #include <sys/systemcfg.h>
153 typedef unsigned long long
156 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
157 ( defined(__powerpc__) || defined(__ppc__) ) )
158 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
159 typedef unsigned long long
162 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
163 /* Metrowerks on macintosh */
164 typedef unsigned long long
167 #elif defined(__sun) && defined(__sparcv9)
169 typedef unsigned long
173 /*! \brief Integer-like datatype for cycle counter values
175 * Depending on your system this will usually be something like long long,
176 * or a special cycle datatype from the system header files. It is NOT
177 * necessarily real processor cycles - many systems count in nanoseconds
178 * or a special external time register at fixed frequency (not the CPU freq.)
180 * You can subtract or add gmx_cycles_t types just as normal integers, and if
181 * you run the calibration routine you can also multiply it with a factor to
182 * translate the cycle data to seconds.
189 /*! \brief Check if high-resolution cycle counters are available
191 * Not all architectures provide any way to read timestamp counters
192 * in the CPU, and on some it is broken. Although we refer to it
193 * as cycle counters, it is not necessarily given in units of cycles.
196 * If you notice that your system is missing, implement support for it,
197 * find out how to detect the system during preprocessing, and send us a patch.
200 * \return 1 if cycle counters are available, 0 if not.
202 * \note This function does not need to be in the header for performance
203 * reasons, but it is very important that we get exactly the
204 * same detection as for gmx_cycles_read() routines. If you
205 * compile the library with one compiler, and then use a different
206 * one when later linking to the library it might happen that the
207 * library supports cyclecounters but not the headers, or vice versa.
209 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__) || defined(_CRAYC)) && \
210 (defined(__i386__) || defined(__x86_64__)))
211 static __inline__ int gmx_cycles_have_counter(void)
213 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
216 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
217 static __inline int gmx_cycles_have_counter(void)
219 /* 64-bit ARM cycle counters with GCC inline assembly */
222 #elif (defined(_MSC_VER))
223 static __inline int gmx_cycles_have_counter(void)
227 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
228 static inline int gmx_cycles_have_counter(void)
230 /* HP compiler on ia64, use special instruction to read ITC */
233 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
234 static __inline__ int gmx_cycles_have_counter(void)
236 /* Intel compiler on ia64, use special instruction to read ITC */
239 #elif defined(__GNUC__) && defined(__ia64__)
240 static __inline__ int gmx_cycles_have_counter(void)
242 /* ia64 with GCC inline assembly - ITC register */
245 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
246 static __inline__ int gmx_cycles_have_counter(void)
248 /* HP PA-RISC, inline asm with gcc */
251 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
252 static inline int gmx_cycles_have_counter(void)
254 /* HP PA-RISC, instruction when using HP compiler */
257 #elif defined(__GNUC__) && defined(__s390__)
258 static __inline__ int gmx_cycles_have_counter(void)
260 /* S390, taken from FFTW who got it from James Treacy */
263 #elif defined(__GNUC__) && defined(__alpha__)
264 static __inline__ int gmx_cycles_have_counter(void)
266 /* gcc inline assembly on alpha CPUs */
269 #elif defined(__GNUC__) && defined(__sparc_v9__)
270 static __inline__ int gmx_cycles_have_counter(void)
272 /* gcc inline assembly on sparc v9 */
275 #elif defined(__DECC) && defined(__alpha)
276 static __inline int gmx_cycles_have_counter(void)
278 /* Digital GEM C compiler on alpha */
281 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
282 static __inline int gmx_cycles_have_counter(void)
284 /* Irix compilers on SGI hardware */
287 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
288 static inline int gmx_cycles_have_counter(void)
290 /* Solaris high-resolution timers */
293 #elif defined(__xlC__) && defined (_AIX)
294 static inline int gmx_cycles_have_counter(void)
299 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
300 ( defined(__powerpc__) || defined(__ppc__) ) )
301 static __inline__ int gmx_cycles_have_counter(void)
303 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
306 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
307 static __inline__ int gmx_cycles_have_counter(void)
309 /* Metrowerks on macintosh */
312 #elif defined(__sun) && defined(__sparcv9)
314 static __inline__ int gmx_cycles_have_counter(void)
316 /* Solaris on SPARC*/
320 static int gmx_cycles_have_counter(void)
322 /* No cycle counter that we know of on this system */
327 /*! \brief Read CPU cycle counter
329 * This routine returns an abstract datatype containing a
330 * cycle counter timestamp.
332 * \return Opaque data corresponding to a cycle reading.
334 * Please note that on most systems it takes several cycles
335 * to read and return the cycle counters. If you are measuring
336 * small intervals, you can compensate for this time by calling
337 * the routine twice and calculating what the difference is.
338 * Subtract this from your other measurements to get an accurate result.
340 * Use gmx_cycles_difference() to get a real number corresponding to
341 * the difference between two gmx_cycles_t values returned from this routine.
344 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
345 (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
346 static __inline__ gmx_cycles_t gmx_cycles_read(void)
348 /* x86 with GCC inline assembly - pentium TSC register */
353 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
355 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
358 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
362 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
363 static __inline__ gmx_cycles_t gmx_cycles_read(void)
365 /* 64-bit ARM cycle counters with GCC inline assembly */
367 __asm__ __volatile__("mrs %0, cntvct_el0" : "=r" (cycle) );
372 #elif defined(_MSC_VER)
373 static __inline gmx_cycles_t gmx_cycles_read(void)
376 /* Windows on 64-bit ARM */
377 return __rdpmccntr64();
382 return __rdtscp(&ui);
388 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
389 static inline gmx_cycles_t gmx_cycles_read(void)
391 /* HP compiler on ia64 */
393 ret = _Asm_mov_from_ar (_AREG_ITC);
396 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
397 static __inline__ gmx_cycles_t gmx_cycles_read(void)
399 /* Intel compiler on ia64 */
400 return __getReg(_IA64_REG_AR_ITC);
402 #elif defined(__GNUC__) && defined(__ia64__)
403 static __inline__ gmx_cycles_t gmx_cycles_read(void)
405 /* ia64 with GCC inline assembly */
407 __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (ret));
410 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
411 static __inline__ gmx_cycles_t gmx_cycles_read(void)
413 /* HP PA-RISC, inline asm with gcc */
415 __asm__ __volatile__("mfctl 16, %0" : "=r" (ret));
416 /* no input, nothing else clobbered */
419 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
420 static inline gmx_cycles_t gmx_cycles_read(void)
422 /* HP PA-RISC, instruction when using HP compiler */
427 #elif defined(__GNUC__) && defined(__s390__)
428 static __inline__ gmx_cycles_t gmx_cycles_read(void)
430 /* S390, taken from FFTW who got it from James Treacy */
432 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
435 #elif defined(__GNUC__) && defined(__alpha__)
436 static __inline__ gmx_cycles_t gmx_cycles_read(void)
438 /* gcc inline assembly on alpha CPUs */
440 __asm__ __volatile__ ("rpcc %0" : "=r" (cycle));
441 return (cycle & 0xFFFFFFFF);
443 #elif defined(__GNUC__) && defined(__sparc_v9__)
444 static __inline__ gmx_cycles_t gmx_cycles_read(void)
446 /* gcc inline assembly on sparc v9 */
448 __asm__("rd %%tick, %0" : "=r" (ret));
451 #elif defined(__DECC) && defined(__alpha)
452 static __inline gmx_cycles_t gmx_cycles_read(void)
454 /* Digital GEM C compiler on alpha */
456 cycle = asm ("rpcc %v0");
457 return (cycle & 0xFFFFFFFF);
459 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
460 static __inline gmx_cycles_t gmx_cycles_read(void)
462 /* Irix compilers on SGI hardware */
464 clock_gettime(CLOCK_SGI_CYCLE, &t);
465 /* Return the number of nanoseconds, so we can subtract/add */
466 return ((unsigned long long)t.tv_sec)*1000000000+
467 (unsigned long long)t.tv_nsec;
469 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
470 static inline gmx_cycles_t gmx_cycles_read(void)
472 /* Solaris high-resolution timers */
475 #elif defined(__xlC__) && defined (_AIX)
476 static inline gmx_cycles_t gmx_cycles_read(void)
478 /* AIX compilers. Inline the calculation instead of using library functions */
480 read_real_time(&t1, TIMEBASE_SZ);
481 /* POWER returns real time (seconds + nanoseconds),
482 * POWER_PC returns high/low 32 bits of a counter.
484 if (t1.flag == RTC_POWER_PC)
486 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
490 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
493 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
494 ( defined(__powerpc__) || defined(__ppc__) ) )
495 static __inline__ gmx_cycles_t gmx_cycles_read(void)
497 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
498 unsigned long low, high1, high2;
501 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
502 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
503 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
505 while (high1 != high2);
507 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
509 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
510 static __inline__ gmx_cycles_t gmx_cycles_read(void)
512 /* Metrowerks on macintosh */
513 unsigned int long low, high1, high2;
516 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
517 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
518 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
520 while (high1 != high2);
522 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
524 #elif defined(__sun) && defined(__sparcv9)
526 static __inline__ gmx_cycles_t gmx_cycles_read(void)
529 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
533 #elif defined(_CRAYC)
534 #include <intrinsics.h>
536 static __inline gmx_cycles_t gmx_cycles_read(void)
541 static gmx_cycles_t gmx_cycles_read(void)
547 /*! \brief Calculate number of seconds per cycle tick on host
549 * This routine runs a timer loop to calibrate the number of
550 * seconds per the units returned from gmx_cycles_read().
552 * \param sampletime Minimum real sample time. It takes some trial-and-error
553 * to find the correct delay loop size, so the total runtime of
554 * this routine is about twice this time.
555 * \return Number of seconds per cycle unit. If it is not possible to
556 * calculate on this system (for whatever reason) the return value
557 * will be -1, so check that it is positive before using it.
560 gmx_cycles_calibrate(double sampletime);