2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
38 * High-resolution timestamp or CPU clock cycle counters.
40 * After reading the current value with gmx_cycles_read() you can add or
41 * subtract these numbers as normal integers of type gmx_cycles_t.
45 #ifndef GMX_TIMING_CYCLECOUNTER_H
46 #define GMX_TIMING_CYCLECOUNTER_H
49 * define HAVE_RDTSCP to use the serializing rdtscp instruction instead of rdtsc.
50 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
65 } /* fixes auto-indentation problems */
68 /* Minor implementation note:
70 * I like to use these counters in other programs too, so to avoid making
71 * it dependent on other Gromacs definitions I use the #ifdef's to set
72 * architecture-specific inline macros instead of using gmx_inline from
73 * gmx_types.h /Erik 2005-12-10
76 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
77 (defined(__i386__) || defined(__x86_64__)))
78 /* x86 or x86-64 with GCC inline assembly */
79 typedef unsigned long long
82 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
83 /* 64-bit ARM cycle counters with GCC inline assembly */
84 typedef unsigned long long
87 #elif defined(_MSC_VER)
92 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
93 /* HP compiler on ia64 */
94 #include <machine/sys/inline.h>
98 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
99 /* Intel compiler on ia64 */
100 #include <ia64intrin.h>
101 typedef unsigned long
104 #elif defined(__GNUC__) && defined(__ia64__)
105 /* ia64 with GCC inline assembly */
106 typedef unsigned long
109 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
110 /* HP PA-RISC, inline asm with gcc */
111 typedef unsigned long
114 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
115 /* HP PA-RISC, instruction when using HP compiler */
116 #include <machine/inline.h>
117 typedef unsigned long
120 #elif defined(__GNUC__) && defined(__s390__)
121 /* S390, taken from FFTW who got it from James Treacy */
122 typedef unsigned long long
125 #elif defined(__GNUC__) && defined(__alpha__)
126 /* gcc inline assembly on alpha CPUs */
127 typedef unsigned long
130 #elif defined(__GNUC__) && defined(__sparc_v9__)
131 /* gcc inline assembly on sparc v9 */
132 typedef unsigned long
135 #elif defined(__DECC) && defined(__alpha)
136 /* Digital GEM C compiler on alpha */
138 typedef unsigned long
141 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
142 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
143 typedef unsigned long long
146 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
147 /* Solaris high-resolution timers */
151 #elif defined(__xlC__) && defined (_AIX)
153 #include <sys/time.h>
154 #include <sys/systemcfg.h>
155 typedef unsigned long long
158 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
159 ( defined(__powerpc__) || defined(__ppc__) ) )
160 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
161 typedef unsigned long long
164 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
165 /* Metrowerks on macintosh */
166 typedef unsigned long long
169 #elif defined(__sun) && defined(__sparcv9)
171 typedef unsigned long
175 /*! \brief Integer-like datatype for cycle counter values
177 * Depending on your system this will usually be something like long long,
178 * or a special cycle datatype from the system header files. It is NOT
179 * necessarily real processor cycles - many systems count in nanoseconds
180 * or a special external time register at fixed frequency (not the CPU freq.)
182 * You can subtract or add gmx_cycles_t types just as normal integers, and if
183 * you run the calibration routine you can also multiply it with a factor to
184 * translate the cycle data to seconds.
191 /*! \brief Check if high-resolution cycle counters are available
193 * Not all architectures provide any way to read timestamp counters
194 * in the CPU, and on some it is broken. Although we refer to it
195 * as cycle counters, it is not necessarily given in units of
198 * If you notice that your system is missing, implement support for it,
199 * find out how to detect the system during preprocessing, and send us a
202 * \return 1 if cycle counters are available, 0 if not.
204 * \note This function does not need to be in the header for performance
205 * reasons, but it is very important that we get exactly the
206 * same detection as for gmx_cycles_read() routines. If you
207 * compile the library with one compiler, and then use a different
208 * one when later linking to the library it might happen that the
209 * library supports cyclecounters but not the headers, or vice versa.
211 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__) || defined(_CRAYC)) && \
212 (defined(__i386__) || defined(__x86_64__)))
213 static __inline__ int gmx_cycles_have_counter(void)
215 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
218 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
219 static __inline int gmx_cycles_have_counter(void)
221 /* 64-bit ARM cycle counters with GCC inline assembly */
224 #elif (defined(_MSC_VER))
225 static __inline int gmx_cycles_have_counter(void)
229 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
230 static inline int gmx_cycles_have_counter(void)
232 /* HP compiler on ia64, use special instruction to read ITC */
235 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
236 static __inline__ int gmx_cycles_have_counter(void)
238 /* Intel compiler on ia64, use special instruction to read ITC */
241 #elif defined(__GNUC__) && defined(__ia64__)
242 static __inline__ int gmx_cycles_have_counter(void)
244 /* ia64 with GCC inline assembly - ITC register */
247 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
248 static __inline__ int gmx_cycles_have_counter(void)
250 /* HP PA-RISC, inline asm with gcc */
253 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
254 static inline int gmx_cycles_have_counter(void)
256 /* HP PA-RISC, instruction when using HP compiler */
259 #elif defined(__GNUC__) && defined(__s390__)
260 static __inline__ int gmx_cycles_have_counter(void)
262 /* S390, taken from FFTW who got it from James Treacy */
265 #elif defined(__GNUC__) && defined(__alpha__)
266 static __inline__ int gmx_cycles_have_counter(void)
268 /* gcc inline assembly on alpha CPUs */
271 #elif defined(__GNUC__) && defined(__sparc_v9__)
272 static __inline__ int gmx_cycles_have_counter(void)
274 /* gcc inline assembly on sparc v9 */
277 #elif defined(__DECC) && defined(__alpha)
278 static __inline int gmx_cycles_have_counter(void)
280 /* Digital GEM C compiler on alpha */
283 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
284 static __inline int gmx_cycles_have_counter(void)
286 /* Irix compilers on SGI hardware */
289 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
290 static inline int gmx_cycles_have_counter(void)
292 /* Solaris high-resolution timers */
295 #elif defined(__xlC__) && defined (_AIX)
296 static inline int gmx_cycles_have_counter(void)
301 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
302 ( defined(__powerpc__) || defined(__ppc__) ) )
303 static __inline__ int gmx_cycles_have_counter(void)
305 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
308 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
309 static __inline__ int gmx_cycles_have_counter(void)
311 /* Metrowerks on macintosh */
314 #elif defined(__sun) && defined(__sparcv9)
316 static __inline__ int gmx_cycles_have_counter(void)
318 /* Solaris on SPARC*/
322 static int gmx_cycles_have_counter(void)
324 /* No cycle counter that we know of on this system */
329 /*! \brief Read CPU cycle counter
331 * This routine returns an abstract datatype containing a
332 * cycle counter timestamp.
334 * \return Opaque data corresponding to a cycle reading.
336 * Please note that on most systems it takes several cycles
337 * to read and return the cycle counters. If you are measuring
338 * small intervals, you can compensate for this time by calling
339 * the routine twice and calculating what the difference is.
340 * Subtract this from your other measurements to get an accurate result.
342 * Use gmx_cycles_difference() to get a real number corresponding to
343 * the difference between two gmx_cycles_t values returned from this
346 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
347 (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
348 static __inline__ gmx_cycles_t gmx_cycles_read(void)
350 /* x86 with GCC inline assembly - pentium TSC register */
355 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
357 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
360 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
364 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
365 static __inline__ gmx_cycles_t gmx_cycles_read(void)
367 /* 64-bit ARM cycle counters with GCC inline assembly */
369 __asm__ __volatile__("mrs %0, cntvct_el0" : "=r" (cycle) );
374 #elif defined(_MSC_VER)
375 static __inline gmx_cycles_t gmx_cycles_read(void)
378 /* Windows on 64-bit ARM */
379 return __rdpmccntr64();
384 return __rdtscp(&ui);
390 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
391 static inline gmx_cycles_t gmx_cycles_read(void)
393 /* HP compiler on ia64 */
395 ret = _Asm_mov_from_ar (_AREG_ITC);
398 #elif ((defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__))
399 static __inline__ gmx_cycles_t gmx_cycles_read(void)
401 /* Intel compiler (icc/ecc) on ia64: read the ITC application register. The preprocessor test must match the gmx_cycles_t typedef and gmx_cycles_have_counter() branches for this platform. */
402 return __getReg(_IA64_REG_AR_ITC);
404 #elif defined(__GNUC__) && defined(__ia64__)
405 static __inline__ gmx_cycles_t gmx_cycles_read(void)
407 /* ia64 with GCC inline assembly */
409 __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (ret));
412 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
413 static __inline__ gmx_cycles_t gmx_cycles_read(void)
415 /* HP PA-RISC, inline asm with gcc */
417 __asm__ __volatile__("mfctl 16, %0" : "=r" (ret));
418 /* no input, nothing else clobbered */
421 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
422 static inline gmx_cycles_t gmx_cycles_read(void)
424 /* HP PA-RISC, instruction when using HP compiler */
429 #elif defined(__GNUC__) && defined(__s390__)
430 static __inline__ gmx_cycles_t gmx_cycles_read(void)
432 /* S390, taken from FFTW who got it from James Treacy */
434 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
437 #elif defined(__GNUC__) && defined(__alpha__)
438 static __inline__ gmx_cycles_t gmx_cycles_read(void)
440 /* gcc inline assembly on alpha CPUs */
442 __asm__ __volatile__ ("rpcc %0" : "=r" (cycle));
443 return (cycle & 0xFFFFFFFF);
445 #elif defined(__GNUC__) && defined(__sparc_v9__)
446 static __inline__ gmx_cycles_t gmx_cycles_read(void)
448 /* gcc inline assembly on sparc v9 */
450 __asm__("rd %%tick, %0" : "=r" (ret));
453 #elif defined(__DECC) && defined(__alpha)
454 static __inline gmx_cycles_t gmx_cycles_read(void)
456 /* Digital GEM C compiler on alpha */
458 cycle = asm ("rpcc %v0");
459 return (cycle & 0xFFFFFFFF);
461 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
462 static __inline gmx_cycles_t gmx_cycles_read(void)
464 /* Irix compilers on SGI hardware */
466 clock_gettime(CLOCK_SGI_CYCLE, &t);
467 /* Return the number of nanoseconds, so we can subtract/add */
468 return ((unsigned long long)t.tv_sec)*1000000000+
469 (unsigned long long)t.tv_nsec;
471 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
472 static inline gmx_cycles_t gmx_cycles_read(void)
474 /* Solaris high-resolution timers */
477 #elif defined(__xlC__) && defined (_AIX)
478 static inline gmx_cycles_t gmx_cycles_read(void)
480 /* AIX compilers. Inline the calculation instead of using library functions */
482 read_real_time(&t1, TIMEBASE_SZ);
483 /* POWER returns real time (seconds + nanoseconds),
484 * POWER_PC returns high/low 32 bits of a counter.
486 if (t1.flag == RTC_POWER_PC)
488 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
492 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
495 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
496 ( defined(__powerpc__) || defined(__ppc__) ) )
497 static __inline__ gmx_cycles_t gmx_cycles_read(void)
499 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
500 unsigned long low, high1, high2;
503 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
504 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
505 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
507 while (high1 != high2);
509 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
511 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
512 static __inline__ gmx_cycles_t gmx_cycles_read(void)
514 /* Metrowerks on macintosh */
515 unsigned int long low, high1, high2;
518 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
519 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
520 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
522 while (high1 != high2);
524 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
526 #elif defined(__sun) && defined(__sparcv9)
528 static __inline__ gmx_cycles_t gmx_cycles_read(void)
531 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
535 #elif defined(_CRAYC)
536 #include <intrinsics.h>
538 static __inline gmx_cycles_t gmx_cycles_read(void)
543 static gmx_cycles_t gmx_cycles_read(void)
549 /*! \brief Calculate number of seconds per cycle tick on host
551 * This routine runs a timer loop to calibrate the number of
552 * seconds per the units returned from gmx_cycles_read().
554 * \param sampletime Minimum real sample time. It takes some trial-and-error
555 * to find the correct delay loop size, so the total runtime of
556 * this routine is about twice this time.
557 * \return Number of seconds per cycle unit. If it is not possible to
558 * calculate on this system (for whatever reason) the return value
559 * will be -1, so check that it is positive before using it.
562 gmx_cycles_calibrate(double sampletime);