2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
38 * High-resolution timestamp or CPU clock cycle counters.
40 * After reading the current value with gmx_cycles_read() you can add or
41 * subtract these numbers as normal integers of type gmx_cycles_t.
45 #ifndef GMX_TIMING_CYCLECOUNTER_H
46 #define GMX_TIMING_CYCLECOUNTER_H
49 * Define HAVE_RDTSCP=1 to use the serializing rdtscp instruction instead of rdtsc.
50 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
58 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) \
59 && (defined(__i386__) || defined(__x86_64__)))
60 /* x86 or x86-64 with GCC inline assembly */
61 typedef unsigned long long gmx_cycles_t;
63 #elif ((defined __aarch64__) \
64 && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
65 /* 64-bit ARM cycle counters with GCC inline assembly */
66 typedef unsigned long long gmx_cycles_t;
68 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
69 /* Armv7A can provide 64-bit cycles by returning two registers */
70 typedef unsigned long long gmx_cycles_t;
72 #elif defined(_MSC_VER)
74 typedef __int64 gmx_cycles_t;
76 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
77 /* HP compiler on ia64 */
78 # include <machine/sys/inline.h>
79 typedef unsigned long gmx_cycles_t;
81 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
82 /* Intel compiler on ia64 */
83 # include <ia64intrin.h>
84 typedef unsigned long gmx_cycles_t;
86 #elif defined(__GNUC__) && defined(__ia64__)
87 /* ia64 with GCC inline assembly */
88 typedef unsigned long gmx_cycles_t;
90 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__GNUC__))
91 /* HP PA-RISC, inline asm with gcc */
92 typedef unsigned long gmx_cycles_t;
94 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__hpux))
95 /* HP PA-RISC, instruction when using HP compiler */
96 # include <machine/inline.h>
97 typedef unsigned long gmx_cycles_t;
99 #elif defined(__GNUC__) && defined(__s390__)
100 /* S390, taken from FFTW who got it from James Treacy */
101 typedef unsigned long long gmx_cycles_t;
103 #elif defined(__GNUC__) && defined(__alpha__)
104 /* gcc inline assembly on alpha CPUs */
105 typedef unsigned long gmx_cycles_t;
107 #elif defined(__GNUC__) && defined(__sparc_v9__)
108 /* gcc inline assembly on sparc v9 */
109 typedef unsigned long gmx_cycles_t;
111 #elif defined(__DECC) && defined(__alpha)
112 /* Digital GEM C compiler on alpha */
114 typedef unsigned long gmx_cycles_t;
116 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
117 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
118 typedef unsigned long long gmx_cycles_t;
120 #elif (defined(__SVR4) && defined(__SUNPRO_CC))
121 /* Solaris high-resolution timers */
122 typedef hrtime_t gmx_cycles_t;
124 #elif defined(__xlC__) && defined(_AIX)
126 # include <sys/systemcfg.h>
127 # include <sys/time.h>
128 typedef unsigned long long gmx_cycles_t;
130 #elif ((defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) \
131 && (defined(__powerpc__) || defined(__ppc__)))
132 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
133 typedef unsigned long long gmx_cycles_t;
135 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
136 /* Metrowerks on macintosh */
137 typedef unsigned long long gmx_cycles_t;
139 #elif defined(__sun) && defined(__sparcv9)
141 typedef unsigned long gmx_cycles_t;
144 /*! \brief Integer-like datatype for cycle counter values
146 * Depending on your system this will usually be something like long long,
147 * or a special cycle datatype from the system header files. It is NOT
148 * necessarily real processor cycles - many systems count in nanoseconds
149 * or a special external time register at fixed frequency (not the CPU freq.)
151 * You can subtract or add gmx_cycles_t values just as normal integers, and if
152 * you run the calibration routine you can also multiply them with a factor to
153 * translate the cycle data to seconds.
155 typedef long gmx_cycles_t;
159 /*! \brief Read CPU cycle counter
161 * This routine returns an abstract datatype containing a
162 * cycle counter timestamp.
164 * \return Opaque data corresponding to a cycle reading.
166 * Please note that on most systems it takes several cycles
167 * to read and return the cycle counters. If you are measuring
168 * small intervals, you can compensate for this time by calling
169 * the routine twice and calculating what the difference is.
170 * Subtract this from your other measurements to get an accurate result.
172 * Use gmx_cycles_difference() to get a real number corresponding to
173 * the difference between two gmx_cycles_t values returned from this routine.
176 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) \
177 && (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
178 static __inline__ gmx_cycles_t gmx_cycles_read()
180 /* x86 with GCC inline assembly - pentium TSC register */
184 __asm__ __volatile__("rdtscp" : "=a"(low), "=d"(high)::"ecx");
186 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
188 const gmx_cycles_t c_low = low;
189 const gmx_cycles_t c_high = high;
190 return c_low | c_high << 32;
192 #elif ((defined __aarch64__) \
193 && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
194 static __inline__ gmx_cycles_t gmx_cycles_read(void)
196 /* 64-bit ARM cycle counters with GCC inline assembly */
198 __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(cycle));
202 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
203 static __inline__ gmx_cycles_t gmx_cycles_read(void)
205 unsigned int cycles_lo, cycles_hi;
206 asm volatile("mrrc p15, 1, %0, %1, c14" : "=r"(cycles_lo), "=r"(cycles_hi));
207 return ((gmx_cycles_t)cycles_lo) | (((gmx_cycles_t)cycles_hi) << 32);
209 #elif defined(_MSC_VER)
210 static __inline gmx_cycles_t gmx_cycles_read(void)
213 /* Windows on 64-bit ARM */
214 return __rdpmccntr64();
219 return __rdtscp(&ui);
225 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
226 static inline gmx_cycles_t gmx_cycles_read(void)
228 /* HP compiler on ia64 */
230 ret = _Asm_mov_from_ar(_AREG_ITC);
233 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
234 static __inline__ gmx_cycles_t gmx_cycles_read(void)
236 /* Intel compiler on ia64 */
237 return __getReg(_IA64_REG_AR_ITC);
239 #elif defined(__GNUC__) && defined(__ia64__)
240 static __inline__ gmx_cycles_t gmx_cycles_read(void)
242 /* ia64 with GCC inline assembly */
244 __asm__ __volatile__("mov %0=ar.itc" : "=r"(ret));
247 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__GNUC__))
248 static __inline__ gmx_cycles_t gmx_cycles_read(void)
250 /* HP PA-RISC, inline asm with gcc */
252 __asm__ __volatile__("mfctl 16, %0" : "=r"(ret));
253 /* no input, nothing else clobbered */
256 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__hpux))
257 static inline gmx_cycles_t gmx_cycles_read(void)
259 /* HP PA-RISC, instruction when using HP compiler */
264 #elif defined(__GNUC__) && defined(__s390__)
265 static __inline__ gmx_cycles_t gmx_cycles_read(void)
267 /* S390, taken from FFTW who got it from James Treacy */
269 __asm__("stck 0(%0)" : : "a"(&(cycle)) : "memory", "cc");
272 #elif defined(__GNUC__) && defined(__alpha__)
273 static __inline__ gmx_cycles_t gmx_cycles_read(void)
275 /* gcc inline assembly on alpha CPUs */
277 __asm__ __volatile__("rpcc %0" : "=r"(cycle));
278 return (cycle & 0xFFFFFFFF);
280 #elif defined(__GNUC__) && defined(__sparc_v9__)
281 static __inline__ gmx_cycles_t gmx_cycles_read(void)
283 /* gcc inline assembly on sparc v9 */
285 __asm__("rd %%tick, %0" : "=r"(ret));
288 #elif defined(__DECC) && defined(__alpha)
289 static __inline gmx_cycles_t gmx_cycles_read(void)
291 /* Digital GEM C compiler on alpha */
293 cycle = asm("rpcc %v0");
294 return (cycle & 0xFFFFFFFF);
296 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
297 static __inline gmx_cycles_t gmx_cycles_read(void)
299 /* Irix compilers on SGI hardware */
301 clock_gettime(CLOCK_SGI_CYCLE, &t);
302 /* Return the number of nanoseconds, so we can subtract/add */
303 return ((unsigned long long)t.tv_sec) * 1000000000 + (unsigned long long)t.tv_nsec;
305 #elif (defined(__SVR4) && defined(__SUNPRO_CC))
306 static inline gmx_cycles_t gmx_cycles_read(void)
308 /* Solaris high-resolution timers */
311 #elif defined(__xlC__) && defined(_AIX)
312 static inline gmx_cycles_t gmx_cycles_read(void)
314 /* AIX compilers. Inline the calculation instead of using library functions */
316 read_real_time(&t1, TIMEBASE_SZ);
317 /* POWER returns real time (seconds + nanoseconds),
318 * POWER_PC returns high/low 32 bits of a counter.
320 if (t1.flag == RTC_POWER_PC)
322 return ((gmx_cycles_t)t1.tb_high) << 32 | (gmx_cycles_t)t1.tb_low;
326 return ((gmx_cycles_t)t1.tb_high) * 1000000000 + (gmx_cycles_t)t1.tb_low;
329 #elif ((defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) \
330 && (defined(__powerpc__) || defined(__ppc__)))
331 static __inline__ gmx_cycles_t gmx_cycles_read(void)
333 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc, and clang) */
334 unsigned long low, high1, high2;
337 // clang 3.7 incorrectly warns that mftb* are
338 // deprecated. That's not correct - see
339 // https://llvm.org/bugs/show_bug.cgi?id=23680.
340 __asm__ __volatile__("mftbu %0" : "=r"(high1) :);
341 __asm__ __volatile__("mftb %0" : "=r"(low) :);
342 __asm__ __volatile__("mftbu %0" : "=r"(high2) :);
343 } while (high1 != high2);
345 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
347 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
348 static __inline__ gmx_cycles_t gmx_cycles_read(void)
350 /* Metrowerks on macintosh */
351 unsigned int long low, high1, high2;
354 __asm__ __volatile__("mftbu %0" : "=r"(high1) :);
355 __asm__ __volatile__("mftb %0" : "=r"(low) :);
356 __asm__ __volatile__("mftbu %0" : "=r"(high2) :);
357 } while (high1 != high2);
359 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
361 #elif defined(__sun) && defined(__sparcv9)
363 static __inline__ gmx_cycles_t gmx_cycles_read(void)
366 __asm__ __volatile__("rd %%tick, %0" : "=r"(ret));
370 #elif defined(_CRAYC)
371 # include <intrinsics.h>
373 static __inline gmx_cycles_t gmx_cycles_read(void)
378 static gmx_cycles_t gmx_cycles_read(void)
385 /*! \brief Check if high-resolution cycle counters are available
387 * Not all architectures provide a way to read timestamp counters
388 * in the CPU, and on some it is broken. Although we refer to them
389 * as cycle counters, they are not necessarily given in units of cycles.
392 * If you notice that your system is missing, implement support for it,
393 * find out how to detect the system during preprocessing, and send us a patch.
396 * \return 1 if cycle counters are available, 0 if not.
398 * \note This function does not need to be in the header for performance
399 * reasons, but it is very important that we get exactly the
400 * same detection as for the gmx_cycles_read() routines. If you
401 * compile the library with one compiler, and then use a different
402 * one when later linking to the library, it might happen that the
403 * library supports cycle counters but not the headers, or vice versa.
405 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) \
406 || defined(__PGIC__) || defined(_CRAYC)) \
407 && (defined(__i386__) || defined(__x86_64__)))
408 static __inline__ bool gmx_cycles_have_counter()
410 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
413 #elif ((defined __aarch64__) \
414 && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
415 static __inline bool gmx_cycles_have_counter(void)
417 /* 64-bit ARM cycle counters with GCC inline assembly */
420 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
421 static __inline bool gmx_cycles_have_counter(void)
423 /* Armv7A can provide 64-bit cycles by returning two registers. However, it will not work unless
424 * the performance registers have been made available from user space by a kernel module -
425 * otherwise it returns 0.
429 c0 = gmx_cycles_read();
430 c1 = gmx_cycles_read();
432 /* if both counters return 0, support is not present */
433 return (c0 != 0 || c1 != 0);
435 #elif (defined(_MSC_VER))
436 static __inline bool gmx_cycles_have_counter(void)
440 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
441 static inline bool gmx_cycles_have_counter(void)
443 /* HP compiler on ia64, use special instruction to read ITC */
446 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
447 static __inline__ bool gmx_cycles_have_counter(void)
449 /* Intel compiler on ia64, use special instruction to read ITC */
452 #elif defined(__GNUC__) && defined(__ia64__)
453 static __inline__ bool gmx_cycles_have_counter(void)
455 /* ia64 with GCC inline assembly - ITC register */
458 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__GNUC__))
459 static __inline__ bool gmx_cycles_have_counter(void)
461 /* HP PA-RISC, inline asm with gcc */
464 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__hpux))
465 static inline bool gmx_cycles_have_counter(void)
467 /* HP PA-RISC, instruction when using HP compiler */
470 #elif defined(__GNUC__) && defined(__s390__)
471 static __inline__ bool gmx_cycles_have_counter(void)
473 /* S390, taken from FFTW who got it from James Treacy */
476 #elif defined(__GNUC__) && defined(__alpha__)
477 static __inline__ bool gmx_cycles_have_counter(void)
479 /* gcc inline assembly on alpha CPUs */
482 #elif defined(__GNUC__) && defined(__sparc_v9__)
483 static __inline__ bool gmx_cycles_have_counter(void)
485 /* gcc inline assembly on sparc v9 */
488 #elif defined(__DECC) && defined(__alpha)
489 static __inline bool gmx_cycles_have_counter(void)
491 /* Digital GEM C compiler on alpha */
494 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
495 static __inline bool gmx_cycles_have_counter(void)
497 /* Irix compilers on SGI hardware */
500 #elif (defined(__SVR4) && defined(__SUNPRO_CC))
501 static inline bool gmx_cycles_have_counter(void)
503 /* Solaris high-resolution timers */
506 #elif defined(__xlC__) && defined(_AIX)
507 static inline bool gmx_cycles_have_counter(void)
512 #elif ((defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) \
513 && (defined(__powerpc__) || defined(__ppc__)))
514 static __inline__ bool gmx_cycles_have_counter(void)
516 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
519 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
520 static __inline__ bool gmx_cycles_have_counter(void)
522 /* Metrowerks on macintosh */
525 #elif defined(__sun) && defined(__sparcv9)
527 static __inline__ bool gmx_cycles_have_counter(void)
529 /* Solaris on SPARC*/
533 static bool gmx_cycles_have_counter(void)
535 /* No cycle counter that we know of on this system */
541 /*! \brief Calculate number of seconds per cycle tick on host
543 * This routine runs a timer loop to calibrate the number of
544 * seconds per unit returned from gmx_cycles_read().
546 * \param sampletime Minimum real sample time. It takes some trial-and-error
547 * to find the correct delay loop size, so the total runtime of
548 * this routine is about twice this time.
549 * \return Number of seconds per cycle unit. If it is not possible to
550 * calculate on this system (for whatever reason) the return value
551 * will be -1, so check that it is positive before using it.
553 double gmx_cycles_calibrate(double sampletime);