2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
38 * High-resolution timestamp or CPU clock cycle counters.
40 * After reading the current value with gmx_cycles_read() you can add or
41 * subtract these numbers as normal integers of type gmx_cycles_t.
45 #ifndef GMX_TIMING_CYCLECOUNTER_H
46 #define GMX_TIMING_CYCLECOUNTER_H
49 * Define HAVE_RDTSCP=1 to use the serializing rdtscp instruction instead of rdtsc.
50 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
58 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
59 (defined(__i386__) || defined(__x86_64__)))
60 /* x86 or x86-64 with GCC inline assembly */
61 typedef unsigned long long
64 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
65 /* 64-bit ARM cycle counters with GCC inline assembly */
66 typedef unsigned long long
69 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
70 /* Armv7A can provide 64-bit cycles by returning two registers */
71 typedef unsigned long long
74 #elif defined(_MSC_VER)
79 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
80 /* HP compiler on ia64 */
81 #include <machine/sys/inline.h>
85 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
86 /* Intel compiler on ia64 */
87 #include <ia64intrin.h>
91 #elif defined(__GNUC__) && defined(__ia64__)
92 /* ia64 with GCC inline assembly */
96 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
97 /* HP PA-RISC, inline asm with gcc */
101 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
102 /* HP PA-RISC, instruction when using HP compiler */
103 #include <machine/inline.h>
104 typedef unsigned long
107 #elif defined(__GNUC__) && defined(__s390__)
108 /* S390, taken from FFTW who got it from James Treacy */
109 typedef unsigned long long
112 #elif defined(__GNUC__) && defined(__alpha__)
113 /* gcc inline assembly on alpha CPUs */
114 typedef unsigned long
117 #elif defined(__GNUC__) && defined(__sparc_v9__)
118 /* gcc inline assembly on sparc v9 */
119 typedef unsigned long
122 #elif defined(__DECC) && defined(__alpha)
123 /* Digital GEM C compiler on alpha */
125 typedef unsigned long
128 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
129 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
130 typedef unsigned long long
133 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
134 /* Solaris high-resolution timers */
138 #elif defined(__xlC__) && defined (_AIX)
140 #include <sys/systemcfg.h>
141 #include <sys/time.h>
142 typedef unsigned long long
145 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
146 ( defined(__powerpc__) || defined(__ppc__) ) )
147 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
148 typedef unsigned long long
151 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
152 /* Metrowerks on macintosh */
153 typedef unsigned long long
156 #elif defined(__sun) && defined(__sparcv9)
158 typedef unsigned long
162 /*! \brief Integer-like datatype for cycle counter values
164 * Depending on your system this will usually be something like long long,
165 * or a special cycle datatype from the system header files. It is NOT
166 * necessarily real processor cycles - many systems count in nanoseconds
167 * or a special external time register at fixed frequency (not the CPU freq.)
169 * You can subtract or add gmx_cycles_t types just as normal integers, and if
170 * you run the calibration routine you can also multiply it with a factor to
171 * translate the cycle data to seconds.
178 /*! \brief Read CPU cycle counter
180 * This routine returns an abstract datatype containing a
181 * cycle counter timestamp.
183 * \return Opaque data corresponding to a cycle reading.
185 * Please note that on most systems it takes several cycles
186 * to read and return the cycle counters. If you are measuring
187 * small intervals, you can compensate for this time by calling
188 * the routine twice and calculating what the difference is.
189 * Subtract this from your other measurements to get an accurate result.
191 * Use gmx_cycles_difference() to get a real number corresponding to
192 * the difference between two gmx_cycles_t values returned from this
195 #if (GMX_CYCLECOUNTERS == 0)
196 static __inline__ gmx_cycles_t gmx_cycles_read(void)
200 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
201 (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
202 static __inline__ gmx_cycles_t gmx_cycles_read()
204 /* x86 with GCC inline assembly - pentium TSC register */
208 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
210 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
212 const gmx_cycles_t c_low = low;
213 const gmx_cycles_t c_high = high;
214 return c_low | c_high <<32;
216 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
217 static __inline__ gmx_cycles_t gmx_cycles_read(void)
219 /* 64-bit ARM cycle counters with GCC inline assembly */
221 __asm__ __volatile__("mrs %0, cntvct_el0" : "=r" (cycle) );
225 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
226 static __inline__ gmx_cycles_t gmx_cycles_read(void)
228 unsigned int cycles_lo, cycles_hi;
229 asm volatile("mrrc p15, 1, %0, %1, c14" : "=r" (cycles_lo), "=r" (cycles_hi));
230 return ((gmx_cycles_t)cycles_lo) | (((gmx_cycles_t)cycles_hi) << 32);
232 #elif defined(_MSC_VER)
233 static __inline gmx_cycles_t gmx_cycles_read(void)
236 /* Windows on 64-bit ARM */
237 return __rdpmccntr64();
242 return __rdtscp(&ui);
248 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
249 static inline gmx_cycles_t gmx_cycles_read(void)
251 /* HP compiler on ia64 */
253 ret = _Asm_mov_from_ar (_AREG_ITC);
256 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
257 static __inline__ gmx_cycles_t gmx_cycles_read(void)
259 /* Intel compiler on ia64 */
260 return __getReg(_IA64_REG_AR_ITC);
262 #elif defined(__GNUC__) && defined(__ia64__)
263 static __inline__ gmx_cycles_t gmx_cycles_read(void)
265 /* ia64 with GCC inline assembly */
267 __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (ret));
270 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
271 static __inline__ gmx_cycles_t gmx_cycles_read(void)
273 /* HP PA-RISC, inline asm with gcc */
275 __asm__ __volatile__("mfctl 16, %0" : "=r" (ret));
276 /* no input, nothing else clobbered */
279 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
280 static inline gmx_cycles_t gmx_cycles_read(void)
282 /* HP PA-RISC, instruction when using HP compiler */
287 #elif defined(__GNUC__) && defined(__s390__)
288 static __inline__ gmx_cycles_t gmx_cycles_read(void)
290 /* S390, taken from FFTW who got it from James Treacy */
292 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
295 #elif defined(__GNUC__) && defined(__alpha__)
296 static __inline__ gmx_cycles_t gmx_cycles_read(void)
298 /* gcc inline assembly on alpha CPUs */
300 __asm__ __volatile__ ("rpcc %0" : "=r" (cycle));
301 return (cycle & 0xFFFFFFFF);
303 #elif defined(__GNUC__) && defined(__sparc_v9__)
304 static __inline__ gmx_cycles_t gmx_cycles_read(void)
306 /* gcc inline assembly on sparc v9 */
308 __asm__("rd %%tick, %0" : "=r" (ret));
311 #elif defined(__DECC) && defined(__alpha)
312 static __inline gmx_cycles_t gmx_cycles_read(void)
314 /* Digital GEM C compiler on alpha */
316 cycle = asm ("rpcc %v0");
317 return (cycle & 0xFFFFFFFF);
319 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
320 static __inline gmx_cycles_t gmx_cycles_read(void)
322 /* Irix compilers on SGI hardware */
324 clock_gettime(CLOCK_SGI_CYCLE, &t);
325 /* Return the number of nanoseconds, so we can subtract/add */
326 return ((unsigned long long)t.tv_sec)*1000000000+
327 (unsigned long long)t.tv_nsec;
329 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
330 static inline gmx_cycles_t gmx_cycles_read(void)
332 /* Solaris high-resolution timers */
335 #elif defined(__xlC__) && defined (_AIX)
336 static inline gmx_cycles_t gmx_cycles_read(void)
338 /* AIX compilers. Inline the calculation instead of using library functions */
340 read_real_time(&t1, TIMEBASE_SZ);
341 /* POWER returns real time (seconds + nanoseconds),
342 * POWER_PC returns high/low 32 bits of a counter.
344 if (t1.flag == RTC_POWER_PC)
346 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
350 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
353 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
354 ( defined(__powerpc__) || defined(__ppc__) ) )
355 static __inline__ gmx_cycles_t gmx_cycles_read(void)
357 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc, and clang) */
358 unsigned long low, high1, high2;
361 // clang 3.7 incorrectly warns that mftb* are
362 // deprecated. That's not correct - see
363 // https://llvm.org/bugs/show_bug.cgi?id=23680.
364 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
365 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
366 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
368 while (high1 != high2);
370 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
372 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
373 static __inline__ gmx_cycles_t gmx_cycles_read(void)
375 /* Metrowerks on macintosh */
376 unsigned int long low, high1, high2;
379 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
380 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
381 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
383 while (high1 != high2);
385 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
387 #elif defined(__sun) && defined(__sparcv9)
389 static __inline__ gmx_cycles_t gmx_cycles_read(void)
392 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
396 #elif defined(_CRAYC)
397 #include <intrinsics.h>
399 static __inline gmx_cycles_t gmx_cycles_read(void)
404 static gmx_cycles_t gmx_cycles_read(void)
411 /*! \brief Check if high-resolution cycle counters are available
413 * Not all architectures provide any way to read timestamp counters
414 * in the CPU, and on some it is broken. Although we refer to it
415 * as cycle counters, it is not necessarily given in units of
418 * If you notice that a system is missing, implement support for it,
419 * find out how to detect the system during preprocessing, and send us a
422 * \return 1 if cycle counters are available, 0 if not.
424 * \note This function does not need to be in the header for performance
425 * reasons, but it is very important that we get exactly the
426 * same detection as for gmx_cycles_read() routines. If you
427 * compile the library with one compiler, and then use a different
428 * one when later linking to the library it might happen that the
429 * library supports cyclecounters but not the headers, or vice versa.
431 #if (GMX_CYCLECOUNTERS == 0)
432 static __inline__ bool gmx_cycles_have_counter(void)
436 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__) || defined(_CRAYC)) && \
437 (defined(__i386__) || defined(__x86_64__)))
438 static __inline__ bool gmx_cycles_have_counter()
440 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
443 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
444 static __inline bool gmx_cycles_have_counter(void)
446 /* 64-bit ARM cycle counters with GCC inline assembly */
449 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
450 static __inline bool gmx_cycles_have_counter(void)
452 /* Armv7A can provide 64-bit cycles by returning two registers. However, it will not work unless
453 * the performance registers have been made available from user space by a kernel module -
454 * otherwise it returns 0.
458 c0 = gmx_cycles_read();
459 c1 = gmx_cycles_read();
461 /* if both counters return 0, support is not present */
462 return (c0 != 0 || c1 != 0);
464 #elif (defined(_MSC_VER))
465 static __inline bool gmx_cycles_have_counter(void)
469 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
470 static inline bool gmx_cycles_have_counter(void)
472 /* HP compiler on ia64, use special instruction to read ITC */
475 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
476 static __inline__ bool gmx_cycles_have_counter(void)
478 /* Intel compiler on ia64, use special instruction to read ITC */
481 #elif defined(__GNUC__) && defined(__ia64__)
482 static __inline__ bool gmx_cycles_have_counter(void)
484 /* ia64 with GCC inline assembly - ITC register */
487 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
488 static __inline__ bool gmx_cycles_have_counter(void)
490 /* HP PA-RISC, inline asm with gcc */
493 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
494 static inline bool gmx_cycles_have_counter(void)
496 /* HP PA-RISC, instruction when using HP compiler */
499 #elif defined(__GNUC__) && defined(__s390__)
500 static __inline__ bool gmx_cycles_have_counter(void)
502 /* S390, taken from FFTW who got it from James Treacy */
505 #elif defined(__GNUC__) && defined(__alpha__)
506 static __inline__ bool gmx_cycles_have_counter(void)
508 /* gcc inline assembly on alpha CPUs */
511 #elif defined(__GNUC__) && defined(__sparc_v9__)
512 static __inline__ bool gmx_cycles_have_counter(void)
514 /* gcc inline assembly on sparc v9 */
517 #elif defined(__DECC) && defined(__alpha)
518 static __inline bool gmx_cycles_have_counter(void)
520 /* Digital GEM C compiler on alpha */
523 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
524 static __inline bool gmx_cycles_have_counter(void)
526 /* Irix compilers on SGI hardware */
529 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
530 static inline bool gmx_cycles_have_counter(void)
532 /* Solaris high-resolution timers */
535 #elif defined(__xlC__) && defined (_AIX)
536 static inline bool gmx_cycles_have_counter(void)
541 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
542 ( defined(__powerpc__) || defined(__ppc__) ) )
543 static __inline__ bool gmx_cycles_have_counter(void)
545 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
548 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
549 static __inline__ bool gmx_cycles_have_counter(void)
551 /* Metrowerks on macintosh */
554 #elif defined(__sun) && defined(__sparcv9)
556 static __inline__ bool gmx_cycles_have_counter(void)
558 /* Solaris on SPARC*/
562 static bool gmx_cycles_have_counter(void)
564 /* No cycle counter that we know of on this system */
570 /*! \brief Calculate number of seconds per cycle tick on host
572 * This routine runs a timer loop to calibrate the number of
573 * seconds per the units returned from gmx_cycles_read().
575 * \param sampletime Minimum real sample time. It takes some trial-and-error
576 * to find the correct delay loop size, so the total runtime of
577 * this routine is about twice this time.
578 * \return Number of seconds per cycle unit. If it is not possible to
579 * calculate on this system (for whatever reason) the return value
580 * will be -1, so check that it is positive before using it.
583 gmx_cycles_calibrate(double sampletime);