2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
38 * High-resolution timestamp or CPU clock cycle counters.
40 * After reading the current value with gmx_cycles_read() you can add or
41 * subtract these numbers as normal integers of type gmx_cycles_t.
45 #ifndef GMX_TIMING_CYCLECOUNTER_H
46 #define GMX_TIMING_CYCLECOUNTER_H
49 * Define HAVE_RDTSCP=1 to use the serializing rdtscp instruction instead of rdtsc.
50 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
63 } /* fixes auto-indentation problems */
66 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
67 (defined(__i386__) || defined(__x86_64__)))
68 /* x86 or x86-64 with GCC inline assembly */
69 typedef unsigned long long
72 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
73 /* 64-bit ARM cycle counters with GCC inline assembly */
74 typedef unsigned long long
77 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
78 /* Armv7A can provide 64-bit cycles by returning two registers */
79 typedef unsigned long long
82 #elif defined(_MSC_VER)
87 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
88 /* HP compiler on ia64 */
89 #include <machine/sys/inline.h>
93 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
94 /* Intel compiler on ia64 */
95 #include <ia64intrin.h>
99 #elif defined(__GNUC__) && defined(__ia64__)
100 /* ia64 with GCC inline assembly */
101 typedef unsigned long
104 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
105 /* HP PA-RISC, inline asm with gcc */
106 typedef unsigned long
109 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
110 /* HP PA-RISC, instruction when using HP compiler */
111 #include <machine/inline.h>
112 typedef unsigned long
115 #elif defined(__GNUC__) && defined(__s390__)
116 /* S390, taken from FFTW who got it from James Treacy */
117 typedef unsigned long long
120 #elif defined(__GNUC__) && defined(__alpha__)
121 /* gcc inline assembly on alpha CPUs */
122 typedef unsigned long
125 #elif defined(__GNUC__) && defined(__sparc_v9__)
126 /* gcc inline assembly on sparc v9 */
127 typedef unsigned long
130 #elif defined(__DECC) && defined(__alpha)
131 /* Digital GEM C compiler on alpha */
133 typedef unsigned long
136 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
137 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
138 typedef unsigned long long
141 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
142 /* Solaris high-resolution timers */
146 #elif defined(__xlC__) && defined (_AIX)
148 #include <sys/systemcfg.h>
149 #include <sys/time.h>
150 typedef unsigned long long
153 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
154 ( defined(__powerpc__) || defined(__ppc__) ) )
155 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
156 typedef unsigned long long
159 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
160 /* Metrowerks on macintosh */
161 typedef unsigned long long
164 #elif defined(__sun) && defined(__sparcv9)
166 typedef unsigned long
170 /*! \brief Integer-like datatype for cycle counter values
172 * Depending on your system this will usually be something like long long,
173 * or a special cycle datatype from the system header files. It is NOT
174 * necessarily real processor cycles - many systems count in nanoseconds
175 * or a special external time register at fixed frequency (not the CPU freq.)
177 * You can subtract or add gmx_cycles_t types just as normal integers, and if
178 * you run the calibration routine you can also multiply it with a factor to
179 * translate the cycle data to seconds.
186 /*! \brief Read CPU cycle counter
188 * This routine returns an abstract datatype containing a
189 * cycle counter timestamp.
191 * \return Opaque data corresponding to a cycle reading.
193 * Please note that on most systems it takes several cycles
194 * to read and return the cycle counters. If you are measuring
195 * small intervals, you can compensate for this time by calling
196 * the routine twice and calculating what the difference is.
197 * Subtract this from your other measurements to get an accurate result.
199 * Use gmx_cycles_difference() to get a real number corresponding to
200 * the difference between two gmx_cycles_t values returned from this
203 #if (GMX_CYCLECOUNTERS == 0)
204 static __inline__ gmx_cycles_t gmx_cycles_read(void)
208 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
209 (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
210 static __inline__ gmx_cycles_t gmx_cycles_read(void)
212 /* x86 with GCC inline assembly - pentium TSC register */
217 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
219 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
222 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
226 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
227 static __inline__ gmx_cycles_t gmx_cycles_read(void)
229 /* 64-bit ARM cycle counters with GCC inline assembly */
231 __asm__ __volatile__("mrs %0, cntvct_el0" : "=r" (cycle) );
235 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
236 static __inline__ gmx_cycles_t gmx_cycles_read(void)
238 unsigned int cycles_lo, cycles_hi;
239 asm volatile("mrrc p15, 1, %0, %1, c14" : "=r" (cycles_lo), "=r" (cycles_hi));
240 return ((gmx_cycles_t)cycles_lo) | (((gmx_cycles_t)cycles_hi) << 32);
242 #elif defined(_MSC_VER)
243 static __inline gmx_cycles_t gmx_cycles_read(void)
246 /* Windows on 64-bit ARM */
247 return __rdpmccntr64();
252 return __rdtscp(&ui);
258 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
259 static inline gmx_cycles_t gmx_cycles_read(void)
261 /* HP compiler on ia64 */
263 ret = _Asm_mov_from_ar (_AREG_ITC);
266 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
267 static __inline__ gmx_cycles_t gmx_cycles_read(void)
269 /* Intel compiler on ia64. The condition accepts __ECC too, so it selects the same branch as the typedef and gmx_cycles_have_counter() chains. */
270 return __getReg(_IA64_REG_AR_ITC);
272 #elif defined(__GNUC__) && defined(__ia64__)
273 static __inline__ gmx_cycles_t gmx_cycles_read(void)
275 /* ia64 with GCC inline assembly */
277 __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (ret));
280 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
281 static __inline__ gmx_cycles_t gmx_cycles_read(void)
283 /* HP PA-RISC, inline asm with gcc */
285 __asm__ __volatile__("mfctl 16, %0" : "=r" (ret));
286 /* no input, nothing else clobbered */
289 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
290 static inline gmx_cycles_t gmx_cycles_read(void)
292 /* HP PA-RISC, instruction when using HP compiler */
297 #elif defined(__GNUC__) && defined(__s390__)
298 static __inline__ gmx_cycles_t gmx_cycles_read(void)
300 /* S390, taken from FFTW who got it from James Treacy */
302 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
305 #elif defined(__GNUC__) && defined(__alpha__)
306 static __inline__ gmx_cycles_t gmx_cycles_read(void)
308 /* gcc inline assembly on alpha CPUs */
310 __asm__ __volatile__ ("rpcc %0" : "=r" (cycle));
311 return (cycle & 0xFFFFFFFF);
313 #elif defined(__GNUC__) && defined(__sparc_v9__)
314 static __inline__ gmx_cycles_t gmx_cycles_read(void)
316 /* gcc inline assembly on sparc v9; __volatile__ stops the compiler from merging or hoisting successive reads of %tick */
318 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
321 #elif defined(__DECC) && defined(__alpha)
322 static __inline gmx_cycles_t gmx_cycles_read(void)
324 /* Digital GEM C compiler on alpha */
326 cycle = asm ("rpcc %v0");
327 return (cycle & 0xFFFFFFFF);
329 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
330 static __inline gmx_cycles_t gmx_cycles_read(void)
332 /* Irix compilers on SGI hardware */
334 clock_gettime(CLOCK_SGI_CYCLE, &t);
335 /* Return the number of nanoseconds, so we can subtract/add */
336 return ((unsigned long long)t.tv_sec)*1000000000+
337 (unsigned long long)t.tv_nsec;
339 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
340 static inline gmx_cycles_t gmx_cycles_read(void)
342 /* Solaris high-resolution timers */
345 #elif defined(__xlC__) && defined (_AIX)
346 static inline gmx_cycles_t gmx_cycles_read(void)
348 /* AIX compilers. Inline the calculation instead of using library functions */
350 read_real_time(&t1, TIMEBASE_SZ);
351 /* POWER returns real time (seconds + nanoseconds),
352 * POWER_PC returns high/low 32 bits of a counter.
354 if (t1.flag == RTC_POWER_PC)
356 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
360 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
363 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
364 ( defined(__powerpc__) || defined(__ppc__) ) )
365 static __inline__ gmx_cycles_t gmx_cycles_read(void)
367 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc, and clang) */
368 unsigned long low, high1, high2;
371 // clang 3.7 incorrectly warns that mftb* are
372 // deprecated. That's not correct - see
373 // https://llvm.org/bugs/show_bug.cgi?id=23680.
374 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
375 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
376 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
378 while (high1 != high2);
380 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
382 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
383 static __inline__ gmx_cycles_t gmx_cycles_read(void)
385 /* Metrowerks on macintosh */
386 unsigned int long low, high1, high2;
389 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
390 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
391 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
393 while (high1 != high2);
395 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
397 #elif defined(__sun) && defined(__sparcv9)
399 static __inline__ gmx_cycles_t gmx_cycles_read(void)
402 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
406 #elif defined(_CRAYC)
407 #include <intrinsics.h>
409 static __inline gmx_cycles_t gmx_cycles_read(void)
414 static gmx_cycles_t gmx_cycles_read(void)
421 /*! \brief Check if high-resolution cycle counters are available
423 * Not all architectures provide any way to read timestamp counters
424 * in the CPU, and on some it is broken. Although we refer to it
425 * as cycle counters, it is not necessarily given in units of
428 * If you notice that your system is missing, implement support for it,
429 * find out how to detect the system during preprocessing, and send us a
432 * \return 1 if cycle counters are available, 0 if not.
434 * \note This function does not need to be in the header for performance
435 * reasons, but it is very important that we get exactly the
436 * same detection as for gmx_cycles_read() routines. If you
437 * compile the library with one compiler, and then use a different
438 * one when later linking to the library it might happen that the
439 * library supports cyclecounters but not the headers, or vice versa.
441 #if (GMX_CYCLECOUNTERS == 0)
442 static __inline__ bool gmx_cycles_have_counter(void)
446 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__) || defined(_CRAYC)) && \
447 (defined(__i386__) || defined(__x86_64__)))
448 static __inline__ bool gmx_cycles_have_counter(void)
450 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
453 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
454 static __inline bool gmx_cycles_have_counter(void)
456 /* 64-bit ARM cycle counters with GCC inline assembly */
459 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
460 static __inline bool gmx_cycles_have_counter(void)
462 /* Armv7A can provide 64-bit cycles by returning two registers. However, it will not work unless
463 * the performance registers have been made available from user space by a kernel module -
464 * otherwise it returns 0.
468 c0 = gmx_cycles_read();
469 c1 = gmx_cycles_read();
471 /* if both counters return 0, support is not present */
472 return (c0 != 0 || c1 != 0);
474 #elif (defined(_MSC_VER))
475 static __inline bool gmx_cycles_have_counter(void)
479 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
480 static inline bool gmx_cycles_have_counter(void)
482 /* HP compiler on ia64, use special instruction to read ITC */
485 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
486 static __inline__ bool gmx_cycles_have_counter(void)
488 /* Intel compiler on ia64, use special instruction to read ITC */
491 #elif defined(__GNUC__) && defined(__ia64__)
492 static __inline__ bool gmx_cycles_have_counter(void)
494 /* ia64 with GCC inline assembly - ITC register */
497 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
498 static __inline__ bool gmx_cycles_have_counter(void)
500 /* HP PA-RISC, inline asm with gcc */
503 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
504 static inline bool gmx_cycles_have_counter(void)
506 /* HP PA-RISC, instruction when using HP compiler */
509 #elif defined(__GNUC__) && defined(__s390__)
510 static __inline__ bool gmx_cycles_have_counter(void)
512 /* S390, taken from FFTW who got it from James Treacy */
515 #elif defined(__GNUC__) && defined(__alpha__)
516 static __inline__ bool gmx_cycles_have_counter(void)
518 /* gcc inline assembly on alpha CPUs */
521 #elif defined(__GNUC__) && defined(__sparc_v9__)
522 static __inline__ bool gmx_cycles_have_counter(void)
524 /* gcc inline assembly on sparc v9 */
527 #elif defined(__DECC) && defined(__alpha)
528 static __inline bool gmx_cycles_have_counter(void)
530 /* Digital GEM C compiler on alpha */
533 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
534 static __inline bool gmx_cycles_have_counter(void)
536 /* Irix compilers on SGI hardware */
539 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
540 static inline bool gmx_cycles_have_counter(void)
542 /* Solaris high-resolution timers */
545 #elif defined(__xlC__) && defined (_AIX)
546 static inline bool gmx_cycles_have_counter(void)
551 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
552 ( defined(__powerpc__) || defined(__ppc__) ) )
553 static __inline__ bool gmx_cycles_have_counter(void)
555 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
558 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
559 static __inline__ bool gmx_cycles_have_counter(void)
561 /* Metrowerks on macintosh */
564 #elif defined(__sun) && defined(__sparcv9)
566 static __inline__ bool gmx_cycles_have_counter(void)
568 /* Solaris on SPARC*/
572 static bool gmx_cycles_have_counter(void)
574 /* No cycle counter that we know of on this system */
580 /*! \brief Calculate number of seconds per cycle tick on host
582 * This routine runs a timer loop to calibrate the number of
583 * seconds per the units returned from gmx_cycles_read().
585 * \param sampletime Minimum real sample time. It takes some trial-and-error
586 * to find the correct delay loop size, so the total runtime of
587 * this routine is about twice this time.
588 * \return Number of seconds per cycle unit. If it is not possible to
589 * calculate on this system (for whatever reason) the return value
590 * will be -1, so check that it is positive before using it.
593 gmx_cycles_calibrate(double sampletime);