2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
38 * High-resolution timestamp or CPU clock cycle counters.
40 * After reading the current value with gmx_cycles_read() you can add or
41 * subtract these numbers as normal integers of type gmx_cycles_t.
45 #ifndef GMX_TIMING_CYCLECOUNTER_H
46 #define GMX_TIMING_CYCLECOUNTER_H
49 * define HAVE_RDTSCP to use the serializing rdtscp instruction instead of rdtsc.
50 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
63 } /* fixes auto-indentation problems */
66 /* Minor implementation note:
68 * I like to use these counters in other programs too, so to avoid making
69 * it dependent on other Gromacs definitions I use the #ifdef's to set
70 * architecture-specific inline macros instead of using gmx_inline from
71 * gmx_types.h /Erik 2005-12-10
74 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
75 (defined(__i386__) || defined(__x86_64__)))
76 /* x86 or x86-64 with GCC inline assembly */
77 typedef unsigned long long
80 #elif defined(_MSC_VER)
85 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
86 /* HP compiler on ia64 */
87 #include <machine/sys/inline.h>
91 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
92 /* Intel compiler on ia64 */
93 #include <ia64intrin.h>
97 #elif defined(__GNUC__) && defined(__ia64__)
98 /* ia64 with GCC inline assembly */
102 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
103 /* HP PA-RISC, inline asm with gcc */
104 typedef unsigned long
107 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
108 /* HP PA-RISC, instruction when using HP compiler */
109 #include <machine/inline.h>
110 typedef unsigned long
113 #elif defined(__GNUC__) && defined(__s390__)
114 /* S390, taken from FFTW who got it from James Treacy */
115 typedef unsigned long long
118 #elif defined(__GNUC__) && defined(__alpha__)
119 /* gcc inline assembly on alpha CPUs */
120 typedef unsigned long
123 #elif defined(__GNUC__) && defined(__sparc_v9__)
124 /* gcc inline assembly on sparc v9 */
125 typedef unsigned long
128 #elif defined(__DECC) && defined(__alpha)
129 /* Digital GEM C compiler on alpha */
131 typedef unsigned long
134 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
135 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
136 typedef unsigned long long
139 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
140 /* Solaris high-resolution timers */
144 #elif defined(__xlC__) && defined (_AIX)
146 #include <sys/time.h>
147 #include <sys/systemcfg.h>
148 typedef unsigned long long
151 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
152 ( defined(__powerpc__) || defined(__ppc__) ) )
153 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
154 typedef unsigned long long
157 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
158 /* Metrowerks on macintosh */
159 typedef unsigned long long
162 #elif defined(__sun) && defined(__sparcv9)
164 typedef unsigned long
168 /*! \brief Integer-like datatype for cycle counter values
170 * Depending on your system this will usually be something like long long,
171 * or a special cycle datatype from the system header files. It is NOT
172 * necessarily real processor cycles - many systems count in nanoseconds
173 * or a special external time register at fixed frequency (not the CPU freq.)
175 * You can subtract or add gmx_cycles_t types just as normal integers, and if
176 * you run the calibration routine you can also multiply it with a factor to
177 * translate the cycle data to seconds.
184 /*! \brief Check if high-resolution cycle counters are available
186 * Not all architectures provide any way to read timestamp counters
187 * in the CPU, and on some it is broken. Although we refer to it
188 * as cycle counters, it is not necessarily given in units of
191 * If you notice that your system is missing, implement support for it,
192 * find out how to detect the system during preprocessing, and send us a
195 * \return 1 if cycle counters are available, 0 if not.
197 * \note This function does not need to be in the header for performance
198 * reasons, but it is very important that we get exactly the
199 * same detection as for gmx_cycles_read() routines. If you
200 * compile the library with one compiler, and then use a different
201 * one when later linking to the library it might happen that the
202 * library supports cyclecounters but not the headers, or vice versa.
204 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__) || defined(_CRAYC)) && \
205 (defined(__i386__) || defined(__x86_64__)))
206 static __inline__ int gmx_cycles_have_counter(void)
208 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
211 #elif (defined(_MSC_VER))
212 static __inline int gmx_cycles_have_counter(void)
216 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
217 static inline int gmx_cycles_have_counter(void)
219 /* HP compiler on ia64, use special instruction to read ITC */
222 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
223 static __inline__ int gmx_cycles_have_counter(void)
225 /* Intel compiler on ia64, use special instruction to read ITC */
228 #elif defined(__GNUC__) && defined(__ia64__)
229 static __inline__ int gmx_cycles_have_counter(void)
231 /* ia64 with GCC inline assembly - ITC register */
234 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
235 static __inline__ int gmx_cycles_have_counter(void)
237 /* HP PA-RISC, inline asm with gcc */
240 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
241 static inline int gmx_cycles_have_counter(void)
243 /* HP PA-RISC, instruction when using HP compiler */
246 #elif defined(__GNUC__) && defined(__s390__)
247 static __inline__ int gmx_cycles_have_counter(void)
249 /* S390, taken from FFTW who got it from James Treacy */
252 #elif defined(__GNUC__) && defined(__alpha__)
253 static __inline__ int gmx_cycles_have_counter(void)
255 /* gcc inline assembly on alpha CPUs */
258 #elif defined(__GNUC__) && defined(__sparc_v9__)
259 static __inline__ int gmx_cycles_have_counter(void)
261 /* gcc inline assembly on sparc v9 */
264 #elif defined(__DECC) && defined(__alpha)
265 static __inline int gmx_cycles_have_counter(void)
267 /* Digital GEM C compiler on alpha */
270 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
271 static __inline int gmx_cycles_have_counter(void)
273 /* Irix compilers on SGI hardware */
276 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
277 static inline int gmx_cycles_have_counter(void)
279 /* Solaris high-resolution timers */
282 #elif defined(__xlC__) && defined (_AIX)
283 static inline int gmx_cycles_have_counter(void)
288 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
289 ( defined(__powerpc__) || defined(__ppc__) ) )
290 static __inline__ int gmx_cycles_have_counter(void)
292 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
295 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
296 static __inline__ int gmx_cycles_have_counter(void)
298 /* Metrowerks on macintosh */
301 #elif defined(__sun) && defined(__sparcv9)
303 static __inline__ int gmx_cycles_have_counter(void)
305 /* Solaris on SPARC*/
309 static int gmx_cycles_have_counter(void)
311 /* No cycle counter that we know of on this system */
316 /*! \brief Read CPU cycle counter
318 * This routine returns an abstract datatype containing a
319 * cycle counter timestamp.
321 * \return Opaque data corresponding to a cycle reading.
323 * Please note that on most systems it takes several cycles
324 * to read and return the cycle counters. If you are measuring
325 * small intervals, you can compensate for this time by calling
326 * the routine twice and calculating what the difference is.
327 * Subtract this from your other measurements to get an accurate result.
329 * Use gmx_cycles_difference() to get a real number corresponding to
330 * the difference between two gmx_cycles_t values returned from this
333 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
334 (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
335 static __inline__ gmx_cycles_t gmx_cycles_read(void)
337 /* x86 with GCC inline assembly - pentium TSC register */
342 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
344 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
347 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
351 #elif defined(_MSC_VER)
352 static __inline gmx_cycles_t gmx_cycles_read(void)
356 return __rdtscp(&ui);
361 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
362 static inline gmx_cycles_t gmx_cycles_read(void)
364 /* HP compiler on ia64 */
366 ret = _Asm_mov_from_ar (_AREG_ITC);
369 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
370 static __inline__ gmx_cycles_t gmx_cycles_read(void)
372 /* Intel compiler on ia64 (same guard as the gmx_cycles_t and gmx_cycles_have_counter() branches): read ITC via intrinsic */
373 return __getReg(_IA64_REG_AR_ITC);
375 #elif defined(__GNUC__) && defined(__ia64__)
376 static __inline__ gmx_cycles_t gmx_cycles_read(void)
378 /* ia64 with GCC inline assembly */
380 __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (ret));
383 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
384 static __inline__ gmx_cycles_t gmx_cycles_read(void)
386 /* HP PA-RISC, inline asm with gcc */
388 __asm__ __volatile__("mfctl 16, %0" : "=r" (ret));
389 /* no input, nothing else clobbered */
392 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
393 static inline gmx_cycles_t gmx_cycles_read(void)
395 /* HP PA-RISC, instruction when using HP compiler */
400 #elif defined(__GNUC__) && defined(__s390__)
401 static __inline__ gmx_cycles_t gmx_cycles_read(void)
403 /* S390, taken from FFTW who got it from James Treacy */
405 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
408 #elif defined(__GNUC__) && defined(__alpha__)
409 static __inline__ gmx_cycles_t gmx_cycles_read(void)
411 /* gcc inline assembly on alpha CPUs */
413 __asm__ __volatile__ ("rpcc %0" : "=r" (cycle));
414 return (cycle & 0xFFFFFFFF);
416 #elif defined(__GNUC__) && defined(__sparc_v9__)
417 static __inline__ gmx_cycles_t gmx_cycles_read(void)
419 /* gcc inline assembly on sparc v9 */
421 __asm__("rd %%tick, %0" : "=r" (ret));
424 #elif defined(__DECC) && defined(__alpha)
425 static __inline gmx_cycles_t gmx_cycles_read(void)
427 /* Digital GEM C compiler on alpha */
429 cycle = asm ("rpcc %v0");
430 return (cycle & 0xFFFFFFFF);
432 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
433 static __inline gmx_cycles_t gmx_cycles_read(void)
435 /* Irix compilers on SGI hardware */
437 clock_gettime(CLOCK_SGI_CYCLE, &t);
438 /* Return the number of nanoseconds, so we can subtract/add */
439 return ((unsigned long long)t.tv_sec)*1000000000+
440 (unsigned long long)t.tv_nsec;
442 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
443 static inline gmx_cycles_t gmx_cycles_read(void)
445 /* Solaris high-resolution timers */
448 #elif defined(__xlC__) && defined (_AIX)
449 static inline gmx_cycles_t gmx_cycles_read(void)
451 /* AIX compilers. Inline the calculation instead of using library functions */
453 read_real_time(&t1, TIMEBASE_SZ);
454 /* POWER returns real time (seconds + nanoseconds),
455 * POWER_PC returns high/low 32 bits of a counter.
457 if (t1.flag == RTC_POWER_PC)
459 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
463 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
466 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
467 ( defined(__powerpc__) || defined(__ppc__) ) )
468 static __inline__ gmx_cycles_t gmx_cycles_read(void)
470 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
471 unsigned long low, high1, high2;
474 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
475 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
476 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
478 while (high1 != high2);
480 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
482 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
483 static __inline__ gmx_cycles_t gmx_cycles_read(void)
485 /* Metrowerks on macintosh */
486 unsigned int long low, high1, high2;
489 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
490 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
491 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
493 while (high1 != high2);
495 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
497 #elif defined(__sun) && defined(__sparcv9)
499 static __inline__ gmx_cycles_t gmx_cycles_read(void)
502 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
506 #elif defined(_CRAYC)
507 #include <intrinsics.h>
509 static __inline gmx_cycles_t gmx_cycles_read(void)
514 static gmx_cycles_t gmx_cycles_read(void)
520 /*! \brief Calculate number of seconds per cycle tick on host
522 * This routine runs a timer loop to calibrate the number of
523 * seconds per the units returned from gmx_cycles_read().
525 * \param sampletime Minimum real sample time. It takes some trial-and-error
526 * to find the correct delay loop size, so the total runtime of
527 * this routine is about twice this time.
528 * \return Number of seconds per cycle unit. If it is not possible to
529 * calculate on this system (for whatever reason) the return value
530 * will be -1, so check that it is positive before using it.
533 gmx_cycles_calibrate(double sampletime);