2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
38 * High-resolution timestamp or CPU clock cycle counters.
40 * After reading the current value with gmx_cycles_read() you can add or
41 * subtract these numbers as normal integers of type gmx_cycles_t.
45 #ifndef GMX_TIMING_CYCLECOUNTER_H
46 #define GMX_TIMING_CYCLECOUNTER_H
49 * define HAVE_RDTSCP to use the serializing rdtscp instruction instead of rdtsc.
50 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
65 } /* fixes auto-indentation problems */
68 /* Minor implementation note:
70 * I like to use these counters in other programs too, so to avoid making
71 * it dependent on other Gromacs definitions I use the #ifdef's to set
72 * architecture-specific inline macros instead of using gmx_inline from
73 * gmx_types.h /Erik 2005-12-10
76 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
77 (defined(__i386__) || defined(__x86_64__)))
78 /* x86 or x86-64 with GCC inline assembly */
79 typedef unsigned long long
82 #elif defined(_MSC_VER)
87 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
88 /* HP compiler on ia64 */
89 #include <machine/sys/inline.h>
93 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
94 /* Intel compiler on ia64 */
95 #include <ia64intrin.h>
99 #elif defined(__GNUC__) && defined(__ia64__)
100 /* ia64 with GCC inline assembly */
101 typedef unsigned long
104 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
105 /* HP PA-RISC, inline asm with gcc */
106 typedef unsigned long
109 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
110 /* HP PA-RISC, instruction when using HP compiler */
111 #include <machine/inline.h>
112 typedef unsigned long
115 #elif defined(__GNUC__) && defined(__s390__)
116 /* S390, taken from FFTW who got it from James Treacy */
117 typedef unsigned long long
120 #elif defined(__GNUC__) && defined(__alpha__)
121 /* gcc inline assembly on alpha CPUs */
122 typedef unsigned long
125 #elif defined(__GNUC__) && defined(__sparc_v9__)
126 /* gcc inline assembly on sparc v9 */
127 typedef unsigned long
130 #elif defined(__DECC) && defined(__alpha)
131 /* Digital GEM C compiler on alpha */
133 typedef unsigned long
136 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
137 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
138 typedef unsigned long long
141 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
142 /* Solaris high-resolution timers */
146 #elif defined(__xlC__) && defined (_AIX)
148 #include <sys/time.h>
149 #include <sys/systemcfg.h>
150 typedef unsigned long long
153 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
154 ( defined(__powerpc__) || defined(__ppc__) ) )
155 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
156 typedef unsigned long long
159 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
160 /* Metrowerks on macintosh */
161 typedef unsigned long long
164 #elif defined(__sun) && defined(__sparcv9)
166 typedef unsigned long
170 /*! \brief Integer-like datatype for cycle counter values
172 * Depending on your system this will usually be something like long long,
173 * or a special cycle datatype from the system header files. It is NOT
174 * necessarily real processor cycles - many systems count in nanoseconds
175 * or a special external time register at fixed frequency (not the CPU freq.)
177 * You can subtract or add gmx_cycles_t types just as normal integers, and if
178 * you run the calibration routine you can also multiply it with a factor to
179 * translate the cycle data to seconds.
186 /*! \brief Check if high-resolution cycle counters are available
188 * Not all architectures provide any way to read timestamp counters
189 * in the CPU, and on some it is broken. Although we refer to it
190 * as cycle counters, it is not necessarily given in units of
193 * If you notice that your system is missing, implement support for it,
194 * find out how to detect the system during preprocessing, and send us a
197 * \return 1 if cycle counters are available, 0 if not.
199 * \note This function does not need to be in the header for performance
200 * reasons, but it is very important that we get exactly the
201 * same detection as for gmx_cycles_read() routines. If you
202 * compile the library with one compiler, and then use a different
203 * one when later linking to the library it might happen that the
204 * library supports cyclecounters but not the headers, or vice versa.
206 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
207 (defined(__i386__) || defined(__x86_64__)))
208 static __inline__ int gmx_cycles_have_counter(void)
210 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
213 #elif (defined(_MSC_VER))
214 static __inline int gmx_cycles_have_counter(void)
218 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
219 static inline int gmx_cycles_have_counter(void)
221 /* HP compiler on ia64, use special instruction to read ITC */
224 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
225 static __inline__ int gmx_cycles_have_counter(void)
227 /* Intel compiler on ia64, use special instruction to read ITC */
230 #elif defined(__GNUC__) && defined(__ia64__)
231 static __inline__ int gmx_cycles_have_counter(void)
233 /* ia64 with GCC inline assembly - ITC register */
236 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
237 static __inline__ int gmx_cycles_have_counter(void)
239 /* HP PA-RISC, inline asm with gcc */
242 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
243 static inline int gmx_cycles_have_counter(void)
245 /* HP PA-RISC, instruction when using HP compiler */
248 #elif defined(__GNUC__) && defined(__s390__)
249 static __inline__ int gmx_cycles_have_counter(void)
251 /* S390, taken from FFTW who got it from James Treacy */
254 #elif defined(__GNUC__) && defined(__alpha__)
255 static __inline__ int gmx_cycles_have_counter(void)
257 /* gcc inline assembly on alpha CPUs */
260 #elif defined(__GNUC__) && defined(__sparc_v9__)
261 static __inline__ int gmx_cycles_have_counter(void)
263 /* gcc inline assembly on sparc v9 */
266 #elif defined(__DECC) && defined(__alpha)
267 static __inline int gmx_cycles_have_counter(void)
269 /* Digital GEM C compiler on alpha */
272 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
273 static __inline int gmx_cycles_have_counter(void)
275 /* Irix compilers on SGI hardware */
278 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
279 static inline int gmx_cycles_have_counter(void)
281 /* Solaris high-resolution timers */
284 #elif defined(__xlC__) && defined (_AIX)
285 static inline int gmx_cycles_have_counter(void)
290 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
291 ( defined(__powerpc__) || defined(__ppc__) ) )
292 static __inline__ int gmx_cycles_have_counter(void)
294 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
297 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
298 static __inline__ int gmx_cycles_have_counter(void)
300 /* Metrowerks on macintosh */
303 #elif defined(__sun) && defined(__sparcv9)
305 static __inline__ int gmx_cycles_have_counter(void)
307 /* Solaris on SPARC*/
311 static int gmx_cycles_have_counter(void)
313 /* No cycle counter that we know of on this system */
318 /*! \brief Read CPU cycle counter
320 * This routine returns an abstract datatype containing a
321 * cycle counter timestamp.
323 * \return Opaque data corresponding to a cycle reading.
325 * Please note that on most systems it takes several cycles
326 * to read and return the cycle counters. If you are measuring
327 * small intervals, you can compensate for this time by calling
328 * the routine twice and calculating what the difference is.
329 * Subtract this from your other measurements to get an accurate result.
331 * Use gmx_cycles_difference() to get a real number corresponding to
332 * the difference between two gmx_cycles_t values returned from this
335 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
336 (defined(__i386__) || defined(__x86_64__)))
337 static __inline__ gmx_cycles_t gmx_cycles_read(void)
339 /* x86 with GCC inline assembly - pentium TSC register */
344 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
346 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
349 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
353 #elif defined(_MSC_VER)
354 static __inline gmx_cycles_t gmx_cycles_read(void)
358 return __rdtscp(&ui);
363 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
364 static inline gmx_cycles_t gmx_cycles_read(void)
366 /* HP compiler on ia64 */
368 ret = _Asm_mov_from_ar (_AREG_ITC);
371 #elif ((defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__))
372 static __inline__ gmx_cycles_t gmx_cycles_read(void)
374 /* Intel compiler on ia64: read the ITC register. The guard must be identical to the one used for the gmx_cycles_t typedef and gmx_cycles_have_counter(), otherwise the type, the availability check and the reader can pick different branches. */
375 return __getReg(_IA64_REG_AR_ITC);
377 #elif defined(__GNUC__) && defined(__ia64__)
378 static __inline__ gmx_cycles_t gmx_cycles_read(void)
380 /* ia64 with GCC inline assembly */
382 __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (ret));
385 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
386 static __inline__ gmx_cycles_t gmx_cycles_read(void)
388 /* HP PA-RISC, inline asm with gcc */
390 __asm__ __volatile__("mfctl 16, %0" : "=r" (ret));
391 /* no input, nothing else clobbered */
394 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
395 static inline gmx_cycles_t gmx_cycles_read(void)
397 /* HP PA-RISC, instruction when using HP compiler */
402 #elif defined(__GNUC__) && defined(__s390__)
403 static __inline__ gmx_cycles_t gmx_cycles_read(void)
405 /* S390, taken from FFTW who got it from James Treacy */
407 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
410 #elif defined(__GNUC__) && defined(__alpha__)
411 static __inline__ gmx_cycles_t gmx_cycles_read(void)
413 /* gcc inline assembly on alpha CPUs */
415 __asm__ __volatile__ ("rpcc %0" : "=r" (cycle));
416 return (cycle & 0xFFFFFFFF);
418 #elif defined(__GNUC__) && defined(__sparc_v9__)
419 static __inline__ gmx_cycles_t gmx_cycles_read(void)
421 /* gcc inline assembly on sparc v9: read the %tick register. The asm is volatile so the optimizer can never merge, hoist or drop repeated counter reads. */
423 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
426 #elif defined(__DECC) && defined(__alpha)
427 static __inline gmx_cycles_t gmx_cycles_read(void)
429 /* Digital GEM C compiler on alpha */
431 cycle = asm ("rpcc %v0");
432 return (cycle & 0xFFFFFFFF);
434 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
435 static __inline gmx_cycles_t gmx_cycles_read(void)
437 /* Irix compilers on SGI hardware */
439 clock_gettime(CLOCK_SGI_CYCLE, &t);
440 /* Return the number of nanoseconds, so we can subtract/add */
441 return ((unsigned long long)t.tv_sec)*1000000000+
442 (unsigned long long)t.tv_nsec;
444 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
445 static inline gmx_cycles_t gmx_cycles_read(void)
447 /* Solaris high-resolution timers */
450 #elif defined(__xlC__) && defined (_AIX)
451 static inline gmx_cycles_t gmx_cycles_read(void)
453 /* AIX compilers. Inline the calculation instead of using library functions */
455 read_real_time(&t1, TIMEBASE_SZ);
456 /* POWER returns real time (seconds + nanoseconds),
457 * POWER_PC returns high/low 32 bits of a counter.
459 if (t1.flag == RTC_POWER_PC)
461 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
465 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
468 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
469 ( defined(__powerpc__) || defined(__ppc__) ) )
470 static __inline__ gmx_cycles_t gmx_cycles_read(void)
472 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
473 unsigned long low, high1, high2;
476 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
477 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
478 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
480 while (high1 != high2);
482 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
484 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
485 static __inline__ gmx_cycles_t gmx_cycles_read(void)
487 /* Metrowerks on macintosh */
488 unsigned int long low, high1, high2;
491 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
492 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
493 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
495 while (high1 != high2);
497 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
499 #elif defined(__sun) && defined(__sparcv9)
501 static __inline__ gmx_cycles_t gmx_cycles_read(void)
504 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
509 static gmx_cycles_t gmx_cycles_read(void)
515 /*! \brief Calculate number of seconds per cycle tick on host
517 * This routine runs a timer loop to calibrate the number of
518 * seconds per the units returned from gmx_cycles_read().
520 * \param sampletime Minimum real sample time. It takes some trial-and-error
521 * to find the correct delay loop size, so the total runtime of
522 * this routine is about twice this time.
523 * \return Number of seconds per cycle unit. If it is not possible to
524 * calculate on this system (for whatever reason) the return value
525 * will be -1, so check that it is positive before using it.
528 gmx_cycles_calibrate(double sampletime);