2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
6 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "cyclecounter.h"
45 #ifdef HAVE_SYS_TIME_H
46 # include <sys/time.h>
52 #include "gromacs/utility/basedefinitions.h"
54 /*! \brief Calculate number of seconds per cycle tick on host
56 * This routine runs a timer loop to calibrate the number of
57 * seconds per the units returned from gmx_cycles_read().
59 * \param sampletime Minimum real sample time. It takes some trial-and-error
60 * to find the correct delay loop size, so the total runtime of
61 * this routine is about twice this time.
62 * \return Number of seconds per cycle unit. If it is not possible to
63 * calculate on this system (for whatever reason) the return value
64 * will be -1, so check that it is positive before using it.
66 double gmx_cycles_calibrate(double sampletime)
68 /* On ARM and recent-generation x86-64, we can use the more accurate cycle counters
69 * that allow better timing for things that depend on it (e.g. load balancing, profiling).
71 #if ((defined __aarch64__) \
72 && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
73 /* 64-bit ARM cycle counters with GCC inline assembly */
75 __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(cycles));
76 /* Only first 32 bits are significant */
79 GMX_UNUSED_VALUE(sampletime);
81 # if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) \
82 && defined(__x86_64__) && !defined(__ILP32__) && !defined(_CRAYC))
85 int gmx_unused cpuid2;
90 /* cpuid clobbers ebx but it must be restored for -fPIC so save
96 : "=a"(cpuid1), "=d"(cpuid2), "=r"(tmp)
101 /* This CPU is recent enough so the timer frequency can be directly queried */
106 : "=a"(cpuid1), "=d"(cpuid2), "=r"(tmp)
109 cycles = static_cast<gmx_cycles_t>(cpuid1) * static_cast<gmx_cycles_t>(1000000);
115 /* Windows does not have gettimeofday, but it provides a special
116 * routine that returns the cycle counter frequency.
120 QueryPerformanceFrequency(&i);
122 return 1.0 / static_cast<double>(i.QuadPart);
123 /* end of MS Windows implementation */
125 # elif HAVE_GETTIMEOFDAY
127 /* generic implementation with gettimeofday() */
128 struct timeval t1, t2;
130 double timediff, cyclediff;
131 double d = 0.1; /* Dummy variable so we don't optimize away delay loop */
133 if (!gmx_cycles_have_counter())
138 # if (defined(__alpha__) || defined(__alpha))
139 /* Alpha cannot count to more than 4e9, but I don't expect
140 * that the architecture will go over 2GHz before it dies, so
141 * up to 2.0 seconds of sampling should be safe.
143 if (sampletime > 2.0)
149 /* Start a timing loop. We want this to be largely independent
150 * of machine speed, so we need to start with a very small number
151 * of iterations and repeat it until we reach the requested time.
153 * We call gettimeofday an extra time at the start to avoid cache misses.
155 gettimeofday(&t1, nullptr);
156 gettimeofday(&t1, nullptr);
157 c1 = gmx_cycles_read();
161 /* just a delay loop. To avoid optimizing it away, we calculate a number
162 * that will underflow to zero in most cases. By conditionally adding it
163 * to a result at the end it cannot be removed. n=10000 is arbitrary...
165 for (int i = 0; i < 10000; i++)
167 d = d / (1.0 + static_cast<double>(i));
169 /* Read the time again */
170 gettimeofday(&t2, nullptr);
171 c2 = gmx_cycles_read();
172 timediff = static_cast<double>(t2.tv_sec - t1.tv_sec) + (t2.tv_usec - t1.tv_usec) * 1e-6;
173 } while (timediff < sampletime);
177 /* Add a very small result so the delay loop cannot be optimized away */
183 /* Return seconds per cycle */
184 return timediff / cyclediff;
187 /* No timing function available */
189 GMX_UNUSED_VALUE(sampletime);