/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2008, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_TIMING_WALLCYCLE_H
#define GMX_TIMING_WALLCYCLE_H
/* NOTE: None of the routines here are safe to call within an OpenMP
 * region.
 */
#include "config.h" /* for GMX_CYCLE_SUBCOUNTERS */

#include <cstdio>

#include <array>
#include <memory>
#include <vector>

#include "gromacs/timing/cyclecounter.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/enumerationhelpers.h"

struct t_commrec;
#ifndef DEBUG_WCYCLE
/*! \brief Enables consistency checking for the counters.
 *
 * If the macro is set to 1, the code checks whether you stop a counter
 * different from the last one that was opened, and whether you nest too deeply.
 */
#    define DEBUG_WCYCLE 0
#endif
enum class WallCycleCounter : int
{
    Run,
    /* ... further counters elided ... */
    WaitGpuStatePropagatorData,
    /* ... further counters elided ... */
    Count
};
enum class WallCycleSubCounter : int
{
    /* ... sub-counters elided ... */
    NonbondedFepReduction,
    /* ... */
    LaunchStatePropagatorData,
    /* ... */
    LaunchGpuUpdateConstrain,
    /* ... further sub-counters elided ... */
    Count
};
static constexpr int sc_numWallCycleCounters = static_cast<int>(WallCycleCounter::Count);
static constexpr int sc_numWallCycleSubCounters = static_cast<int>(WallCycleSubCounter::Count);
static constexpr int sc_numWallCycleCountersSquared = sc_numWallCycleCounters * sc_numWallCycleCounters;
static constexpr bool sc_useCycleSubcounters = GMX_CYCLE_SUBCOUNTERS;
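
/*! \brief Example: iterating over all main counters.
 *
 * Illustrative sketch only: the trailing Count enumerator is what makes a
 * plain integer loop over every counter possible, which is how the constants
 * above are meant to be used.
 *
 * \code
 *     for (int i = 0; i < sc_numWallCycleCounters; i++)
 *     {
 *         auto key = static_cast<WallCycleCounter>(i);
 *         // ... inspect or print the counter identified by key ...
 *     }
 * \endcode
 */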
//! Maximum counter nesting depth checked when DEBUG_WCYCLE is enabled
static constexpr int c_MaxWallCycleDepth = 6;

//! State of one counter: call count, accumulated cycles, and start cycle of the open interval
struct wallcc_t
{
    int          n;
    gmx_cycles_t c;
    gmx_cycles_t start;
};

struct gmx_wallcycle
{
    gmx::EnumerationArray<WallCycleCounter, wallcc_t> wcc;
    /* did we detect one or more invalid cycle counts */
    bool haveInvalidCount;
    /* variables for testing/debugging */
    bool                  wc_barrier;
    std::vector<wallcc_t> wcc_all;
    int                   wc_depth;
#if DEBUG_WCYCLE
    std::array<WallCycleCounter, c_MaxWallCycleDepth> counterlist;
    int                                               count_depth;
    bool                                              isMasterThread;
#endif
    WallCycleCounter ewc_prev;
    gmx_cycles_t     cycle_prev;
    int64_t          reset_counters;
    const t_commrec* cr;
    gmx::EnumerationArray<WallCycleSubCounter, wallcc_t> wcsc;
};
//! Returns whether cycle counting is supported
bool wallcycle_have_counter();

//! Constructs and returns the wall-cycle structure
std::unique_ptr<gmx_wallcycle> wallcycle_init(FILE* fplog, int resetstep, const t_commrec* cr);
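
/*! \brief Usage sketch for initialization (illustrative only).
 *
 * Assumes a log file handle \c fplog and a communication record \c cr are
 * available from the caller; the reset step value here is arbitrary.
 *
 * \code
 *     std::unique_ptr<gmx_wallcycle> wcycle = wallcycle_init(fplog, 0, cr);
 *     wallcycle_start(wcycle.get(), WallCycleCounter::Run);
 *     // ... simulation work ...
 *     wallcycle_stop(wcycle.get(), WallCycleCounter::Run);
 * \endcode
 */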
//! Adds a custom barrier for wallcycle counting
void wallcycleBarrier(gmx_wallcycle* wc);

#if DEBUG_WCYCLE
//! Consistency check performed when a counter is started
void debug_start_check(gmx_wallcycle* wc, WallCycleCounter ewc);
//! Consistency check performed when a counter is stopped
void debug_stop_check(gmx_wallcycle* wc, WallCycleCounter ewc);
#endif
//! Returns the cumulative call count and sub cycle count for ewcs
void wallcycle_sub_get(gmx_wallcycle* wc, WallCycleSubCounter ewcs, int* n, double* c);
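
/*! \brief Sketch of reading back a sub-counter (illustrative only).
 *
 * \code
 *     int    calls  = 0;
 *     double cycles = 0;
 *     wallcycle_sub_get(wc, WallCycleSubCounter::NonbondedFepReduction, &calls, &cycles);
 * \endcode
 */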
//! Records ewc as the most recently started counter for pair bookkeeping
inline void wallcycle_all_start(gmx_wallcycle* wc, WallCycleCounter ewc, gmx_cycles_t cycle)
{
    wc->ewc_prev   = ewc;
    wc->cycle_prev = cycle;
}
//! Accumulates the cycles since the previous counter into the (prev, current) pair slot
inline void wallcycle_all_stop(gmx_wallcycle* wc, WallCycleCounter ewc, gmx_cycles_t cycle)
{
    const int prev    = static_cast<int>(wc->ewc_prev);
    const int current = static_cast<int>(ewc);
    wc->wcc_all[prev * sc_numWallCycleCounters + current].n += 1;
    wc->wcc_all[prev * sc_numWallCycleCounters + current].c += cycle - wc->cycle_prev;
}
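
/*! \brief Sketch of the flat indexing used by wallcycle_all_start/stop.
 *
 * When enabled, wcc_all is a Counters x Counters matrix stored flat (hence
 * sc_numWallCycleCountersSquared), keyed by the (previous, current) counter
 * pair. The hypothetical helper below only restates the index arithmetic:
 *
 * \code
 *     inline int allPairIndex(WallCycleCounter prev, WallCycleCounter current)
 *     {
 *         return static_cast<int>(prev) * sc_numWallCycleCounters + static_cast<int>(current);
 *     }
 * \endcode
 */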
//! Starts the cycle counter (and increases the call count)
inline void wallcycle_start(gmx_wallcycle* wc, WallCycleCounter ewc)
{
    if (wc == nullptr)
    {
        return;
    }

    wallcycleBarrier(wc);

#if DEBUG_WCYCLE
    debug_start_check(wc, ewc);
#endif
    gmx_cycles_t cycle = gmx_cycles_read();
    wc->wcc[ewc].start = cycle;
    if (!wc->wcc_all.empty())
    {
        wc->wc_depth++;
        if (ewc == WallCycleCounter::Run)
        {
            wallcycle_all_start(wc, ewc, cycle);
        }
        else if (wc->wc_depth == 3)
        {
            wallcycle_all_stop(wc, ewc, cycle);
        }
    }
}
//! Starts the cycle counter without increasing the call count
inline void wallcycle_start_nocount(gmx_wallcycle* wc, WallCycleCounter ewc)
{
    if (wc == nullptr)
    {
        return;
    }
    wallcycle_start(wc, ewc);
    wc->wcc[ewc].n--;
}
//! Stops the cycle count for ewc, returns the last cycle count
inline double wallcycle_stop(gmx_wallcycle* wc, WallCycleCounter ewc)
{
    gmx_cycles_t cycle, last;

    if (wc == nullptr)
    {
        return 0;
    }

    wallcycleBarrier(wc);

#if DEBUG_WCYCLE
    debug_stop_check(wc, ewc);
#endif

    /* When processes or threads migrate between cores, the cycle counting
     * can get messed up if the cycle counters on different cores are not
     * synchronized. When this happens we expect both large negative and
     * positive cycle differences. We can detect negative cycle differences.
     * Detecting too large positive counts is difficult, since the count can be
     * large, especially for WallCycleCounter::Run. If we detect a negative count,
     * we will not print the cycle accounting table.
     */
    cycle = gmx_cycles_read();
    if (cycle >= wc->wcc[ewc].start)
    {
        last = cycle - wc->wcc[ewc].start;
    }
    else
    {
        last                 = 0;
        wc->haveInvalidCount = true;
    }
    wc->wcc[ewc].c += last;
    wc->wcc[ewc].n++;
    if (!wc->wcc_all.empty())
    {
        wc->wc_depth--;
        if (ewc == WallCycleCounter::Run)
        {
            wallcycle_all_stop(wc, ewc, cycle);
        }
        else if (wc->wc_depth == 2)
        {
            wallcycle_all_start(wc, ewc, cycle);
        }
    }

    return last;
}
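
/*! \brief Sketch of the nesting that the depth bookkeeping above assumes.
 *
 * Counter names below are illustrative; per the code above, wcc_all pair
 * transitions are recorded when nesting reaches depth 3 and the pair timing
 * restarts when it drops back to depth 2.
 *
 * \code
 *     wallcycle_start(wc, WallCycleCounter::Run);   // depth 1
 *     wallcycle_start(wc, WallCycleCounter::Step);  // depth 2
 *     wallcycle_start(wc, WallCycleCounter::Force); // depth 3, transition recorded
 *     wallcycle_stop(wc, WallCycleCounter::Force);  // depth 2, pair timing restarted
 *     wallcycle_stop(wc, WallCycleCounter::Step);
 *     wallcycle_stop(wc, WallCycleCounter::Run);
 * \endcode
 */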
//! Only increments the call count for ewc by one
inline void wallcycle_increment_event_count(gmx_wallcycle* wc, WallCycleCounter ewc)
{
    if (wc != nullptr)
    {
        wc->wcc[ewc].n++;
    }
}
//! Returns the cumulative call count and cycle count for ewc
void wallcycle_get(gmx_wallcycle* wc, WallCycleCounter ewc, int* n, double* c);

//! Resets all cycle counters to zero
void wallcycle_reset_all(gmx_wallcycle* wc);

//! Scales the cycle counts to reflect how many threads run for that number of cycles
void wallcycle_scale_by_num_threads(gmx_wallcycle* wc, bool isPmeRank, int nthreads_pp, int nthreads_pme);

//! Returns reset_counters from the wc struct
int64_t wcycle_get_reset_counters(gmx_wallcycle* wc);

//! Sets reset_counters
void wcycle_set_reset_counters(gmx_wallcycle* wc, int64_t reset_counters);
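
/*! \brief Sketch of the counter-reset flow (step number illustrative).
 *
 * The caller stores the step at which counters should be zeroed, then invokes
 * the reset when that step is reached.
 *
 * \code
 *     wcycle_set_reset_counters(wc, 5000);
 *     // ... later, when step 5000 is reached ...
 *     wallcycle_reset_all(wc);
 * \endcode
 */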
//! Set the start sub cycle count for ewcs
inline void wallcycle_sub_start(gmx_wallcycle* wc, WallCycleSubCounter ewcs)
{
    if (sc_useCycleSubcounters && wc != nullptr)
    {
        wc->wcsc[ewcs].start = gmx_cycles_read();
    }
}
//! Set the start sub cycle count for ewcs without increasing the call count
inline void wallcycle_sub_start_nocount(gmx_wallcycle* wc, WallCycleSubCounter ewcs)
{
    if (sc_useCycleSubcounters && wc != nullptr)
    {
        wallcycle_sub_start(wc, ewcs);
        wc->wcsc[ewcs].n--;
    }
}
//! Stop the sub cycle count for ewcs
inline void wallcycle_sub_stop(gmx_wallcycle* wc, WallCycleSubCounter ewcs)
{
    if (sc_useCycleSubcounters && wc != nullptr)
    {
        wc->wcsc[ewcs].c += gmx_cycles_read() - wc->wcsc[ewcs].start;
        wc->wcsc[ewcs].n++;
    }
}
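
/*! \brief Sketch of a sub-counter start/stop pair (illustrative only).
 *
 * Because sc_useCycleSubcounters is a compile-time constant, these calls
 * compile away entirely unless GMX_CYCLE_SUBCOUNTERS is enabled, so callers
 * can leave them in hot code paths unconditionally.
 *
 * \code
 *     wallcycle_sub_start(wc, WallCycleSubCounter::NonbondedFepReduction);
 *     // ... reduction work ...
 *     wallcycle_sub_stop(wc, WallCycleSubCounter::NonbondedFepReduction);
 * \endcode
 */

#endif /* GMX_TIMING_WALLCYCLE_H */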