2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \libinternal \file
37 * Declares the reset handler class.
39 * This class resets the various counters based on either the time (master rank sends
40 * a reset signal after 49.5% of the maximum run time), or based on the number of elapsed
41 * steps (handled locally by all ranks independently). Resets can happen in different ways:
44 * * at a predetermined step (gmx mdrun -resetstep XXX)
45 * * at half of the number of steps (gmx mdrun -resethway and nsteps set)
46 * * at half of the max wall time (gmx mdrun -resethway -maxh XX), which is
47 * triggered when walltime >= 49.5% of max
49 * If two or more of these reset conditions are set, the first condition that is met
50 * resets the counters; no second reset takes place. Note also that
51 * -resethway with nsteps set overrides -resetstep
52 * (gmx mdrun -resethway -nsteps 100000 -resetstep 1000 will result in a reset at step 50000).
55 * The setting and handling is implemented in private functions. They are only called
56 * if a respective boolean is true. For the trivial case of no reset needed (or no reset
57 * signal setting on any other rank than master), the translation unit of the calling
58 * function is therefore never left. The current implementation also allows the handler
59 * and setters to be ignored once a reset has been done, as a reset is only allowed to
60 * happen once. In the future, in many of these cases this will be achieved by adding
61 * (or not adding) handlers / setters to the task graph.
63 * \author Pascal Merz <pascal.merz@colorado.edu>
65 * \ingroup module_mdlib
67 #ifndef GMX_MDLIB_RESETHANDLER_H
68 #define GMX_MDLIB_RESETHANDLER_H
70 #include "gromacs/compat/pointers.h"
71 #include "gromacs/mdlib/simulationsignal.h"
72 #include "gromacs/utility/logger.h"
76 struct gmx_walltime_accounting;
77 struct nonbonded_verlet_t;
78 struct pme_load_balancing_t;
84 /*! \brief Reset signals
86 * Signals set and read by ResetHandler. Possible signals include
88 * * reset counters (as soon as signal is received)
90 enum class ResetSignal
97 * \brief Class handling the reset of counters
99 * Master rank sets the reset signal if half the run time is reached.
100 * All ranks receive the reset signal and reset their respective counters.
101 * This also resets the counters if half the time steps have passed (no communication needed).
103 class ResetHandler final
106 /*! \brief ResetHandler constructor
108 * Needs a pointer to the signal to communicate between ranks, information on whether
109 * multiple simulations need to be synchronized, and additional data to determine
110 * whether counter resetting takes place at all, and whether the current rank can set
111 * the resetting signal.
113 ResetHandler(compat::not_null<SimulationSignal*> signal,
114 bool simulationsShareState,
118 real maximumHoursToRun,
119 const MDLogger& mdlog,
120 gmx_wallcycle* wcycle,
121 gmx_walltime_accounting* walltime_accounting);
123 /*! \brief Decides whether a reset signal needs to be set
125 * Reset signal is set if run time is greater than 49.5% of maximal run time.
127 void setSignal(gmx_walltime_accounting* walltime_accounting)
// Inline guard: setSignalImpl() is only called while rankCanSetSignal_ is still true,
// so ranks that cannot (or no longer need to) set the signal never leave this header.
129 if (rankCanSetSignal_)
131 if (setSignalImpl(walltime_accounting))
133 // need to set the reset signal only once
134 rankCanSetSignal_ = false;
139 /*! \brief Decides whether the counters are reset, and performs the reset if needed
141 * The counters are reset if
143 * * the signal for resetting was received, or
144 * * the (local) number of steps reached the defined counter reset step.
146 * Note that even if two reset conditions are present (at a specific step and a
147 * specific time), the reset will only take place once, whenever the first condition
150 void resetCounters(int64_t step,
152 const MDLogger& mdlog,
155 nonbonded_verlet_t* nbv,
157 const gmx_pme_t* pme,
158 const pme_load_balancing_t* pme_loadbal,
159 gmx_wallcycle* wcycle,
160 gmx_walltime_accounting* walltime_accounting)
162 if (simulationNeedsReset_)
// Inline guard: resetCountersImpl() is only called while a reset is still pending;
// a true return value is treated as "the reset has been performed".
164 if (resetCountersImpl(step, step_rel, mdlog, fplog, cr, nbv, nrnb, pme, pme_loadbal, wcycle, walltime_accounting))
166 // need to reset the counters only once
167 simulationNeedsReset_ = false;
// Also disable setSignal(): once the counters were reset there is nothing left to signal.
168 rankCanSetSignal_ = false;
// The inline setSignal() wrapper stops calling this once it has returned true.
174 //! Implementation of the setSignal() function
175 bool setSignalImpl(gmx_walltime_accounting* walltime_accounting);
// The inline resetCounters() wrapper stops calling this once it has returned true.
177 //! Implementation of the resetCounters() function
178 bool resetCountersImpl(int64_t step,
180 const MDLogger& mdlog,
183 nonbonded_verlet_t* nbv,
185 const gmx_pme_t* pme,
186 const pme_load_balancing_t* pme_loadbal,
187 gmx_wallcycle* wcycle,
188 gmx_walltime_accounting* walltime_accounting);
// NOTE(review): presumably bound to the object behind the constructor's `signal` argument - confirm in the .cpp.
190 SimulationSignal& signal_;
// True while setSignal() should still call setSignalImpl(); cleared once that call
// returned true or once the counters have been reset.
192 bool rankCanSetSignal_;
// True while resetCounters() should still call resetCountersImpl(); cleared after the
// first call that returned true.
193 bool simulationNeedsReset_;
// NOTE(review): presumably initialised from the constructor's `maximumHoursToRun` argument - confirm in the .cpp.
194 const real maximumHoursToRun_;
198 #endif // GMX_MDLIB_RESETHANDLER_H