040fd5fb1858a6325a2fa995016d61c90c53fe52
[alexxy/gromacs.git] / src / gromacs / mdlib / stophandler.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2018,2019, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*! \internal \file
36  * \brief Defines StopHandler, a helper class and two stop conditions.
37  *
38  * \author Pascal Merz <pascal.merz@colorado.edu>
39  * \ingroup module_mdlib
40  */
41 #include "gmxpre.h"
42
43 #include "stophandler.h"
44
45 #include "config.h"
46
47 #include <memory>
48
49 #include "gromacs/timing/walltime_accounting.h"
50 #include "gromacs/utility/cstringutil.h"
51
52 namespace gmx
53 {
54
55 StopHandler::StopHandler(compat::not_null<SimulationSignal*>      signal,
56                          bool                                     simulationShareState,
57                          std::vector<std::function<StopSignal()>> stopConditions,
58                          bool                                     neverUpdateNeighborList) :
59     signal_(*signal),
60     stopConditions_(std::move(stopConditions)),
61     neverUpdateNeighborlist_(neverUpdateNeighborList)
62 {
63     if (simulationShareState)
64     {
65         signal_.isLocal = false;
66     }
67 }
68
69 StopConditionSignal::StopConditionSignal(int nstList, bool makeBinaryReproducibleSimulation, int nstSignalComm) :
70     handledStopCondition_(gmx_stop_cond_none),
71     makeBinaryReproducibleSimulation_(makeBinaryReproducibleSimulation),
72     nstSignalComm_(nstSignalComm),
73     nstList_(nstList)
74 {
75 }
76
77 StopSignal StopConditionSignal::getSignal(FILE* fplog)
78 {
79     StopSignal signal = StopSignal::noSignal;
80
81     /* Check whether everything is still alright */
82     if (static_cast<int>(gmx_get_stop_condition()) > handledStopCondition_)
83     {
84         int nsteps_stop = -1;
85
86         /* this just makes signals[].sig compatible with the hack
87            of sending signals around by MPI_Reduce together with
88            other floats */
89         if ((gmx_get_stop_condition() == gmx_stop_cond_next_ns)
90             || (makeBinaryReproducibleSimulation_ && gmx_get_stop_condition() == gmx_stop_cond_next))
91         {
92             /* We need at least two global communication steps to pass
93              * around the signal. We stop at a pair-list creation step
94              * to allow for exact continuation, when possible.
95              */
96             signal      = StopSignal::stopAtNextNSStep;
97             nsteps_stop = std::max(nstList_, 2 * nstSignalComm_);
98         }
99         else if (gmx_get_stop_condition() == gmx_stop_cond_next)
100         {
101             /* Stop directly after the next global communication step.
102              * This breaks exact continuation.
103              */
104             signal      = StopSignal::stopImmediately;
105             nsteps_stop = nstSignalComm_ + 1;
106         }
107         if (fplog)
108         {
109             fprintf(fplog, "\n\nReceived the %s signal, stopping within %d steps\n\n",
110                     gmx_get_signal_name(), nsteps_stop);
111             fflush(fplog);
112         }
113         fprintf(stderr, "\n\nReceived the %s signal, stopping within %d steps\n\n",
114                 gmx_get_signal_name(), nsteps_stop);
115         fflush(stderr);
116         handledStopCondition_ = static_cast<int>(gmx_get_stop_condition());
117     }
118
119     return signal;
120 }
121
122 StopConditionTime::StopConditionTime(int nstList, real maximumHoursToRun, int nstSignalComm) :
123     signalSent_(false),
124     maximumHoursToRun_(maximumHoursToRun),
125     nstList_(nstList),
126     nstSignalComm_(nstSignalComm),
127     neverUpdateNeighborlist_(nstList <= 0)
128 {
129 }
130
131 StopSignal StopConditionTime::getSignal(bool bNS, int64_t step, FILE* fplog, gmx_walltime_accounting_t walltime_accounting)
132 {
133     if (signalSent_)
134     {
135         // We only want to send it once, but might be called again before run is terminated
136         return StopSignal::noSignal;
137     }
138     if ((bNS || neverUpdateNeighborlist_)
139         && walltime_accounting_get_time_since_start(walltime_accounting)
140                    > maximumHoursToRun_ * 60.0 * 60.0 * 0.99)
141     {
142         /* Signal to terminate the run */
143         char sbuf[STEPSTRSIZE];
144         int  nsteps_stop = std::max(nstList_, 2 * nstSignalComm_);
145         if (fplog)
146         {
147             fprintf(fplog,
148                     "\nStep %s: Run time exceeded %.3f hours, "
149                     "will terminate the run within %d steps\n",
150                     gmx_step_str(step, sbuf), maximumHoursToRun_ * 0.99, nsteps_stop);
151         }
152         fprintf(stderr,
153                 "\nStep %s: Run time exceeded %.3f hours, "
154                 "will terminate the run within %d steps\n",
155                 gmx_step_str(step, sbuf), maximumHoursToRun_ * 0.99, nsteps_stop);
156         signalSent_ = true;
157         return StopSignal::stopAtNextNSStep;
158     }
159     return StopSignal::noSignal;
160 }
161
162 void StopHandlerBuilder::registerStopCondition(std::function<StopSignal()> stopCondition)
163 {
164     stopConditions_.emplace_back(std::move(stopCondition));
165 };
166
167 std::unique_ptr<StopHandler> StopHandlerBuilder::getStopHandlerMD(compat::not_null<SimulationSignal*> signal,
168                                                                   bool simulationShareState,
169                                                                   bool isMaster,
170                                                                   int  nstList,
171                                                                   bool makeBinaryReproducibleSimulation,
172                                                                   int   nstSignalComm,
173                                                                   real  maximumHoursToRun,
174                                                                   bool  neverUpdateNeighborList,
175                                                                   FILE* fplog,
176                                                                   const int64_t&  step,
177                                                                   const gmx_bool& bNS,
178                                                                   gmx_walltime_accounting_t walltime_accounting)
179 {
180     if (!GMX_THREAD_MPI || isMaster)
181     {
182         // Using shared ptr because move-only callable not supported by std::function.
183         // Would require replacement such as fu2::function or cxx_function.
184         auto stopConditionSignal = std::make_shared<StopConditionSignal>(
185                 nstList, makeBinaryReproducibleSimulation, nstSignalComm);
186         registerStopCondition(
187                 [stopConditionSignal, fplog]() { return stopConditionSignal->getSignal(fplog); });
188     }
189
190     if (isMaster && maximumHoursToRun > 0)
191     {
192         auto stopConditionTime =
193                 std::make_shared<StopConditionTime>(nstList, maximumHoursToRun, nstSignalComm);
194         registerStopCondition([stopConditionTime, &bNS, &step, fplog, walltime_accounting]() {
195             return stopConditionTime->getSignal(bNS, step, fplog, walltime_accounting);
196         });
197     }
198
199     return std::make_unique<StopHandler>(signal, simulationShareState, stopConditions_,
200                                          neverUpdateNeighborList);
201 }
202
203 } // namespace gmx