Add integration tests for exact restarts
[alexxy/gromacs.git] / src / programs / mdrun / tests / exactcontinuation.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2018,2019, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  * \brief Tests that mdrun restarts are exact, that is that two
38  * successive runs without appending reproduce a single-part run.
39  *
40  * \todo Extend the coverage to the appending case.
41  *
42  * \author Mark Abraham <mark.j.abraham@gmail.com>
43  * \ingroup module_mdrun_integration_tests
44  */
45 #include "gmxpre.h"
46
47 #include "config.h"
48
49 #include <string>
50 #include <tuple>
51
52 #include <gtest/gtest.h>
53
54 #include "gromacs/topology/idef.h"
55 #include "gromacs/trajectory/energyframe.h"
56 #include "gromacs/utility/stringutil.h"
57
58 #include "testutils/mpitest.h"
59 #include "testutils/simulationdatabase.h"
60 #include "testutils/testasserts.h"
61
62 #include "energycomparison.h"
63 #include "energyreader.h"
64 #include "moduletest.h"
65
66 namespace gmx
67 {
68 namespace test
69 {
70 namespace
71 {
72
73
74 /*! \brief Manages comparing a pair of matching energy frames from a
75  * normal run and the same run in two parts.
76  *
77  * The passed frame readers must contain logically equivalent
78  * contents, with no extra or missing frames from either the
79  * one- or two-part run.
80  *
81  * \todo This class has a similar interface with one in
82  * mdcomparison.cpp, but not enough to warrant extracting an interface
83  * class. Perhaps parts of this should be cast as a class that returns
84  * iterators to successive frames, so that the fancy footwork for
85  * pretending a two-part run is one sequence is separate from the
86  * comparison of that sequene with that of the one-part run?
87  *
88  * \tparam FrameReader  Has readNextFrame() and frame() methods
89  *                      useful for returning successive Frame objects */
90 template <class FrameReader>
91 class ContinuationFramePairManager
92 {
93     public:
94         //! Convenience typedef
95         typedef std::unique_ptr<FrameReader> FrameReaderPtr;
96         //! Constructor
97         ContinuationFramePairManager(FrameReaderPtr full,
98                                      FrameReaderPtr firstPart,
99                                      FrameReaderPtr secondPart) :
100             full_(std::move(full)),
101             firstPart_(std::move(firstPart)),
102             secondPart_(std::move(secondPart)),
103             isFirstPart_(true)
104         {}
105         /*! \brief Probe for a pair of valid frames, and return true if both are found.
106          *
107          * Gives a test failure if exactly one frame is found, because
108          * it is an error for either run to have missing or extra
109          * frames.  Note that the frame where the two-part run ends
110          * and begins is duplicated between the two output files by
111          * mdrun, and the test accommodates this.
112          *
113          * \todo This would be straightforward if velocity Verlet
114          * behaved like other integrators. */
115         bool shouldContinueComparing()
116         {
117             if (full_->readNextFrame())
118             {
119                 if (isFirstPart_)
120                 {
121                     if (firstPart_->readNextFrame())
122                     {
123                         // Two valid next frames exist, so we should continue comparing.
124                         return true;
125                     }
126                     else
127                     {
128                         // First part ran out of frames, move on to the second part
129                         isFirstPart_  = false;
130                         if (secondPart_->readNextFrame())
131                         {
132                             // Skip a second-part frame so the one we will
133                             // read can compare with the next full-run
134                             // frames.
135                             secondPart_->frame();
136                             if (secondPart_->readNextFrame())
137                             {
138                                 // Two valid next frames exist, so we should continue comparing.
139                                 return true;
140                             }
141                             else
142                             {
143                                 ADD_FAILURE() << "Second-part energy file had no (new) frames";
144                             }
145                         }
146                         else
147                         {
148                             ADD_FAILURE() << "Second-part energy file had no frames";
149                         }
150                     }
151                 }
152                 else
153                 {
154                     if (secondPart_->readNextFrame())
155                     {
156                         // Two valid next frames exist, so we should continue comparing.
157                         return true;
158                     }
159                     else
160                     {
161                         ADD_FAILURE() << "Full run energy file had at least one more frame than two-part run energy file";
162                     }
163                 }
164             }
165             else
166             {
167                 if (isFirstPart_)
168                 {
169                     ADD_FAILURE() << "Full-run energy file ran out of frames before the first part of the two-part run completed";
170                 }
171                 else
172                 {
173                     if (secondPart_->readNextFrame())
174                     {
175                         ADD_FAILURE() << "Two-part run energy file had at least one more frame than full-run energy file";
176                     }
177                     else
178                     {
179                         // Both files ran out of frames at the same time, which is the expected behaviour.
180                     }
181                 }
182             }
183             // At least one file is out of frames, so should not continue comparing.
184             return false;
185         }
186         /*! \brief Compare all possible pairs of frames using \c compareTwoFrames.
187          *
188          * \tparam Frame  The type of frame used in the comparison (returned
189          *                by FrameReader and used by compareTwoFrames). */
190         template <class Frame>
191         void compareAllFramePairs(std::function<void(const Frame &, const Frame &)> compareTwoFrames)
192         {
193             while (shouldContinueComparing())
194             {
195                 Frame firstFrame  = full_->frame();
196                 Frame secondFrame = isFirstPart_ ? firstPart_->frame() : secondPart_->frame();
197                 SCOPED_TRACE("Comparing frames from two runs '" + firstFrame.frameName() + "' and '" + secondFrame.frameName() + "'");
198                 compareTwoFrames(firstFrame, secondFrame);
199             }
200
201         }
202
203     private:
204         EnergyFrameReaderPtr full_;
205         EnergyFrameReaderPtr firstPart_;
206         EnergyFrameReaderPtr secondPart_;
207         bool                 isFirstPart_;
208 };
209
210 /*! \brief Run grompp for a normal mdrun, the same mdrun stopping part
211  * way, doing a continuation, and compare the results. */
212 void runTest(TestFileManager        *fileManager,
213              SimulationRunner       *runner,
214              const std::string      &simulationName,
215              int                     maxWarningsTolerated,
216              const MdpFieldValues   &mdpFieldValues,
217              const EnergyTolerances &energiesToMatch)
218 {
219     int numRanksAvailable = getNumberOfTestMpiRanks();
220     if (!isNumberOfPpRanksSupported(simulationName, numRanksAvailable))
221     {
222         fprintf(stdout, "Test system '%s' cannot run with %d ranks.\n"
223                 "The supported numbers are: %s\n",
224                 simulationName.c_str(), numRanksAvailable,
225                 reportNumbersOfPpRanksSupported(simulationName).c_str());
226         return;
227     }
228
229     // prepare some names for files to use with the two mdrun calls
230     std::string fullRunTprFileName              = fileManager->getTemporaryFilePath("full.tpr");
231     std::string firstPartRunTprFileName         = fileManager->getTemporaryFilePath("firstpart.tpr");
232     std::string fullRunEdrFileName              = fileManager->getTemporaryFilePath("full.edr");
233     std::string firstPartRunEdrFileName         = fileManager->getTemporaryFilePath("firstpart.edr");
234     std::string firstPartRunCheckpointFileName  = fileManager->getTemporaryFilePath("firstpart.cpt");
235     std::string secondPartRunEdrFileName        = fileManager->getTemporaryFilePath("secondpart");
236
237     // prepare the full run .tpr file, which will be used for the full
238     // run, and for the second part of the two-part run.
239     {
240         // TODO evolve grompp to report the number of warnings issued, so
241         // tests always expect the right number.
242         CommandLine caller;
243         caller.append("grompp");
244         caller.addOption("-maxwarn", maxWarningsTolerated);
245         runner->useTopGroAndNdxFromDatabase(simulationName);
246         runner->useStringAsMdpFile(prepareMdpFileContents(mdpFieldValues));
247         runner->tprFileName_ = fullRunTprFileName;
248         EXPECT_EQ(0, runner->callGrompp(caller));
249     }
250
251     // prepare the .tpr file for the first part of the two-part run
252     {
253         // TODO evolve grompp to report the number of warnings issued, so
254         // tests always expect the right number.
255         CommandLine caller;
256         caller.append("grompp");
257         caller.addOption("-maxwarn", maxWarningsTolerated);
258         runner->useTopGroAndNdxFromDatabase(simulationName);
259         auto firstPartMdpFieldValues = mdpFieldValues;
260         firstPartMdpFieldValues["nsteps"] = "8";
261         runner->useStringAsMdpFile(prepareMdpFileContents(firstPartMdpFieldValues));
262         runner->tprFileName_ = firstPartRunTprFileName;
263         EXPECT_EQ(0, runner->callGrompp(caller));
264     }
265
266     // do a normal mdrun
267     {
268         runner->tprFileName_ = fullRunTprFileName;
269         runner->edrFileName_ = fullRunEdrFileName;
270         CommandLine fullRunCaller;
271         fullRunCaller.append("mdrun");
272         ASSERT_EQ(0, runner->callMdrun(fullRunCaller));
273     }
274
275     // do a repeat of the first part of the same mdrun
276     {
277         runner->tprFileName_ = firstPartRunTprFileName;
278         runner->edrFileName_ = firstPartRunEdrFileName;
279         CommandLine firstPartRunCaller;
280         firstPartRunCaller.append("mdrun");
281         firstPartRunCaller.addOption("-cpo", firstPartRunCheckpointFileName);
282         ASSERT_EQ(0, runner->callMdrun(firstPartRunCaller));
283     }
284
285     // do a continuation (without appending) from the first part of
286     // that same mdrun
287     {
288         runner->tprFileName_ = fullRunTprFileName;
289         runner->edrFileName_ = secondPartRunEdrFileName;
290         CommandLine secondPartRunCaller;
291         secondPartRunCaller.append("mdrun");
292         // TODO We could test with appending but it would need a
293         // different implementation.
294         secondPartRunCaller.append("-noappend");
295         secondPartRunCaller.addOption("-cpi", firstPartRunCheckpointFileName);
296         ASSERT_EQ(0, runner->callMdrun(secondPartRunCaller));
297         // Cope with how -noappend works
298         secondPartRunEdrFileName += ".part0002.edr";
299     }
300
301     // Build the functor that will compare reference and test
302     // energy frames on the chosen energy fields.
303     //
304     // TODO It would be less code if we used a lambda for this, but either
305     // clang 3.4 or libstdc++ 5.2.1 have an issue with capturing a
306     // std::unordered_map
307     EnergyComparator energyComparator(energiesToMatch);
308     // Build the manager that will present matching pairs of frames to compare.
309     //
310     // TODO Here is an unnecessary copy of keys (ie. the energy field
311     // names), for convenience. In the future, use a range.
312     auto namesOfEnergiesToMatch = getKeys(energiesToMatch);
313     ContinuationFramePairManager<EnergyFrameReader>
314          energyManager(openEnergyFileToReadFields(fullRunEdrFileName, namesOfEnergiesToMatch),
315                   openEnergyFileToReadFields(firstPartRunEdrFileName, namesOfEnergiesToMatch),
316                   openEnergyFileToReadFields(secondPartRunEdrFileName, namesOfEnergiesToMatch));
317     // Compare the energy frames.
318     energyManager.compareAllFramePairs<EnergyFrame>(energyComparator);
319 }
320
321 /*! \brief Test fixture for mdrun exact continuations
322  *
323  * This test ensures mdrun can run a simulation, writing a trajectory
324  * and matching energies, and reproduce to within a tolerance the same
325  * energies from runs that stopped part of the way, and restarted from
326  * the checkpoint.
327  *
328  * \todo Is there value in testing with mdrun -reprod? As well as
329  * without it?
330  *
331  * \todo Add FEP case. */
332 class MdrunNoAppendContinuationIsExact : public MdrunTestFixture,
333                                          public ::testing::WithParamInterface <
334                                          std::tuple < std::string, std::string, std::string, std::string >>
335 {
336     public:
337         //! Constructor
338         MdrunNoAppendContinuationIsExact() {}
339 };
340
341 /* Listing all of these is tedious, but there's no other way to get a
342  * usefully descriptive string into the test-case name, so that when
343  * one breaks we can find out which one is broken without referring to
344  * this source code file.
345  *
346  * NOTE The choices for the tolerances are arbitrary but sufficient
347  * for comparison of runs to work on different hardware, and kinds and
348  * degrees parallelism. */
349
350 TEST_P(MdrunNoAppendContinuationIsExact, WithinTolerances)
351 {
352     auto params              = GetParam();
353     auto simulationName      = std::get<0>(params);
354     auto integrator          = std::get<1>(params);
355     auto temperatureCoupling = std::get<2>(params);
356     auto pressureCoupling    = std::get<3>(params);
357     SCOPED_TRACE(formatString("Comparing normal and two-part run of simulation '%s' "
358                               "with integrator '%s'",
359                               simulationName.c_str(), integrator.c_str()));
360
361     auto mdpFieldValues = prepareMdpFieldValues(simulationName.c_str(),
362                                                 integrator.c_str(),
363                                                 temperatureCoupling.c_str(),
364                                                 pressureCoupling.c_str());
365     // The exact lambda state choice is unimportant, so long as there
366     // is one when using an FEP input.
367     mdpFieldValues["other"] += formatString("\ninit-lambda-state = %d", 3);
368
369     // Forces on GPUs are generally not reproducible enough for a tight
370     // tolerance. Similarly, the propagation of sd and bd are not as
371     // reproducible as the others. So we use several ULP tolerance
372     // in all cases. This is looser than needed e.g. for md and md-vv
373     // with forces on CPUs, but there is no real risk of a bug with
374     // those propagators that would only be caught with a tighter
375     // tolerance in this particular test.
376     EnergyTolerances energiesToMatch
377     {{
378          {
379              interaction_function[F_EPOT].longname,
380              relativeToleranceAsPrecisionDependentUlp(10.0, 32, 64)
381          },
382      }};
383
384     int numWarningsToTolerate = 1;
385     runTest(&fileManager_, &runner_,
386             simulationName,
387             numWarningsToTolerate, mdpFieldValues,
388             energiesToMatch);
389 }
390
391 // TODO The time for OpenCL kernel compilation means these tests time
392 // out. Once that compilation is cached for the whole process, these
393 // tests can run in such configurations.
394 #if GMX_GPU != GMX_GPU_OPENCL
395
396 INSTANTIATE_TEST_CASE_P(NormalIntegrators, MdrunNoAppendContinuationIsExact,
397                             ::testing::Combine(::testing::Values("argon12", "spc2", "alanine_vsite_vacuo"),
398                                                    ::testing::Values("md", "md-vv", "bd", "sd"),
399                                                    ::testing::Values("no"),
400                                                    ::testing::Values("no")));
401
402 INSTANTIATE_TEST_CASE_P(NormalIntegratorsWithFEP, MdrunNoAppendContinuationIsExact,
403                             ::testing::Combine(::testing::Values("nonanol_vacuo"),
404                                                    ::testing::Values("md", "md-vv", "bd", "sd"),
405                                                    ::testing::Values("no"),
406                                                    ::testing::Values("no")));
407
408 INSTANTIATE_TEST_CASE_P(NormalNVT, MdrunNoAppendContinuationIsExact,
409                             ::testing::Combine(::testing::Values("argon12"),
410                                                    ::testing::Values("md", "md-vv"),
411                                                    ::testing::Values("berendsen", "v-rescale", "nose-hoover"),
412                                                    ::testing::Values("no")));
413
414 INSTANTIATE_TEST_CASE_P(LeapfrogNPH, MdrunNoAppendContinuationIsExact,
415                             ::testing::Combine(::testing::Values("argon12"),
416                                                    ::testing::Values("md"),
417                                                    ::testing::Values("no"),
418                                                    ::testing::Values("berendsen", "parrinello-rahman")));
419
420 INSTANTIATE_TEST_CASE_P(LeapfrogNPT, MdrunNoAppendContinuationIsExact,
421                             ::testing::Combine(::testing::Values("argon12"),
422                                                    ::testing::Values("md"),
423                                                    ::testing::Values("berendsen", "v-rescale", "nose-hoover"),
424                                                    ::testing::Values("berendsen", "parrinello-rahman")));
425
426 INSTANTIATE_TEST_CASE_P(VelocityVerletNPH, MdrunNoAppendContinuationIsExact,
427                             ::testing::Combine(::testing::Values("argon12"),
428                                                    ::testing::Values("md-vv"),
429                                                    ::testing::Values("no"),
430                                                    ::testing::Values("berendsen")));
431
432 INSTANTIATE_TEST_CASE_P(VelocityVerletNPT, MdrunNoAppendContinuationIsExact,
433                             ::testing::Combine(::testing::Values("argon12"),
434                                                    ::testing::Values("md-vv"),
435                                                    ::testing::Values("v-rescale"),
436                                                    ::testing::Values("berendsen")));
437
438 INSTANTIATE_TEST_CASE_P(MTTK, MdrunNoAppendContinuationIsExact,
439                             ::testing::Combine(::testing::Values("argon12"),
440                                                    ::testing::Values("md-vv"),
441                                                    ::testing::Values("nose-hoover"),
442                                                    ::testing::Values("mttk")));
443
444 #endif
445
446 } // namespace
447 } // namespace test
448 } // namespace gmx