737d98a38946e40ba25e8df3b7925cecc229d4f4
[alexxy/gromacs.git] / src / programs / mdrun / tests / exactcontinuation.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  * \brief Tests that mdrun restarts are exact, that is that two
38  * successive runs without appending reproduce a single-part run.
39  *
40  * \todo Extend the coverage to the appending case.
41  *
42  * \author Mark Abraham <mark.j.abraham@gmail.com>
43  * \ingroup module_mdrun_integration_tests
44  */
45 #include "gmxpre.h"
46
47 #include "config.h"
48
49 #include <string>
50 #include <tuple>
51
52 #include <gtest/gtest.h>
53
54 #include "gromacs/topology/idef.h"
55 #include "gromacs/trajectory/energyframe.h"
56 #include "gromacs/utility/stringutil.h"
57
58 #include "testutils/mpitest.h"
59 #include "testutils/simulationdatabase.h"
60 #include "testutils/testasserts.h"
61
62 #include "energycomparison.h"
63 #include "energyreader.h"
64 #include "moduletest.h"
65
66 namespace gmx
67 {
68 namespace test
69 {
70 namespace
71 {
72
73
74 /*! \brief Manages comparing a pair of matching energy frames from a
75  * normal run and the same run in two parts.
76  *
77  * The passed frame readers must contain logically equivalent
78  * contents, with no extra or missing frames from either the
79  * one- or two-part run.
80  *
81  * \todo This class has a similar interface with one in
82  * mdcomparison.h, but not enough to warrant extracting an interface
83  * class. Perhaps parts of this should be cast as a class that returns
84  * iterators to successive frames, so that the fancy footwork for
85  * pretending a two-part run is one sequence is separate from the
86  * comparison of that sequene with that of the one-part run?
87  *
88  * \tparam FrameReader  Has readNextFrame() and frame() methods
89  *                      useful for returning successive Frame objects */
90 template<class FrameReader>
91 class ContinuationFramePairManager
92 {
93 public:
94     //! Convenience typedef
95     typedef std::unique_ptr<FrameReader> FrameReaderPtr;
96     //! Constructor
97     ContinuationFramePairManager(FrameReaderPtr full, FrameReaderPtr firstPart, FrameReaderPtr secondPart) :
98         full_(std::move(full)),
99         firstPart_(std::move(firstPart)),
100         secondPart_(std::move(secondPart)),
101         isFirstPart_(true)
102     {
103     }
104     /*! \brief Probe for a pair of valid frames, and return true if both are found.
105      *
106      * Gives a test failure if exactly one frame is found, because
107      * it is an error for either run to have missing or extra
108      * frames.  Note that the frame where the two-part run ends
109      * and begins is duplicated between the two output files by
110      * mdrun, and the test accommodates this.
111      *
112      * \todo This would be straightforward if velocity Verlet
113      * behaved like other integrators. */
114     bool shouldContinueComparing()
115     {
116         if (full_->readNextFrame())
117         {
118             if (isFirstPart_)
119             {
120                 if (firstPart_->readNextFrame())
121                 {
122                     // Two valid next frames exist, so we should continue comparing.
123                     return true;
124                 }
125                 else
126                 {
127                     // First part ran out of frames, move on to the second part
128                     isFirstPart_ = false;
129                     if (secondPart_->readNextFrame())
130                     {
131                         // Skip a second-part frame so the one we will
132                         // read can compare with the next full-run
133                         // frames.
134                         secondPart_->frame();
135                         if (secondPart_->readNextFrame())
136                         {
137                             // Two valid next frames exist, so we should continue comparing.
138                             return true;
139                         }
140                         else
141                         {
142                             ADD_FAILURE() << "Second-part energy file had no (new) frames";
143                         }
144                     }
145                     else
146                     {
147                         ADD_FAILURE() << "Second-part energy file had no frames";
148                     }
149                 }
150             }
151             else
152             {
153                 if (secondPart_->readNextFrame())
154                 {
155                     // Two valid next frames exist, so we should continue comparing.
156                     return true;
157                 }
158                 else
159                 {
160                     ADD_FAILURE() << "Full run energy file had at least one more frame than "
161                                      "two-part run energy file";
162                 }
163             }
164         }
165         else
166         {
167             if (isFirstPart_)
168             {
169                 ADD_FAILURE() << "Full-run energy file ran out of frames before the first part of "
170                                  "the two-part run completed";
171             }
172             else
173             {
174                 if (secondPart_->readNextFrame())
175                 {
176                     ADD_FAILURE() << "Two-part run energy file had at least one more frame than "
177                                      "full-run energy file";
178                 }
179                 else
180                 {
181                     // Both files ran out of frames at the same time, which is the expected behaviour.
182                 }
183             }
184         }
185         // At least one file is out of frames, so should not continue comparing.
186         return false;
187     }
188     /*! \brief Compare all possible pairs of frames using \c compareTwoFrames.
189      *
190      * \tparam Frame  The type of frame used in the comparison (returned
191      *                by FrameReader and used by compareTwoFrames). */
192     template<class Frame>
193     void compareAllFramePairs(std::function<void(const Frame&, const Frame&)> compareTwoFrames)
194     {
195         while (shouldContinueComparing())
196         {
197             EnergyFrame firstFrame  = full_->frame();
198             EnergyFrame secondFrame = isFirstPart_ ? firstPart_->frame() : secondPart_->frame();
199             SCOPED_TRACE("Comparing frames from two runs '" + firstFrame.frameName() + "' and '"
200                          + secondFrame.frameName() + "'");
201             compareTwoFrames(firstFrame, secondFrame);
202         }
203     }
204
205 private:
206     EnergyFrameReaderPtr full_;
207     EnergyFrameReaderPtr firstPart_;
208     EnergyFrameReaderPtr secondPart_;
209     bool                 isFirstPart_;
210 };
211
212 /*! \brief Run grompp for a normal mdrun, the same mdrun stopping part
213  * way, doing a continuation, and compare the results. */
214 void runTest(TestFileManager*            fileManager,
215              SimulationRunner*           runner,
216              const std::string&          simulationName,
217              int                         maxWarningsTolerated,
218              const MdpFieldValues&       mdpFieldValues,
219              const EnergyTermsToCompare& energyTermsToCompare)
220 {
221     int numRanksAvailable = getNumberOfTestMpiRanks();
222     if (!isNumberOfPpRanksSupported(simulationName, numRanksAvailable))
223     {
224         fprintf(stdout,
225                 "Test system '%s' cannot run with %d ranks.\n"
226                 "The supported numbers are: %s\n",
227                 simulationName.c_str(), numRanksAvailable,
228                 reportNumbersOfPpRanksSupported(simulationName).c_str());
229         return;
230     }
231
232     // prepare some names for files to use with the two mdrun calls
233     std::string fullRunTprFileName             = fileManager->getTemporaryFilePath("full.tpr");
234     std::string firstPartRunTprFileName        = fileManager->getTemporaryFilePath("firstpart.tpr");
235     std::string fullRunEdrFileName             = fileManager->getTemporaryFilePath("full.edr");
236     std::string firstPartRunEdrFileName        = fileManager->getTemporaryFilePath("firstpart.edr");
237     std::string firstPartRunCheckpointFileName = fileManager->getTemporaryFilePath("firstpart.cpt");
238     std::string secondPartRunEdrFileName       = fileManager->getTemporaryFilePath("secondpart");
239
240     // prepare the full run .tpr file, which will be used for the full
241     // run, and for the second part of the two-part run.
242     {
243         // TODO evolve grompp to report the number of warnings issued, so
244         // tests always expect the right number.
245         CommandLine caller;
246         caller.append("grompp");
247         caller.addOption("-maxwarn", maxWarningsTolerated);
248         runner->useTopGroAndNdxFromDatabase(simulationName);
249         runner->useStringAsMdpFile(prepareMdpFileContents(mdpFieldValues));
250         runner->tprFileName_ = fullRunTprFileName;
251         EXPECT_EQ(0, runner->callGrompp(caller));
252     }
253
254     // prepare the .tpr file for the first part of the two-part run
255     {
256         // TODO evolve grompp to report the number of warnings issued, so
257         // tests always expect the right number.
258         CommandLine caller;
259         caller.append("grompp");
260         caller.addOption("-maxwarn", maxWarningsTolerated);
261         runner->useTopGroAndNdxFromDatabase(simulationName);
262         auto firstPartMdpFieldValues      = mdpFieldValues;
263         firstPartMdpFieldValues["nsteps"] = std::to_string(std::stoi(mdpFieldValues.at("nsteps")) / 2);
264         runner->useStringAsMdpFile(prepareMdpFileContents(firstPartMdpFieldValues));
265         runner->tprFileName_ = firstPartRunTprFileName;
266         EXPECT_EQ(0, runner->callGrompp(caller));
267     }
268
269     // do a normal mdrun
270     {
271         runner->tprFileName_ = fullRunTprFileName;
272         runner->edrFileName_ = fullRunEdrFileName;
273         CommandLine fullRunCaller;
274         fullRunCaller.append("mdrun");
275         ASSERT_EQ(0, runner->callMdrun(fullRunCaller));
276     }
277
278     // do a repeat of the first part of the same mdrun
279     {
280         runner->tprFileName_ = firstPartRunTprFileName;
281         runner->edrFileName_ = firstPartRunEdrFileName;
282         CommandLine firstPartRunCaller;
283         firstPartRunCaller.append("mdrun");
284         firstPartRunCaller.addOption("-cpo", firstPartRunCheckpointFileName);
285         ASSERT_EQ(0, runner->callMdrun(firstPartRunCaller));
286     }
287
288     // do a continuation (without appending) from the first part of
289     // that same mdrun
290     {
291         runner->tprFileName_ = fullRunTprFileName;
292         runner->edrFileName_ = secondPartRunEdrFileName;
293         CommandLine secondPartRunCaller;
294         secondPartRunCaller.append("mdrun");
295         // TODO We could test with appending but it would need a
296         // different implementation.
297         secondPartRunCaller.append("-noappend");
298         secondPartRunCaller.addOption("-cpi", firstPartRunCheckpointFileName);
299         ASSERT_EQ(0, runner->callMdrun(secondPartRunCaller));
300         // Cope with how -noappend works
301         secondPartRunEdrFileName += ".part0002.edr";
302     }
303
304     // Build the functor that will compare energy frames on the chosen
305     // energy terms.
306     EnergyComparison energyComparison(energyTermsToCompare, FramesToCompare::AllFrames);
307
308     // Build the manager that will present matching pairs of frames to compare.
309     //
310     // TODO Here is an unnecessary copy of keys (ie. the energy term
311     // names), for convenience. In the future, use a range.
312     auto namesOfEnergiesToMatch = energyComparison.getEnergyNames();
313     ContinuationFramePairManager<EnergyFrameReader> energyManager(
314             openEnergyFileToReadTerms(fullRunEdrFileName, namesOfEnergiesToMatch),
315             openEnergyFileToReadTerms(firstPartRunEdrFileName, namesOfEnergiesToMatch),
316             openEnergyFileToReadTerms(secondPartRunEdrFileName, namesOfEnergiesToMatch));
317     // Compare the energy frames.
318     energyManager.compareAllFramePairs<EnergyFrame>(energyComparison);
319 }
320
321 /*! \brief Test fixture for mdrun exact continuations
322  *
323  * This test ensures mdrun can run a simulation, writing a trajectory
324  * and matching energies, and reproduce to within a tolerance the same
325  * energies from runs that stopped part of the way, and restarted from
326  * the checkpoint.
327  *
328  * \todo Is there value in testing with mdrun -reprod? As well as
329  * without it?
330  *
331  * \todo Add FEP case. */
332 class MdrunNoAppendContinuationIsExact :
333     public MdrunTestFixture,
334     public ::testing::WithParamInterface<std::tuple<std::string, std::string, std::string, std::string>>
335 {
336 public:
337     //! Constructor
338     MdrunNoAppendContinuationIsExact() {}
339 };
340
341 /* Listing all of these is tedious, but there's no other way to get a
342  * usefully descriptive string into the test-case name, so that when
343  * one breaks we can find out which one is broken without referring to
344  * this source code file.
345  *
346  * NOTE The choices for the tolerances are arbitrary but sufficient
347  * for comparison of runs to work on different hardware, and kinds and
348  * degrees parallelism. */
349
350 TEST_P(MdrunNoAppendContinuationIsExact, WithinTolerances)
351 {
352     auto params              = GetParam();
353     auto simulationName      = std::get<0>(params);
354     auto integrator          = std::get<1>(params);
355     auto temperatureCoupling = std::get<2>(params);
356     auto pressureCoupling    = std::get<3>(params);
357
358     // Check for unimplemented functionality
359     // TODO: Update this as modular simulator gains functionality
360     const bool isModularSimulatorExplicitlyDisabled = (getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
361     const bool isTCouplingCompatibleWithModularSimulator =
362             (temperatureCoupling == "no" || temperatureCoupling == "v-rescale"
363              || temperatureCoupling == "berendsen");
364     if (integrator == "md-vv" && pressureCoupling == "parrinello-rahman"
365         && (isModularSimulatorExplicitlyDisabled || !isTCouplingCompatibleWithModularSimulator))
366     {
367         // Under md-vv, Parrinello-Rahman is only implemented for the modular simulator
368         return;
369     }
370     if (integrator == "md-vv" && temperatureCoupling == "nose-hoover"
371         && pressureCoupling == "berendsen")
372     {
373         // This combination is not implemented in either legacy or modular simulator
374         return;
375     }
376
377     SCOPED_TRACE(
378             formatString("Comparing normal and two-part run of simulation '%s' "
379                          "with integrator '%s'",
380                          simulationName.c_str(), integrator.c_str()));
381
382     auto mdpFieldValues = prepareMdpFieldValues(simulationName.c_str(), integrator.c_str(),
383                                                 temperatureCoupling.c_str(), pressureCoupling.c_str());
384     // The exact lambda state choice is unimportant, so long as there
385     // is one when using an FEP input.
386     mdpFieldValues["other"] += formatString("\ninit-lambda-state = %d", 3);
387     mdpFieldValues["nsteps"] = "16";
388
389     // Forces on GPUs are generally not reproducible enough for a tight
390     // tolerance. Similarly, the propagation of sd and bd are not as
391     // reproducible as the others. So we use several ULP tolerance
392     // in all cases. This is looser than needed e.g. for md and md-vv
393     // with forces on CPUs, but there is no real risk of a bug with
394     // those propagators that would only be caught with a tighter
395     // tolerance in this particular test.
396     int ulpToleranceInMixed  = 32;
397     int ulpToleranceInDouble = 64;
398     if (integrator == "bd")
399     {
400         // Somehow, the bd integrator has never been as reproducible
401         // as the others, either in continuations or reruns.
402         ulpToleranceInMixed = 200;
403     }
404     EnergyTermsToCompare energyTermsToCompare{
405         { { interaction_function[F_EPOT].longname,
406             relativeToleranceAsPrecisionDependentUlp(10.0, ulpToleranceInMixed, ulpToleranceInDouble) },
407           { interaction_function[F_EKIN].longname,
408             relativeToleranceAsPrecisionDependentUlp(10.0, ulpToleranceInMixed, ulpToleranceInDouble) } }
409     };
410
411     if (temperatureCoupling != "no" || pressureCoupling != "no")
412     {
413         energyTermsToCompare.insert({ interaction_function[F_ECONSERVED].longname,
414                                       relativeToleranceAsPrecisionDependentUlp(
415                                               10.0, ulpToleranceInMixed, ulpToleranceInDouble) });
416     }
417
418     if (pressureCoupling == "parrinello-rahman")
419     {
420         energyTermsToCompare.insert(
421                 { "Box-Vel-XX", relativeToleranceAsPrecisionDependentUlp(1e-12, ulpToleranceInMixed,
422                                                                          ulpToleranceInDouble) });
423         energyTermsToCompare.insert(
424                 { "Box-Vel-YY", relativeToleranceAsPrecisionDependentUlp(1e-12, ulpToleranceInMixed,
425                                                                          ulpToleranceInDouble) });
426         energyTermsToCompare.insert(
427                 { "Box-Vel-ZZ", relativeToleranceAsPrecisionDependentUlp(1e-12, ulpToleranceInMixed,
428                                                                          ulpToleranceInDouble) });
429     }
430
431     int numWarningsToTolerate = 1;
432     runTest(&fileManager_, &runner_, simulationName, numWarningsToTolerate, mdpFieldValues,
433             energyTermsToCompare);
434 }
435
436 // TODO The time for OpenCL kernel compilation means these tests time
437 // out. Once that compilation is cached for the whole process, these
438 // tests can run in such configurations.
439 #if !GMX_GPU_OPENCL
440
441 INSTANTIATE_TEST_CASE_P(
442         NormalIntegrators,
443         MdrunNoAppendContinuationIsExact,
444         ::testing::Combine(::testing::Values("argon12", "spc2", "alanine_vsite_vacuo"),
445                            ::testing::Values("md", "md-vv", "bd", "sd"),
446                            ::testing::Values("no"),
447                            ::testing::Values("no")));
448
449 INSTANTIATE_TEST_CASE_P(NormalIntegratorsWithFEP,
450                         MdrunNoAppendContinuationIsExact,
451                         ::testing::Combine(::testing::Values("nonanol_vacuo"),
452                                            ::testing::Values("md", "md-vv", "bd", "sd"),
453                                            ::testing::Values("no"),
454                                            ::testing::Values("no")));
455
456 INSTANTIATE_TEST_CASE_P(
457         NVT,
458         MdrunNoAppendContinuationIsExact,
459         ::testing::Combine(::testing::Values("argon12"),
460                            ::testing::Values("md", "md-vv"),
461                            ::testing::Values("berendsen", "v-rescale", "nose-hoover"),
462                            ::testing::Values("no")));
463
464 INSTANTIATE_TEST_CASE_P(NPH,
465                         MdrunNoAppendContinuationIsExact,
466                         ::testing::Combine(::testing::Values("argon12"),
467                                            ::testing::Values("md", "md-vv"),
468                                            ::testing::Values("no"),
469                                            ::testing::Values("berendsen", "parrinello-rahman")));
470
471 INSTANTIATE_TEST_CASE_P(
472         NPT,
473         MdrunNoAppendContinuationIsExact,
474         ::testing::Combine(::testing::Values("argon12"),
475                            ::testing::Values("md", "md-vv"),
476                            ::testing::Values("berendsen", "v-rescale", "nose-hoover"),
477                            ::testing::Values("berendsen", "parrinello-rahman")));
478
479 INSTANTIATE_TEST_CASE_P(MTTK,
480                         MdrunNoAppendContinuationIsExact,
481                         ::testing::Combine(::testing::Values("argon12"),
482                                            ::testing::Values("md-vv"),
483                                            ::testing::Values("nose-hoover"),
484                                            ::testing::Values("mttk")));
485
486 #endif
487
488 } // namespace
489 } // namespace test
490 } // namespace gmx