Improve stability of ContinuationIsExact tests
[alexxy/gromacs.git] / src / programs / mdrun / tests / exactcontinuation.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  * \brief Tests that mdrun restarts are exact, that is that two
38  * successive runs without appending reproduce a single-part run.
39  *
40  * \todo Extend the coverage to the appending case.
41  *
42  * \author Mark Abraham <mark.j.abraham@gmail.com>
43  * \ingroup module_mdrun_integration_tests
44  */
45 #include "gmxpre.h"
46
47 #include "config.h"
48
49 #include <string>
50 #include <tuple>
51
52 #include <gtest/gtest.h>
53
54 #include "gromacs/topology/idef.h"
55 #include "gromacs/trajectory/energyframe.h"
56 #include "gromacs/utility/stringutil.h"
57
58 #include "testutils/mpitest.h"
59 #include "testutils/simulationdatabase.h"
60 #include "testutils/testasserts.h"
61
62 #include "energycomparison.h"
63 #include "energyreader.h"
64 #include "moduletest.h"
65
66 namespace gmx
67 {
68 namespace test
69 {
70 namespace
71 {
72
73
74 /*! \brief Manages comparing a pair of matching energy frames from a
75  * normal run and the same run in two parts.
76  *
77  * The passed frame readers must contain logically equivalent
78  * contents, with no extra or missing frames from either the
79  * one- or two-part run.
80  *
81  * \todo This class has a similar interface with one in
82  * mdcomparison.h, but not enough to warrant extracting an interface
83  * class. Perhaps parts of this should be cast as a class that returns
84  * iterators to successive frames, so that the fancy footwork for
85  * pretending a two-part run is one sequence is separate from the
86  * comparison of that sequene with that of the one-part run?
87  *
88  * \tparam FrameReader  Has readNextFrame() and frame() methods
89  *                      useful for returning successive Frame objects */
90 template<class FrameReader>
91 class ContinuationFramePairManager
92 {
93 public:
94     //! Convenience typedef
95     typedef std::unique_ptr<FrameReader> FrameReaderPtr;
96     //! Constructor
97     ContinuationFramePairManager(FrameReaderPtr full, FrameReaderPtr firstPart, FrameReaderPtr secondPart) :
98         full_(std::move(full)),
99         firstPart_(std::move(firstPart)),
100         secondPart_(std::move(secondPart)),
101         isFirstPart_(true)
102     {
103     }
104     /*! \brief Probe for a pair of valid frames, and return true if both are found.
105      *
106      * Gives a test failure if exactly one frame is found, because
107      * it is an error for either run to have missing or extra
108      * frames.  Note that the frame where the two-part run ends
109      * and begins is duplicated between the two output files by
110      * mdrun, and the test accommodates this.
111      *
112      * \todo This would be straightforward if velocity Verlet
113      * behaved like other integrators. */
114     bool shouldContinueComparing()
115     {
116         if (full_->readNextFrame())
117         {
118             if (isFirstPart_)
119             {
120                 if (firstPart_->readNextFrame())
121                 {
122                     // Two valid next frames exist, so we should continue comparing.
123                     return true;
124                 }
125                 else
126                 {
127                     // First part ran out of frames, move on to the second part
128                     isFirstPart_ = false;
129                     if (secondPart_->readNextFrame())
130                     {
131                         // Skip a second-part frame so the one we will
132                         // read can compare with the next full-run
133                         // frames.
134                         secondPart_->frame();
135                         if (secondPart_->readNextFrame())
136                         {
137                             // Two valid next frames exist, so we should continue comparing.
138                             return true;
139                         }
140                         else
141                         {
142                             ADD_FAILURE() << "Second-part energy file had no (new) frames";
143                         }
144                     }
145                     else
146                     {
147                         ADD_FAILURE() << "Second-part energy file had no frames";
148                     }
149                 }
150             }
151             else
152             {
153                 if (secondPart_->readNextFrame())
154                 {
155                     // Two valid next frames exist, so we should continue comparing.
156                     return true;
157                 }
158                 else
159                 {
160                     ADD_FAILURE() << "Full run energy file had at least one more frame than "
161                                      "two-part run energy file";
162                 }
163             }
164         }
165         else
166         {
167             if (isFirstPart_)
168             {
169                 ADD_FAILURE() << "Full-run energy file ran out of frames before the first part of "
170                                  "the two-part run completed";
171             }
172             else
173             {
174                 if (secondPart_->readNextFrame())
175                 {
176                     ADD_FAILURE() << "Two-part run energy file had at least one more frame than "
177                                      "full-run energy file";
178                 }
179                 else
180                 {
181                     // Both files ran out of frames at the same time, which is the expected behaviour.
182                 }
183             }
184         }
185         // At least one file is out of frames, so should not continue comparing.
186         return false;
187     }
188     /*! \brief Compare all possible pairs of frames using \c compareTwoFrames.
189      *
190      * \tparam Frame  The type of frame used in the comparison (returned
191      *                by FrameReader and used by compareTwoFrames). */
192     template<class Frame>
193     void compareAllFramePairs(std::function<void(const Frame&, const Frame&)> compareTwoFrames)
194     {
195         while (shouldContinueComparing())
196         {
197             EnergyFrame firstFrame  = full_->frame();
198             EnergyFrame secondFrame = isFirstPart_ ? firstPart_->frame() : secondPart_->frame();
199             SCOPED_TRACE("Comparing frames from two runs '" + firstFrame.frameName() + "' and '"
200                          + secondFrame.frameName() + "'");
201             compareTwoFrames(firstFrame, secondFrame);
202         }
203     }
204
205 private:
206     EnergyFrameReaderPtr full_;
207     EnergyFrameReaderPtr firstPart_;
208     EnergyFrameReaderPtr secondPart_;
209     bool                 isFirstPart_;
210 };
211
212 /*! \brief Run grompp for a normal mdrun, the same mdrun stopping part
213  * way, doing a continuation, and compare the results. */
214 void runTest(TestFileManager*            fileManager,
215              SimulationRunner*           runner,
216              const std::string&          simulationName,
217              int                         maxWarningsTolerated,
218              const MdpFieldValues&       mdpFieldValues,
219              const EnergyTermsToCompare& energyTermsToCompare)
220 {
221     int numRanksAvailable = getNumberOfTestMpiRanks();
222     if (!isNumberOfPpRanksSupported(simulationName, numRanksAvailable))
223     {
224         fprintf(stdout,
225                 "Test system '%s' cannot run with %d ranks.\n"
226                 "The supported numbers are: %s\n",
227                 simulationName.c_str(),
228                 numRanksAvailable,
229                 reportNumbersOfPpRanksSupported(simulationName).c_str());
230         return;
231     }
232
233     // prepare some names for files to use with the two mdrun calls
234     std::string fullRunTprFileName             = fileManager->getTemporaryFilePath("full.tpr");
235     std::string firstPartRunTprFileName        = fileManager->getTemporaryFilePath("firstpart.tpr");
236     std::string fullRunEdrFileName             = fileManager->getTemporaryFilePath("full.edr");
237     std::string firstPartRunEdrFileName        = fileManager->getTemporaryFilePath("firstpart.edr");
238     std::string firstPartRunCheckpointFileName = fileManager->getTemporaryFilePath("firstpart.cpt");
239     std::string secondPartRunEdrFileName       = fileManager->getTemporaryFilePath("secondpart");
240
241     // prepare the full run .tpr file, which will be used for the full
242     // run, and for the second part of the two-part run.
243     {
244         // TODO evolve grompp to report the number of warnings issued, so
245         // tests always expect the right number.
246         CommandLine caller;
247         caller.append("grompp");
248         caller.addOption("-maxwarn", maxWarningsTolerated);
249         runner->useTopGroAndNdxFromDatabase(simulationName);
250         runner->useStringAsMdpFile(prepareMdpFileContents(mdpFieldValues));
251         runner->tprFileName_ = fullRunTprFileName;
252         EXPECT_EQ(0, runner->callGrompp(caller));
253     }
254
255     const std::string splitPoint = std::to_string(std::stoi(mdpFieldValues.at("nsteps")) / 2);
256
257     // prepare the .tpr file for the first part of the two-part run
258     {
259         // TODO evolve grompp to report the number of warnings issued, so
260         // tests always expect the right number.
261         CommandLine caller;
262         caller.append("grompp");
263         caller.addOption("-maxwarn", maxWarningsTolerated);
264         runner->useTopGroAndNdxFromDatabase(simulationName);
265         auto firstPartMdpFieldValues      = mdpFieldValues;
266         firstPartMdpFieldValues["nsteps"] = splitPoint;
267         runner->useStringAsMdpFile(prepareMdpFileContents(firstPartMdpFieldValues));
268         runner->tprFileName_ = firstPartRunTprFileName;
269         EXPECT_EQ(0, runner->callGrompp(caller));
270     }
271
272     // do a normal mdrun
273     {
274         runner->tprFileName_ = fullRunTprFileName;
275         runner->edrFileName_ = fullRunEdrFileName;
276         CommandLine fullRunCaller;
277         fullRunCaller.append("mdrun");
278         /* Force neighborlist update at the beginning of the second half of the trajectory.
279          * Doing so through CLI options prevents pairlist tuning from changing it. */
280         fullRunCaller.addOption("-nstlist", splitPoint);
281         ASSERT_EQ(0, runner->callMdrun(fullRunCaller));
282     }
283
284     // do a repeat of the first part of the same mdrun
285     {
286         runner->tprFileName_ = firstPartRunTprFileName;
287         runner->edrFileName_ = firstPartRunEdrFileName;
288         CommandLine firstPartRunCaller;
289         firstPartRunCaller.append("mdrun");
290         firstPartRunCaller.addOption("-cpo", firstPartRunCheckpointFileName);
291         ASSERT_EQ(0, runner->callMdrun(firstPartRunCaller));
292     }
293
294     // do a continuation (without appending) from the first part of
295     // that same mdrun
296     {
297         runner->tprFileName_ = fullRunTprFileName;
298         runner->edrFileName_ = secondPartRunEdrFileName;
299         CommandLine secondPartRunCaller;
300         secondPartRunCaller.append("mdrun");
301         // TODO We could test with appending but it would need a
302         // different implementation.
303         secondPartRunCaller.append("-noappend");
304         secondPartRunCaller.addOption("-cpi", firstPartRunCheckpointFileName);
305         ASSERT_EQ(0, runner->callMdrun(secondPartRunCaller));
306         // Cope with how -noappend works
307         secondPartRunEdrFileName += ".part0002.edr";
308     }
309
310     // Build the functor that will compare energy frames on the chosen
311     // energy terms.
312     EnergyComparison energyComparison(energyTermsToCompare, MaxNumFrames::compareAllFrames());
313
314     // Build the manager that will present matching pairs of frames to compare.
315     //
316     // TODO Here is an unnecessary copy of keys (ie. the energy term
317     // names), for convenience. In the future, use a range.
318     auto namesOfEnergiesToMatch = energyComparison.getEnergyNames();
319     ContinuationFramePairManager<EnergyFrameReader> energyManager(
320             openEnergyFileToReadTerms(fullRunEdrFileName, namesOfEnergiesToMatch),
321             openEnergyFileToReadTerms(firstPartRunEdrFileName, namesOfEnergiesToMatch),
322             openEnergyFileToReadTerms(secondPartRunEdrFileName, namesOfEnergiesToMatch));
323     // Compare the energy frames.
324     energyManager.compareAllFramePairs<EnergyFrame>(energyComparison);
325 }
326
327 /*! \brief Test fixture for mdrun exact continuations
328  *
329  * This test ensures mdrun can run a simulation, writing a trajectory
330  * and matching energies, and reproduce to within a tolerance the same
331  * energies from runs that stopped part of the way, and restarted from
332  * the checkpoint.
333  *
334  * \todo Is there value in testing with mdrun -reprod? As well as
335  * without it?
336  *
337  * \todo Add FEP case. */
338 class MdrunNoAppendContinuationIsExact :
339     public MdrunTestFixture,
340     public ::testing::WithParamInterface<std::tuple<std::string, std::string, std::string, std::string, MdpParameterDatabase>>
341 {
342 public:
343     //! Constructor
344     MdrunNoAppendContinuationIsExact() {}
345 };
346
347 /* Listing all of these is tedious, but there's no other way to get a
348  * usefully descriptive string into the test-case name, so that when
349  * one breaks we can find out which one is broken without referring to
350  * this source code file.
351  *
352  * NOTE The choices for the tolerances are arbitrary but sufficient
353  * for comparison of runs to work on different hardware, and kinds and
354  * degrees parallelism. */
355
356 TEST_P(MdrunNoAppendContinuationIsExact, WithinTolerances)
357 {
358     auto params                  = GetParam();
359     auto simulationName          = std::get<0>(params);
360     auto integrator              = std::get<1>(params);
361     auto temperatureCoupling     = std::get<2>(params);
362     auto pressureCoupling        = std::get<3>(params);
363     auto additionalMdpParameters = std::get<4>(params);
364
365     // Check for unimplemented functionality
366     // TODO: Update this as modular simulator gains functionality
367     const bool isModularSimulatorExplicitlyDisabled = (getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
368     const bool isTCouplingCompatibleWithModularSimulator =
369             (temperatureCoupling == "no" || temperatureCoupling == "v-rescale"
370              || temperatureCoupling == "berendsen");
371     // GPU update is not compatible with modular simulator
372     const bool isGpuUpdateRequested = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr);
373     if (integrator == "md-vv" && pressureCoupling == "parrinello-rahman"
374         && (isModularSimulatorExplicitlyDisabled || !isTCouplingCompatibleWithModularSimulator
375             || isGpuUpdateRequested))
376     {
377         // Under md-vv, Parrinello-Rahman is only implemented for the modular simulator
378         return;
379     }
380     if (integrator == "md-vv" && temperatureCoupling == "nose-hoover"
381         && pressureCoupling == "berendsen")
382     {
383         // This combination is not implemented in either legacy or modular simulator
384         return;
385     }
386
387     SCOPED_TRACE(
388             formatString("Comparing normal and two-part run of simulation '%s' "
389                          "with integrator '%s'",
390                          simulationName.c_str(),
391                          integrator.c_str()));
392
393     auto mdpFieldValues = prepareMdpFieldValues(simulationName.c_str(),
394                                                 integrator.c_str(),
395                                                 temperatureCoupling.c_str(),
396                                                 pressureCoupling.c_str(),
397                                                 additionalMdpParameters);
398     // The exact lambda state choice is unimportant, so long as there
399     // is one when using an FEP input.
400     mdpFieldValues["init-lambda-state"] = "3";
401     mdpFieldValues["nsteps"]            = "16";
402
403     // Forces and update on GPUs are generally not reproducible enough for a tight
404     // tolerance. Similarly, the propagation of bd is not as
405     // reproducible as the others. So we use several ULP tolerance
406     // in all cases. This is looser than needed e.g. for md and md-vv
407     // with forces on CPUs, but there is no real risk of a bug with
408     // those propagators that would only be caught with a tighter
409     // tolerance in this particular test.
410     int ulpToleranceInMixed  = 128;
411     int ulpToleranceInDouble = 64;
412     if (integrator == "bd")
413     {
414         // Somehow, the bd integrator has never been as reproducible
415         // as the others, either in continuations or reruns.
416         ulpToleranceInMixed = 200;
417     }
418     EnergyTermsToCompare energyTermsToCompare{
419         { { interaction_function[F_EPOT].longname,
420             relativeToleranceAsPrecisionDependentUlp(10.0, ulpToleranceInMixed, ulpToleranceInDouble) },
421           { interaction_function[F_EKIN].longname,
422             relativeToleranceAsPrecisionDependentUlp(10.0, ulpToleranceInMixed, ulpToleranceInDouble) } }
423     };
424
425     if (temperatureCoupling != "no" || pressureCoupling != "no")
426     {
427         if (simulationName == "alanine_vacuo")
428         {
429             // This is slightly less reproducible
430             energyTermsToCompare.insert({ interaction_function[F_ECONSERVED].longname,
431                                           relativeToleranceAsPrecisionDependentUlp(
432                                                   10.0, ulpToleranceInMixed * 2, ulpToleranceInDouble) });
433         }
434         else
435         {
436             energyTermsToCompare.insert({ interaction_function[F_ECONSERVED].longname,
437                                           relativeToleranceAsPrecisionDependentUlp(
438                                                   10.0, ulpToleranceInMixed, ulpToleranceInDouble) });
439         }
440     }
441
442     if (pressureCoupling == "parrinello-rahman")
443     {
444         energyTermsToCompare.insert({ "Box-Vel-XX",
445                                       relativeToleranceAsPrecisionDependentUlp(
446                                               1e-12, ulpToleranceInMixed, ulpToleranceInDouble) });
447         energyTermsToCompare.insert({ "Box-Vel-YY",
448                                       relativeToleranceAsPrecisionDependentUlp(
449                                               1e-12, ulpToleranceInMixed, ulpToleranceInDouble) });
450         energyTermsToCompare.insert({ "Box-Vel-ZZ",
451                                       relativeToleranceAsPrecisionDependentUlp(
452                                               1e-12, ulpToleranceInMixed, ulpToleranceInDouble) });
453     }
454
455     int numWarningsToTolerate = 1;
456     runTest(&fileManager_, &runner_, simulationName, numWarningsToTolerate, mdpFieldValues, energyTermsToCompare);
457 }
458
459 // TODO The time for OpenCL kernel compilation means these tests time
460 // out. Once that compilation is cached for the whole process, these
461 // tests can run in such configurations.
462 #if !GMX_GPU_OPENCL
463
464 INSTANTIATE_TEST_SUITE_P(
465         NormalIntegrators,
466         MdrunNoAppendContinuationIsExact,
467         ::testing::Combine(::testing::Values("argon12", "spc2", "alanine_vsite_vacuo"),
468                            ::testing::Values("md", "md-vv", "bd", "sd"),
469                            ::testing::Values("no"),
470                            ::testing::Values("no"),
471                            ::testing::Values(MdpParameterDatabase::Default)));
472
473 INSTANTIATE_TEST_SUITE_P(NormalIntegratorsWithFEP,
474                          MdrunNoAppendContinuationIsExact,
475                          ::testing::Combine(::testing::Values("nonanol_vacuo"),
476                                             ::testing::Values("md", "md-vv", "bd", "sd"),
477                                             ::testing::Values("no"),
478                                             ::testing::Values("no"),
479                                             ::testing::Values(MdpParameterDatabase::Default)));
480
481 INSTANTIATE_TEST_SUITE_P(
482         NVT,
483         MdrunNoAppendContinuationIsExact,
484         ::testing::Combine(::testing::Values("argon12"),
485                            ::testing::Values("md", "md-vv"),
486                            ::testing::Values("berendsen", "v-rescale", "nose-hoover"),
487                            ::testing::Values("no"),
488                            ::testing::Values(MdpParameterDatabase::Default)));
489
490 INSTANTIATE_TEST_SUITE_P(
491         NPH,
492         MdrunNoAppendContinuationIsExact,
493         ::testing::Combine(::testing::Values("argon12"),
494                            ::testing::Values("md", "md-vv"),
495                            ::testing::Values("no"),
496                            ::testing::Values("berendsen", "parrinello-rahman", "C-rescale"),
497                            ::testing::Values(MdpParameterDatabase::Default)));
498
499 INSTANTIATE_TEST_SUITE_P(
500         NPT,
501         MdrunNoAppendContinuationIsExact,
502         ::testing::Combine(::testing::Values("argon12"),
503                            ::testing::Values("md", "md-vv"),
504                            ::testing::Values("berendsen", "v-rescale", "nose-hoover"),
505                            ::testing::Values("berendsen", "parrinello-rahman", "C-rescale"),
506                            ::testing::Values(MdpParameterDatabase::Default)));
507
508 INSTANTIATE_TEST_SUITE_P(MTTK,
509                          MdrunNoAppendContinuationIsExact,
510                          ::testing::Combine(::testing::Values("argon12"),
511                                             ::testing::Values("md-vv"),
512                                             ::testing::Values("nose-hoover"),
513                                             ::testing::Values("mttk"),
514                                             ::testing::Values(MdpParameterDatabase::Default)));
515
516 INSTANTIATE_TEST_SUITE_P(Pull,
517                          MdrunNoAppendContinuationIsExact,
518                          ::testing::Combine(::testing::Values("spc2"),
519                                             ::testing::Values("md", "md-vv"),
520                                             ::testing::Values("no"),
521                                             ::testing::Values("no"),
522                                             ::testing::Values(MdpParameterDatabase::Pull)));
523
524 INSTANTIATE_TEST_SUITE_P(Awh,
525                          MdrunNoAppendContinuationIsExact,
526                          ::testing::Combine(::testing::Values("alanine_vacuo"),
527                                             ::testing::Values("md", "md-vv"),
528                                             ::testing::Values("v-rescale"),
529                                             ::testing::Values("no"),
530                                             ::testing::Values(MdpParameterDatabase::Awh)));
531
532 #else
533 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MdrunNoAppendContinuationIsExact);
534 #endif
535
536 } // namespace
537 } // namespace test
538 } // namespace gmx