efd2e806455d90203dfaf334db112c5d3f6c3fef
[alexxy/gromacs.git] / src / programs / mdrun / tests / exactcontinuation.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  * \brief Tests that mdrun restarts are exact, that is that two
38  * successive runs without appending reproduce a single-part run.
39  *
40  * \todo Extend the coverage to the appending case.
41  *
42  * \author Mark Abraham <mark.j.abraham@gmail.com>
43  * \ingroup module_mdrun_integration_tests
44  */
45 #include "gmxpre.h"
46
47 #include "config.h"
48
49 #include <string>
50 #include <tuple>
51
52 #include <gtest/gtest.h>
53
54 #include "gromacs/topology/idef.h"
55 #include "gromacs/trajectory/energyframe.h"
56 #include "gromacs/utility/stringutil.h"
57
58 #include "testutils/mpitest.h"
59 #include "testutils/simulationdatabase.h"
60 #include "testutils/testasserts.h"
61
62 #include "energycomparison.h"
63 #include "energyreader.h"
64 #include "moduletest.h"
65
66 namespace gmx
67 {
68 namespace test
69 {
70 namespace
71 {
72
73
74 /*! \brief Manages comparing a pair of matching energy frames from a
75  * normal run and the same run in two parts.
76  *
77  * The passed frame readers must contain logically equivalent
78  * contents, with no extra or missing frames from either the
79  * one- or two-part run.
80  *
81  * \todo This class has a similar interface with one in
82  * mdcomparison.h, but not enough to warrant extracting an interface
83  * class. Perhaps parts of this should be cast as a class that returns
84  * iterators to successive frames, so that the fancy footwork for
85  * pretending a two-part run is one sequence is separate from the
86  * comparison of that sequene with that of the one-part run?
87  *
88  * \tparam FrameReader  Has readNextFrame() and frame() methods
89  *                      useful for returning successive Frame objects */
90 template<class FrameReader>
91 class ContinuationFramePairManager
92 {
93 public:
94     //! Convenience typedef
95     typedef std::unique_ptr<FrameReader> FrameReaderPtr;
96     //! Constructor
97     ContinuationFramePairManager(FrameReaderPtr full, FrameReaderPtr firstPart, FrameReaderPtr secondPart) :
98         full_(std::move(full)),
99         firstPart_(std::move(firstPart)),
100         secondPart_(std::move(secondPart)),
101         isFirstPart_(true)
102     {
103     }
104     /*! \brief Probe for a pair of valid frames, and return true if both are found.
105      *
106      * Gives a test failure if exactly one frame is found, because
107      * it is an error for either run to have missing or extra
108      * frames.  Note that the frame where the two-part run ends
109      * and begins is duplicated between the two output files by
110      * mdrun, and the test accommodates this.
111      *
112      * \todo This would be straightforward if velocity Verlet
113      * behaved like other integrators. */
114     bool shouldContinueComparing()
115     {
116         if (full_->readNextFrame())
117         {
118             if (isFirstPart_)
119             {
120                 if (firstPart_->readNextFrame())
121                 {
122                     // Two valid next frames exist, so we should continue comparing.
123                     return true;
124                 }
125                 else
126                 {
127                     // First part ran out of frames, move on to the second part
128                     isFirstPart_ = false;
129                     if (secondPart_->readNextFrame())
130                     {
131                         // Skip a second-part frame so the one we will
132                         // read can compare with the next full-run
133                         // frames.
134                         secondPart_->frame();
135                         if (secondPart_->readNextFrame())
136                         {
137                             // Two valid next frames exist, so we should continue comparing.
138                             return true;
139                         }
140                         else
141                         {
142                             ADD_FAILURE() << "Second-part energy file had no (new) frames";
143                         }
144                     }
145                     else
146                     {
147                         ADD_FAILURE() << "Second-part energy file had no frames";
148                     }
149                 }
150             }
151             else
152             {
153                 if (secondPart_->readNextFrame())
154                 {
155                     // Two valid next frames exist, so we should continue comparing.
156                     return true;
157                 }
158                 else
159                 {
160                     ADD_FAILURE() << "Full run energy file had at least one more frame than "
161                                      "two-part run energy file";
162                 }
163             }
164         }
165         else
166         {
167             if (isFirstPart_)
168             {
169                 ADD_FAILURE() << "Full-run energy file ran out of frames before the first part of "
170                                  "the two-part run completed";
171             }
172             else
173             {
174                 if (secondPart_->readNextFrame())
175                 {
176                     ADD_FAILURE() << "Two-part run energy file had at least one more frame than "
177                                      "full-run energy file";
178                 }
179                 else
180                 {
181                     // Both files ran out of frames at the same time, which is the expected behaviour.
182                 }
183             }
184         }
185         // At least one file is out of frames, so should not continue comparing.
186         return false;
187     }
188     /*! \brief Compare all possible pairs of frames using \c compareTwoFrames.
189      *
190      * \tparam Frame  The type of frame used in the comparison (returned
191      *                by FrameReader and used by compareTwoFrames). */
192     template<class Frame>
193     void compareAllFramePairs(std::function<void(const Frame&, const Frame&)> compareTwoFrames)
194     {
195         while (shouldContinueComparing())
196         {
197             EnergyFrame firstFrame  = full_->frame();
198             EnergyFrame secondFrame = isFirstPart_ ? firstPart_->frame() : secondPart_->frame();
199             SCOPED_TRACE("Comparing frames from two runs '" + firstFrame.frameName() + "' and '"
200                          + secondFrame.frameName() + "'");
201             compareTwoFrames(firstFrame, secondFrame);
202         }
203     }
204
205 private:
206     EnergyFrameReaderPtr full_;
207     EnergyFrameReaderPtr firstPart_;
208     EnergyFrameReaderPtr secondPart_;
209     bool                 isFirstPart_;
210 };
211
212 /*! \brief Run grompp for a normal mdrun, the same mdrun stopping part
213  * way, doing a continuation, and compare the results. */
214 void runTest(TestFileManager*            fileManager,
215              SimulationRunner*           runner,
216              const std::string&          simulationName,
217              int                         maxWarningsTolerated,
218              const MdpFieldValues&       mdpFieldValues,
219              const EnergyTermsToCompare& energyTermsToCompare)
220 {
221     int numRanksAvailable = getNumberOfTestMpiRanks();
222     if (!isNumberOfPpRanksSupported(simulationName, numRanksAvailable))
223     {
224         fprintf(stdout,
225                 "Test system '%s' cannot run with %d ranks.\n"
226                 "The supported numbers are: %s\n",
227                 simulationName.c_str(),
228                 numRanksAvailable,
229                 reportNumbersOfPpRanksSupported(simulationName).c_str());
230         return;
231     }
232
233     // prepare some names for files to use with the two mdrun calls
234     std::string fullRunTprFileName             = fileManager->getTemporaryFilePath("full.tpr");
235     std::string firstPartRunTprFileName        = fileManager->getTemporaryFilePath("firstpart.tpr");
236     std::string fullRunEdrFileName             = fileManager->getTemporaryFilePath("full.edr");
237     std::string firstPartRunEdrFileName        = fileManager->getTemporaryFilePath("firstpart.edr");
238     std::string firstPartRunCheckpointFileName = fileManager->getTemporaryFilePath("firstpart.cpt");
239     std::string secondPartRunEdrFileName       = fileManager->getTemporaryFilePath("secondpart");
240
241     // prepare the full run .tpr file, which will be used for the full
242     // run, and for the second part of the two-part run.
243     {
244         // TODO evolve grompp to report the number of warnings issued, so
245         // tests always expect the right number.
246         CommandLine caller;
247         caller.append("grompp");
248         caller.addOption("-maxwarn", maxWarningsTolerated);
249         runner->useTopGroAndNdxFromDatabase(simulationName);
250         runner->useStringAsMdpFile(prepareMdpFileContents(mdpFieldValues));
251         runner->tprFileName_ = fullRunTprFileName;
252         EXPECT_EQ(0, runner->callGrompp(caller));
253     }
254
255     // prepare the .tpr file for the first part of the two-part run
256     {
257         // TODO evolve grompp to report the number of warnings issued, so
258         // tests always expect the right number.
259         CommandLine caller;
260         caller.append("grompp");
261         caller.addOption("-maxwarn", maxWarningsTolerated);
262         runner->useTopGroAndNdxFromDatabase(simulationName);
263         auto firstPartMdpFieldValues      = mdpFieldValues;
264         firstPartMdpFieldValues["nsteps"] = std::to_string(std::stoi(mdpFieldValues.at("nsteps")) / 2);
265         runner->useStringAsMdpFile(prepareMdpFileContents(firstPartMdpFieldValues));
266         runner->tprFileName_ = firstPartRunTprFileName;
267         EXPECT_EQ(0, runner->callGrompp(caller));
268     }
269
270     // do a normal mdrun
271     {
272         runner->tprFileName_ = fullRunTprFileName;
273         runner->edrFileName_ = fullRunEdrFileName;
274         CommandLine fullRunCaller;
275         fullRunCaller.append("mdrun");
276         ASSERT_EQ(0, runner->callMdrun(fullRunCaller));
277     }
278
279     // do a repeat of the first part of the same mdrun
280     {
281         runner->tprFileName_ = firstPartRunTprFileName;
282         runner->edrFileName_ = firstPartRunEdrFileName;
283         CommandLine firstPartRunCaller;
284         firstPartRunCaller.append("mdrun");
285         firstPartRunCaller.addOption("-cpo", firstPartRunCheckpointFileName);
286         ASSERT_EQ(0, runner->callMdrun(firstPartRunCaller));
287     }
288
289     // do a continuation (without appending) from the first part of
290     // that same mdrun
291     {
292         runner->tprFileName_ = fullRunTprFileName;
293         runner->edrFileName_ = secondPartRunEdrFileName;
294         CommandLine secondPartRunCaller;
295         secondPartRunCaller.append("mdrun");
296         // TODO We could test with appending but it would need a
297         // different implementation.
298         secondPartRunCaller.append("-noappend");
299         secondPartRunCaller.addOption("-cpi", firstPartRunCheckpointFileName);
300         ASSERT_EQ(0, runner->callMdrun(secondPartRunCaller));
301         // Cope with how -noappend works
302         secondPartRunEdrFileName += ".part0002.edr";
303     }
304
305     // Build the functor that will compare energy frames on the chosen
306     // energy terms.
307     EnergyComparison energyComparison(energyTermsToCompare, MaxNumFrames::compareAllFrames());
308
309     // Build the manager that will present matching pairs of frames to compare.
310     //
311     // TODO Here is an unnecessary copy of keys (ie. the energy term
312     // names), for convenience. In the future, use a range.
313     auto namesOfEnergiesToMatch = energyComparison.getEnergyNames();
314     ContinuationFramePairManager<EnergyFrameReader> energyManager(
315             openEnergyFileToReadTerms(fullRunEdrFileName, namesOfEnergiesToMatch),
316             openEnergyFileToReadTerms(firstPartRunEdrFileName, namesOfEnergiesToMatch),
317             openEnergyFileToReadTerms(secondPartRunEdrFileName, namesOfEnergiesToMatch));
318     // Compare the energy frames.
319     energyManager.compareAllFramePairs<EnergyFrame>(energyComparison);
320 }
321
322 /*! \brief Test fixture for mdrun exact continuations
323  *
324  * This test ensures mdrun can run a simulation, writing a trajectory
325  * and matching energies, and reproduce to within a tolerance the same
326  * energies from runs that stopped part of the way, and restarted from
327  * the checkpoint.
328  *
329  * \todo Is there value in testing with mdrun -reprod? As well as
330  * without it?
331  *
332  * \todo Add FEP case. */
333 class MdrunNoAppendContinuationIsExact :
334     public MdrunTestFixture,
335     public ::testing::WithParamInterface<std::tuple<std::string, std::string, std::string, std::string, MdpParameterDatabase>>
336 {
337 public:
338     //! Constructor
339     MdrunNoAppendContinuationIsExact() {}
340 };
341
342 /* Listing all of these is tedious, but there's no other way to get a
343  * usefully descriptive string into the test-case name, so that when
344  * one breaks we can find out which one is broken without referring to
345  * this source code file.
346  *
347  * NOTE The choices for the tolerances are arbitrary but sufficient
348  * for comparison of runs to work on different hardware, and kinds and
349  * degrees parallelism. */
350
351 TEST_P(MdrunNoAppendContinuationIsExact, WithinTolerances)
352 {
353     auto params                  = GetParam();
354     auto simulationName          = std::get<0>(params);
355     auto integrator              = std::get<1>(params);
356     auto temperatureCoupling     = std::get<2>(params);
357     auto pressureCoupling        = std::get<3>(params);
358     auto additionalMdpParameters = std::get<4>(params);
359
360     // Check for unimplemented functionality
361     // TODO: Update this as modular simulator gains functionality
362     const bool isModularSimulatorExplicitlyDisabled = (getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
363     const bool isTCouplingCompatibleWithModularSimulator =
364             (temperatureCoupling == "no" || temperatureCoupling == "v-rescale"
365              || temperatureCoupling == "berendsen");
366     // GPU update is not compatible with modular simulator
367     const bool isGpuUpdateRequested = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr);
368     if (integrator == "md-vv" && pressureCoupling == "parrinello-rahman"
369         && (isModularSimulatorExplicitlyDisabled || !isTCouplingCompatibleWithModularSimulator
370             || isGpuUpdateRequested))
371     {
372         // Under md-vv, Parrinello-Rahman is only implemented for the modular simulator
373         return;
374     }
375     if (integrator == "md-vv" && temperatureCoupling == "nose-hoover"
376         && pressureCoupling == "berendsen")
377     {
378         // This combination is not implemented in either legacy or modular simulator
379         return;
380     }
381
382     SCOPED_TRACE(
383             formatString("Comparing normal and two-part run of simulation '%s' "
384                          "with integrator '%s'",
385                          simulationName.c_str(),
386                          integrator.c_str()));
387
388     auto mdpFieldValues = prepareMdpFieldValues(simulationName.c_str(),
389                                                 integrator.c_str(),
390                                                 temperatureCoupling.c_str(),
391                                                 pressureCoupling.c_str(),
392                                                 additionalMdpParameters);
393     // The exact lambda state choice is unimportant, so long as there
394     // is one when using an FEP input.
395     mdpFieldValues["init-lambda-state"] = "3";
396     mdpFieldValues["nsteps"]            = "16";
397
398     // Forces and update on GPUs are generally not reproducible enough for a tight
399     // tolerance. Similarly, the propagation of bd is not as
400     // reproducible as the others. So we use several ULP tolerance
401     // in all cases. This is looser than needed e.g. for md and md-vv
402     // with forces on CPUs, but there is no real risk of a bug with
403     // those propagators that would only be caught with a tighter
404     // tolerance in this particular test.
405     int ulpToleranceInMixed  = 128;
406     int ulpToleranceInDouble = 64;
407     if (integrator == "bd")
408     {
409         // Somehow, the bd integrator has never been as reproducible
410         // as the others, either in continuations or reruns.
411         ulpToleranceInMixed = 200;
412     }
413     EnergyTermsToCompare energyTermsToCompare{
414         { { interaction_function[F_EPOT].longname,
415             relativeToleranceAsPrecisionDependentUlp(10.0, ulpToleranceInMixed, ulpToleranceInDouble) },
416           { interaction_function[F_EKIN].longname,
417             relativeToleranceAsPrecisionDependentUlp(10.0, ulpToleranceInMixed, ulpToleranceInDouble) } }
418     };
419
420     if (temperatureCoupling != "no" || pressureCoupling != "no")
421     {
422         if (simulationName == "alanine_vacuo")
423         {
424             // This is slightly less reproducible
425             energyTermsToCompare.insert({ interaction_function[F_ECONSERVED].longname,
426                                           relativeToleranceAsPrecisionDependentUlp(
427                                                   10.0, ulpToleranceInMixed * 2, ulpToleranceInDouble) });
428         }
429         else
430         {
431             energyTermsToCompare.insert({ interaction_function[F_ECONSERVED].longname,
432                                           relativeToleranceAsPrecisionDependentUlp(
433                                                   10.0, ulpToleranceInMixed, ulpToleranceInDouble) });
434         }
435     }
436
437     if (pressureCoupling == "parrinello-rahman")
438     {
439         energyTermsToCompare.insert({ "Box-Vel-XX",
440                                       relativeToleranceAsPrecisionDependentUlp(
441                                               1e-12, ulpToleranceInMixed, ulpToleranceInDouble) });
442         energyTermsToCompare.insert({ "Box-Vel-YY",
443                                       relativeToleranceAsPrecisionDependentUlp(
444                                               1e-12, ulpToleranceInMixed, ulpToleranceInDouble) });
445         energyTermsToCompare.insert({ "Box-Vel-ZZ",
446                                       relativeToleranceAsPrecisionDependentUlp(
447                                               1e-12, ulpToleranceInMixed, ulpToleranceInDouble) });
448     }
449
450     int numWarningsToTolerate = 1;
451     runTest(&fileManager_, &runner_, simulationName, numWarningsToTolerate, mdpFieldValues, energyTermsToCompare);
452 }
453
454 // TODO The time for OpenCL kernel compilation means these tests time
455 // out. Once that compilation is cached for the whole process, these
456 // tests can run in such configurations.
457 #if !GMX_GPU_OPENCL
458
459 INSTANTIATE_TEST_SUITE_P(
460         NormalIntegrators,
461         MdrunNoAppendContinuationIsExact,
462         ::testing::Combine(::testing::Values("argon12", "spc2", "alanine_vsite_vacuo"),
463                            ::testing::Values("md", "md-vv", "bd", "sd"),
464                            ::testing::Values("no"),
465                            ::testing::Values("no"),
466                            ::testing::Values(MdpParameterDatabase::Default)));
467
468 INSTANTIATE_TEST_SUITE_P(NormalIntegratorsWithFEP,
469                          MdrunNoAppendContinuationIsExact,
470                          ::testing::Combine(::testing::Values("nonanol_vacuo"),
471                                             ::testing::Values("md", "md-vv", "bd", "sd"),
472                                             ::testing::Values("no"),
473                                             ::testing::Values("no"),
474                                             ::testing::Values(MdpParameterDatabase::Default)));
475
476 INSTANTIATE_TEST_SUITE_P(
477         NVT,
478         MdrunNoAppendContinuationIsExact,
479         ::testing::Combine(::testing::Values("argon12"),
480                            ::testing::Values("md", "md-vv"),
481                            ::testing::Values("berendsen", "v-rescale", "nose-hoover"),
482                            ::testing::Values("no"),
483                            ::testing::Values(MdpParameterDatabase::Default)));
484
485 INSTANTIATE_TEST_SUITE_P(
486         NPH,
487         MdrunNoAppendContinuationIsExact,
488         ::testing::Combine(::testing::Values("argon12"),
489                            ::testing::Values("md", "md-vv"),
490                            ::testing::Values("no"),
491                            ::testing::Values("berendsen", "parrinello-rahman", "C-rescale"),
492                            ::testing::Values(MdpParameterDatabase::Default)));
493
494 INSTANTIATE_TEST_SUITE_P(
495         NPT,
496         MdrunNoAppendContinuationIsExact,
497         ::testing::Combine(::testing::Values("argon12"),
498                            ::testing::Values("md", "md-vv"),
499                            ::testing::Values("berendsen", "v-rescale", "nose-hoover"),
500                            ::testing::Values("berendsen", "parrinello-rahman", "C-rescale"),
501                            ::testing::Values(MdpParameterDatabase::Default)));
502
503 INSTANTIATE_TEST_SUITE_P(MTTK,
504                          MdrunNoAppendContinuationIsExact,
505                          ::testing::Combine(::testing::Values("argon12"),
506                                             ::testing::Values("md-vv"),
507                                             ::testing::Values("nose-hoover"),
508                                             ::testing::Values("mttk"),
509                                             ::testing::Values(MdpParameterDatabase::Default)));
510
511 INSTANTIATE_TEST_SUITE_P(Pull,
512                          MdrunNoAppendContinuationIsExact,
513                          ::testing::Combine(::testing::Values("spc2"),
514                                             ::testing::Values("md", "md-vv"),
515                                             ::testing::Values("no"),
516                                             ::testing::Values("no"),
517                                             ::testing::Values(MdpParameterDatabase::Pull)));
518
519 INSTANTIATE_TEST_SUITE_P(Awh,
520                          MdrunNoAppendContinuationIsExact,
521                          ::testing::Combine(::testing::Values("alanine_vacuo"),
522                                             ::testing::Values("md", "md-vv"),
523                                             ::testing::Values("v-rescale"),
524                                             ::testing::Values("no"),
525                                             ::testing::Values(MdpParameterDatabase::Awh)));
526
527 #else
528 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MdrunNoAppendContinuationIsExact);
529 #endif
530
531 } // namespace
532 } // namespace test
533 } // namespace gmx