2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012 by the GROMACS development team.
5 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
6 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /* The source code in this file should be thread-safe.
39 Please keep it that way. */
42 #include "checkpoint.h"
51 #include "buildinfo.h"
52 #include "gromacs/fileio/filetypes.h"
53 #include "gromacs/fileio/gmxfio.h"
54 #include "gromacs/fileio/gmxfio_xdr.h"
55 #include "gromacs/fileio/xdr_datatype.h"
56 #include "gromacs/fileio/xdrf.h"
57 #include "gromacs/gmxlib/network.h"
58 #include "gromacs/math/vec.h"
59 #include "gromacs/math/vecdump.h"
60 #include "gromacs/math/vectypes.h"
61 #include "gromacs/mdtypes/awh_correlation_history.h"
62 #include "gromacs/mdtypes/awh_history.h"
63 #include "gromacs/mdtypes/checkpointdata.h"
64 #include "gromacs/mdtypes/commrec.h"
65 #include "gromacs/mdtypes/df_history.h"
66 #include "gromacs/mdtypes/edsamhistory.h"
67 #include "gromacs/mdtypes/energyhistory.h"
68 #include "gromacs/mdtypes/inputrec.h"
69 #include "gromacs/mdtypes/md_enums.h"
70 #include "gromacs/mdtypes/mdrunoptions.h"
71 #include "gromacs/mdtypes/observableshistory.h"
72 #include "gromacs/mdtypes/pullhistory.h"
73 #include "gromacs/mdtypes/state.h"
74 #include "gromacs/mdtypes/swaphistory.h"
75 #include "gromacs/trajectory/trajectoryframe.h"
76 #include "gromacs/utility/arrayref.h"
77 #include "gromacs/utility/baseversion.h"
78 #include "gromacs/utility/cstringutil.h"
79 #include "gromacs/utility/fatalerror.h"
80 #include "gromacs/utility/futil.h"
81 #include "gromacs/utility/gmxassert.h"
82 #include "gromacs/utility/int64_to_int.h"
83 #include "gromacs/utility/keyvaluetree.h"
84 #include "gromacs/utility/keyvaluetreebuilder.h"
85 #include "gromacs/utility/keyvaluetreeserializer.h"
86 #include "gromacs/utility/mdmodulenotification.h"
87 #include "gromacs/utility/programcontext.h"
88 #include "gromacs/utility/smalloc.h"
89 #include "gromacs/utility/sysinfo.h"
90 #include "gromacs/utility/txtdump.h"
92 #define CPT_MAGIC1 171817
93 #define CPT_MAGIC2 171819
98 template<typename ValueType>
99 void readKvtCheckpointValue(compat::not_null<ValueType*> value,
100 const std::string& name,
101 const std::string& identifier,
102 const KeyValueTreeObject& kvt)
104 const std::string key = identifier + "-" + name;
105 if (!kvt.keyExists(key))
107 std::string errorMessage = "Cannot read requested checkpoint value " + key + " .";
108 GMX_THROW(InternalError(errorMessage));
110 *value = kvt[key].cast<ValueType>();
113 template void readKvtCheckpointValue(compat::not_null<std::int64_t*> value,
114 const std::string& name,
115 const std::string& identifier,
116 const KeyValueTreeObject& kvt);
117 template void readKvtCheckpointValue(compat::not_null<real*> value,
118 const std::string& name,
119 const std::string& identifier,
120 const KeyValueTreeObject& kvt);
122 template<typename ValueType>
123 void writeKvtCheckpointValue(const ValueType& value,
124 const std::string& name,
125 const std::string& identifier,
126 KeyValueTreeObjectBuilder kvtBuilder)
128 kvtBuilder.addValue<ValueType>(identifier + "-" + name, value);
131 template void writeKvtCheckpointValue(const std::int64_t& value,
132 const std::string& name,
133 const std::string& identifier,
134 KeyValueTreeObjectBuilder kvtBuilder);
135 template void writeKvtCheckpointValue(const real& value,
136 const std::string& name,
137 const std::string& identifier,
138 KeyValueTreeObjectBuilder kvtBuilder);
143 /*! \brief Enum of values that describe the contents of a cpt file
144 * whose format matches a version number
146 * The enum helps the code be more self-documenting and ensure merges
147 * do not silently resolve when two patches make the same bump. When
148 * adding new functionality, add a new element just above cptv_Count
149 * in this enumeration, and write code below that does the right thing
150 * according to the value of file_version.
154 cptv_Unknown = 17, /**< Version before numbering scheme */
155 cptv_RemoveBuildMachineInformation, /**< remove functionality that makes mdrun builds non-reproducible */
156 cptv_ComPrevStepAsPullGroupReference, /**< Allow using COM of previous step as pull group PBC reference */
157 cptv_PullAverage, /**< Added possibility to output average pull force and position */
158 cptv_MdModules, /**< Added checkpointing for MdModules */
159 cptv_ModularSimulator, /**< Added checkpointing for modular simulator */
160 cptv_Count /**< the total number of cptv versions */
163 /*! \brief Version number of the file format written to checkpoint
164 * files by this version of the code.
166 * cpt_version should normally only be changed, via adding a new field
167 * to cptv enumeration, when the header or footer format changes.
169 * The state data format itself is backward and forward compatible.
170 * But old code can not read a new entry that is present in the file
171 * (but can read a new format when new entries are not present).
173 * The cpt_version increases whenever the file format in the main
174 * development branch changes, due to an extension of the cptv enum above.
175 * Backward compatibility for reading old run input files is maintained
176 * by checking this version number against that of the file and then using
177 * the correct code path. */
178 static const int cpt_version = cptv_Count - 1;
181 const char* est_names[estNR] = { "FE-lambda",
187 "thermostat-integral",
192 "LD-rng-unsupported",
193 "LD-rng-i-unsupported",
206 "MC-rng-unsupported",
207 "MC-rng-i-unsupported",
208 "barostat-integral" };
225 static const char* eeks_names[eeksNR] = { "Ekin_n", "Ekinh", "dEkindlambda",
226 "mv_cos", "Ekinf", "Ekinh_old",
227 "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC",
239 eenhENERGY_NSTEPS_SIM,
240 eenhENERGY_DELTA_H_NN,
241 eenhENERGY_DELTA_H_LIST,
242 eenhENERGY_DELTA_H_STARTTIME,
243 eenhENERGY_DELTA_H_STARTLAMBDA,
249 epullhPULL_NUMCOORDINATES,
250 epullhPULL_NUMGROUPS,
251 epullhPULL_NUMVALUESINXSUM,
252 epullhPULL_NUMVALUESINFSUM,
258 epullcoordh_VALUE_REF_SUM,
259 epullcoordh_VALUE_SUM,
260 epullcoordh_DR01_SUM,
261 epullcoordh_DR23_SUM,
262 epullcoordh_DR45_SUM,
263 epullcoordh_FSCAL_SUM,
264 epullcoordh_DYNAX_SUM,
274 static const char* eenh_names[eenhNR] = { "energy_n",
283 "energy_delta_h_list",
284 "energy_delta_h_start_time",
285 "energy_delta_h_start_lambda" };
287 static const char* ePullhNames[epullhNR] = { "pullhistory_numcoordinates",
288 "pullhistory_numgroups",
289 "pullhistory_numvaluesinxsum",
290 "pullhistory_numvaluesinfsum" };
292 /* free energy history variables -- need to be preserved over checkpoint */
311 /* free energy history variable names */
312 static const char* edfh_names[edfhNR] = { "bEquilibrated",
314 "Wang-Landau Histogram",
322 "accumulated_plus_2",
323 "accumulated_minus_2",
327 /* AWH biasing history variables */
331 eawhhEQUILIBRATEHISTOGRAM,
335 eawhhUMBRELLAGRIDPOINT,
337 eawhhLOGSCALEDSAMPLEWEIGHT,
339 eawhhFORCECORRELATIONGRID,
343 static const char* eawhh_names[eawhhNR] = { "awh_in_initial", "awh_equilibrateHistogram",
344 "awh_histsize", "awh_npoints",
345 "awh_coordpoint", "awh_umbrellaGridpoint",
346 "awh_updatelist", "awh_logScaledSampleWeight",
347 "awh_numupdates", "awh_forceCorrelationGrid" };
355 static const char* epull_prev_step_com_names[epullsNR] = { "Pull groups prev step COM" };
358 //! Higher level vector element type, only used for formatting checkpoint dumps
359 enum class CptElementType
361 integer, //!< integer
362 real, //!< float or double, not linked to precision of type real
363 real3, //!< float[3] or double[3], not linked to precision of type real
364 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
367 //! \brief Parts of the checkpoint state, only used for reporting
370 microState, //!< The microstate of the simulated system
371 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
372 energyHistory, //!< Energy observable statistics
373 freeEnergyHistory, //!< Free-energy state and observable statistics
374 accWeightHistogram, //!< Accelerated weight histogram method state
375 pullState, //!< COM of previous step.
376 pullHistory //!< Pull history statistics (sums since last written output)
379 //! \brief Return the name of a checkpoint entry based on part and part entry
380 static const char* entryName(StatePart part, int ecpt)
384 case StatePart::microState: return est_names[ecpt];
385 case StatePart::kineticEnergy: return eeks_names[ecpt];
386 case StatePart::energyHistory: return eenh_names[ecpt];
387 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
388 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
389 case StatePart::pullState: return epull_prev_step_com_names[ecpt];
390 case StatePart::pullHistory: return ePullhNames[ecpt];
396 static void cp_warning(FILE* fp)
398 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
401 [[noreturn]] static void cp_error()
403 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
406 static void do_cpt_string_err(XDR* xd, const char* desc, gmx::ArrayRef<char> s, FILE* list)
408 char* data = s.data();
409 if (xdr_string(xd, &data, s.size()) == 0)
415 fprintf(list, "%s = %s\n", desc, data);
419 static int do_cpt_int(XDR* xd, const char* desc, int* i, FILE* list)
421 if (xdr_int(xd, i) == 0)
427 fprintf(list, "%s = %d\n", desc, *i);
432 static int do_cpt_u_chars(XDR* xd, const char* desc, int n, unsigned char* i, FILE* list)
436 fprintf(list, "%s = ", desc);
439 for (int j = 0; j < n && res; j++)
441 res &= xdr_u_char(xd, &i[j]);
444 fprintf(list, "%02x", i[j]);
459 static void do_cpt_int_err(XDR* xd, const char* desc, int* i, FILE* list)
461 if (do_cpt_int(xd, desc, i, list) < 0)
467 static void do_cpt_bool_err(XDR* xd, const char* desc, bool* b, FILE* list)
469 int i = static_cast<int>(*b);
471 if (do_cpt_int(xd, desc, &i, list) < 0)
479 static void do_cpt_step_err(XDR* xd, const char* desc, int64_t* i, FILE* list)
481 char buf[STEPSTRSIZE];
483 if (xdr_int64(xd, i) == 0)
489 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
493 static void do_cpt_double_err(XDR* xd, const char* desc, double* f, FILE* list)
495 if (xdr_double(xd, f) == 0)
501 fprintf(list, "%s = %f\n", desc, *f);
505 static void do_cpt_real_err(XDR* xd, real* f)
508 bool_t res = xdr_double(xd, f);
510 bool_t res = xdr_float(xd, f);
518 static void do_cpt_n_rvecs_err(XDR* xd, const char* desc, int n, rvec f[], FILE* list)
520 for (int i = 0; i < n; i++)
522 for (int j = 0; j < DIM; j++)
524 do_cpt_real_err(xd, &f[i][j]);
530 pr_rvecs(list, 0, desc, f, n);
542 static const int value = xdr_datatype_int;
546 struct xdr_type<float>
548 static const int value = xdr_datatype_float;
552 struct xdr_type<double>
554 static const int value = xdr_datatype_double;
557 //! \brief Returns size in byte of an xdr_datatype
558 static inline unsigned int sizeOfXdrType(int xdrType)
562 case xdr_datatype_int: return sizeof(int);
563 case xdr_datatype_float: return sizeof(float);
564 case xdr_datatype_double: return sizeof(double);
565 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
571 //! \brief Returns the XDR process function for i/o of an XDR type
572 static inline xdrproc_t xdrProc(int xdrType)
576 case xdr_datatype_int: return reinterpret_cast<xdrproc_t>(xdr_int);
577 case xdr_datatype_float: return reinterpret_cast<xdrproc_t>(xdr_float);
578 case xdr_datatype_double: return reinterpret_cast<xdrproc_t>(xdr_double);
579 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
585 /*! \brief Lists or only reads an xdr vector from checkpoint file
587 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
588 * The header for the print is set by \p part and \p ecpt.
589 * The formatting of the printing is set by \p cptElementType.
590 * When list==NULL only reads the elements.
592 static bool_t listXdrVector(XDR* xd, StatePart part, int ecpt, int nf, int xdrType, FILE* list, CptElementType cptElementType)
596 const unsigned int elemSize = sizeOfXdrType(xdrType);
597 std::vector<char> data(nf * elemSize);
598 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
604 case xdr_datatype_int:
605 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int*>(data.data()), nf, TRUE);
607 case xdr_datatype_float:
609 if (cptElementType == CptElementType::real3)
611 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec*>(data.data()), nf / 3);
616 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
617 pr_fvec(list, 0, entryName(part, ecpt), reinterpret_cast<const float*>(data.data()), nf, TRUE);
620 case xdr_datatype_double:
622 if (cptElementType == CptElementType::real3)
624 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec*>(data.data()), nf / 3);
629 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
630 pr_dvec(list, 0, entryName(part, ecpt), reinterpret_cast<const double*>(data.data()), nf, TRUE);
633 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
640 //! \brief Convert a double array, typed char*, to float
641 gmx_unused static void convertArrayRealPrecision(const char* c, float* v, int n)
643 const double* d = reinterpret_cast<const double*>(c);
644 for (int i = 0; i < n; i++)
646 v[i] = static_cast<float>(d[i]);
650 //! \brief Convert a float array, typed char*, to double
651 static void convertArrayRealPrecision(const char* c, double* v, int n)
653 const float* f = reinterpret_cast<const float*>(c);
654 for (int i = 0; i < n; i++)
656 v[i] = static_cast<double>(f[i]);
660 //! \brief Generate an error for trying to convert to integer
661 static void convertArrayRealPrecision(const char gmx_unused* c, int gmx_unused* v, int gmx_unused n)
663 GMX_RELEASE_ASSERT(false,
664 "We only expect type mismatches between float and double, not integer");
667 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
669 * This is the only routine that does the actually i/o of real vector,
670 * all other routines are intermediate level routines for specific real
671 * data types, calling this routine.
672 * Currently this routine is (too) complex, since it handles both real *
673 * and std::vector<real>. Using real * is deprecated and this routine
674 * will simplify a lot when only std::vector needs to be supported.
676 * The routine is generic to vectors with different allocators,
677 * because as part of reading a checkpoint there are vectors whose
678 * size is not known until reading has progressed far enough, so a
679 * resize method must be called.
681 * When not listing, we use either v or vector, depending on which is !=NULL.
682 * If nval >= 0, nval is used; on read this should match the passed value.
683 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
684 * the value is stored in nptr
686 template<typename T, typename AllocatorType>
687 static int doVectorLow(XDR* xd,
694 std::vector<T, AllocatorType>* vector,
696 CptElementType cptElementType)
698 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr)
699 || (v == nullptr && vector != nullptr),
700 "Without list, we should have exactly one of v and vector != NULL");
704 int numElemInTheFile;
709 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
710 numElemInTheFile = nval;
716 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
717 numElemInTheFile = *nptr;
721 numElemInTheFile = vector->size();
725 /* Read/write the vector element count */
726 res = xdr_int(xd, &numElemInTheFile);
731 /* Read/write the element data type */
732 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
733 int xdrTypeInTheFile = xdrTypeInTheCode;
734 res = xdr_int(xd, &xdrTypeInTheFile);
740 if (list == nullptr && (sflags & (1 << ecpt)))
744 if (numElemInTheFile != nval)
747 "Count mismatch for state entry %s, code count is %d, file count is %d\n",
748 entryName(part, ecpt),
753 else if (nptr != nullptr)
755 *nptr = numElemInTheFile;
758 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
763 "mismatch for state entry %s, code precision is %s, file precision is %s",
764 entryName(part, ecpt),
765 xdr_datatype_names[xdrTypeInTheCode],
766 xdr_datatype_names[xdrTypeInTheFile]);
768 /* Matching int and real should never occur, but check anyhow */
769 if (xdrTypeInTheFile == xdr_datatype_int || xdrTypeInTheCode == xdr_datatype_int)
772 "Type %s: incompatible checkpoint formats or corrupted checkpoint file.",
782 snew(*v, numElemInTheFile);
788 /* This conditional ensures that we don't resize on write.
789 * In particular in the state where this code was written
790 * vector has a size of numElemInThefile and we
791 * don't want to lose that padding here.
793 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
795 vector->resize(numElemInTheFile);
803 vChar = reinterpret_cast<char*>(vp);
807 snew(vChar, numElemInTheFile * sizeOfXdrType(xdrTypeInTheFile));
810 xd, vChar, numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile), xdrProc(xdrTypeInTheFile));
818 /* In the old code float-double conversion came for free.
819 * In the new code we still support it, mainly because
820 * the tip4p_continue regression test makes use of this.
821 * It's an open question if we do or don't want to allow this.
823 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
829 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile, list, cptElementType);
835 //! \brief Read/Write a std::vector, on read checks the number of elements matches \p numElements, if specified.
838 doVector(XDR* xd, StatePart part, int ecpt, int sflags, std::vector<T>* vector, FILE* list, int numElements = -1)
840 return doVectorLow<T>(
841 xd, part, ecpt, sflags, numElements, nullptr, nullptr, vector, list, CptElementType::real);
844 //! \brief Read/Write an ArrayRef<real>.
845 static int doRealArrayRef(XDR* xd, StatePart part, int ecpt, int sflags, gmx::ArrayRef<real> vector, FILE* list)
847 real* v_real = vector.data();
848 return doVectorLow<real, std::allocator<real>>(
849 xd, part, ecpt, sflags, vector.size(), nullptr, &v_real, nullptr, list, CptElementType::real);
852 //! Convert from view of RVec to view of real.
853 static gmx::ArrayRef<real> realArrayRefFromRVecArrayRef(gmx::ArrayRef<gmx::RVec> ofRVecs)
855 return gmx::arrayRefFromArray<real>(reinterpret_cast<real*>(ofRVecs.data()), ofRVecs.size() * DIM);
858 //! \brief Read/Write a PaddedVector whose value_type is RVec.
859 template<typename PaddedVectorOfRVecType>
861 doRvecVector(XDR* xd, StatePart part, int ecpt, int sflags, PaddedVectorOfRVecType* v, int numAtoms, FILE* list)
863 const int numReals = numAtoms * DIM;
868 sflags & (1 << ecpt),
869 "When not listing, the flag for the entry should be set when requesting i/o");
870 GMX_RELEASE_ASSERT(v->size() == numAtoms, "v should have sufficient size for numAtoms");
872 return doRealArrayRef(xd, part, ecpt, sflags, realArrayRefFromRVecArrayRef(makeArrayRef(*v)), list);
876 // Use the rebind facility to change the value_type of the
877 // allocator from RVec to real.
878 using realAllocator =
879 typename std::allocator_traits<typename PaddedVectorOfRVecType::allocator_type>::template rebind_alloc<real>;
880 return doVectorLow<real, realAllocator>(
881 xd, part, ecpt, sflags, numReals, nullptr, nullptr, nullptr, list, CptElementType::real);
885 /* This function stores n along with the reals for reading,
886 * but on reading it assumes that n matches the value in the checkpoint file,
887 * a fatal error is generated when this is not the case.
889 static int do_cpte_reals(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
891 return doVectorLow<real, std::allocator<real>>(
892 xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
895 /* This function does the same as do_cpte_reals,
896 * except that on reading it ignores the passed value of *n
897 * and stores the value read from the checkpoint file in *n.
899 static int do_cpte_n_reals(XDR* xd, StatePart part, int ecpt, int sflags, int* n, real** v, FILE* list)
901 return doVectorLow<real, std::allocator<real>>(
902 xd, part, ecpt, sflags, -1, n, v, nullptr, list, CptElementType::real);
905 static int do_cpte_real(XDR* xd, StatePart part, int ecpt, int sflags, real* r, FILE* list)
907 return doVectorLow<real, std::allocator<real>>(
908 xd, part, ecpt, sflags, 1, nullptr, &r, nullptr, list, CptElementType::real);
911 static int do_cpte_ints(XDR* xd, StatePart part, int ecpt, int sflags, int n, int** v, FILE* list)
913 return doVectorLow<int, std::allocator<int>>(
914 xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::integer);
917 static int do_cpte_int(XDR* xd, StatePart part, int ecpt, int sflags, int* i, FILE* list)
919 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
922 static int do_cpte_bool(XDR* xd, StatePart part, int ecpt, int sflags, bool* b, FILE* list)
924 int i = static_cast<int>(*b);
925 int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
930 static int do_cpte_doubles(XDR* xd, StatePart part, int ecpt, int sflags, int n, double** v, FILE* list)
932 return doVectorLow<double, std::allocator<double>>(
933 xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
936 static int do_cpte_double(XDR* xd, StatePart part, int ecpt, int sflags, double* r, FILE* list)
938 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
941 static int do_cpte_matrix(XDR* xd, StatePart part, int ecpt, int sflags, matrix v, FILE* list)
947 ret = doVectorLow<real, std::allocator<real>>(
948 xd, part, ecpt, sflags, DIM * DIM, nullptr, &vr, nullptr, nullptr, CptElementType::matrix3x3);
950 if (list && ret == 0)
952 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
959 static int do_cpte_nmatrix(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
963 char name[CPTSTRLEN];
970 for (i = 0; i < n; i++)
972 reti = doVectorLow<real, std::allocator<real>>(
973 xd, part, ecpt, sflags, n, nullptr, &(v[i]), nullptr, nullptr, CptElementType::matrix3x3);
974 if (list && reti == 0)
976 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
977 pr_reals(list, 0, name, v[i], n);
987 static int do_cpte_matrices(XDR* xd, StatePart part, int ecpt, int sflags, int n, matrix** v, FILE* list)
990 matrix *vp, *va = nullptr;
996 res = xdr_int(xd, &nf);
1001 if (list == nullptr && nf != n)
1004 "Count mismatch for state entry %s, code count is %d, file count is %d\n",
1005 entryName(part, ecpt),
1009 if (list || !(sflags & (1 << ecpt)))
1022 snew(vr, nf * DIM * DIM);
1023 for (i = 0; i < nf; i++)
1025 for (j = 0; j < DIM; j++)
1027 for (k = 0; k < DIM; k++)
1029 vr[(i * DIM + j) * DIM + k] = vp[i][j][k];
1033 ret = doVectorLow<real, std::allocator<real>>(
1034 xd, part, ecpt, sflags, nf * DIM * DIM, nullptr, &vr, nullptr, nullptr, CptElementType::matrix3x3);
1035 for (i = 0; i < nf; i++)
1037 for (j = 0; j < DIM; j++)
1039 for (k = 0; k < DIM; k++)
1041 vp[i][j][k] = vr[(i * DIM + j) * DIM + k];
1047 if (list && ret == 0)
1049 for (i = 0; i < nf; i++)
1051 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
1062 static void do_cpt_header(XDR* xd, gmx_bool bRead, FILE* list, CheckpointHeaderContents* contents)
1075 res = xdr_int(xd, &magic);
1079 "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
1081 if (magic != CPT_MAGIC1)
1084 "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
1085 "The checkpoint file is corrupted or not a checkpoint file",
1092 gmx_gethostname(fhost, 255);
1094 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
1095 // The following fields are no longer ever written with meaningful
1096 // content, but because they precede the file version, there is no
1097 // good way for new code to read the old and new formats, nor a
1098 // good way for old code to avoid giving an error while reading a
1099 // new format. So we read and write a field that no longer has a
1101 do_cpt_string_err(xd, "GROMACS build time UNUSED", contents->btime_UNUSED, list);
1102 do_cpt_string_err(xd, "GROMACS build user UNUSED", contents->buser_UNUSED, list);
1103 do_cpt_string_err(xd, "GROMACS build host UNUSED", contents->bhost_UNUSED, list);
1104 do_cpt_string_err(xd, "generating program", contents->fprog, list);
1105 do_cpt_string_err(xd, "generation time", contents->ftime, list);
1106 contents->file_version = cpt_version;
1107 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
1108 if (contents->file_version > cpt_version)
1111 "Attempting to read a checkpoint file of version %d with code of version %d\n",
1112 contents->file_version,
1115 if (contents->file_version >= 13)
1117 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
1121 contents->double_prec = -1;
1123 if (contents->file_version >= 12)
1125 do_cpt_string_err(xd, "generating host", fhost, list);
1127 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
1128 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
1129 if (contents->file_version >= 10)
1131 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
1135 contents->nhchainlength = 1;
1137 if (contents->file_version >= 11)
1139 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
1143 contents->nnhpres = 0;
1145 if (contents->file_version >= 14)
1147 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
1151 contents->nlambda = 0;
1153 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
1154 if (contents->file_version >= 3)
1156 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1160 contents->simulation_part = 1;
1162 if (contents->file_version >= 5)
1164 do_cpt_step_err(xd, "step", &contents->step, list);
1169 do_cpt_int_err(xd, "step", &idum, list);
1170 contents->step = static_cast<int64_t>(idum);
1172 do_cpt_double_err(xd, "t", &contents->t, list);
1173 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1174 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1175 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1176 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1177 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1178 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1179 if (contents->file_version >= 4)
1181 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1182 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1186 contents->flags_eks = 0;
1187 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV + 1));
1188 contents->flags_state = (contents->flags_state
1189 & ~((1 << (estORIRE_DTAV + 1)) | (1 << (estORIRE_DTAV + 2))
1190 | (1 << (estORIRE_DTAV + 3))));
1192 if (contents->file_version >= 14)
1194 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1198 contents->flags_dfh = 0;
1201 if (contents->file_version >= 15)
1203 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1210 if (contents->file_version >= 16)
1212 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1216 contents->eSwapCoords = eswapNO;
1219 if (contents->file_version >= 17)
1221 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1225 contents->flags_awhh = 0;
1228 if (contents->file_version >= 18)
1230 do_cpt_int_err(xd, "pull history flags", &contents->flagsPullHistory, list);
1234 contents->flagsPullHistory = 0;
1237 if (contents->file_version >= cptv_ModularSimulator)
1240 xd, "Is modular simulator checkpoint", &contents->isModularSimulatorCheckpoint, list);
1244 contents->isModularSimulatorCheckpoint = false;
1248 static int do_cpt_footer(XDR* xd, int file_version)
1253 if (file_version >= 2)
1256 res = xdr_int(xd, &magic);
1261 if (magic != CPT_MAGIC2)
1270 static int do_cpt_state(XDR* xd, int fflags, t_state* state, FILE* list)
1273 const StatePart part = StatePart::microState;
1274 const int sflags = state->flags;
1275 // If reading, state->natoms was probably just read, so
1276 // allocations need to be managed. If writing, this won't change
1277 // anything that matters.
1278 state_change_natoms(state, state->natoms);
1279 for (int i = 0; (i < estNR && ret == 0); i++)
1281 if (fflags & (1 << i))
1286 ret = doRealArrayRef(
1291 gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()),
1295 ret = do_cpte_int(xd, part, i, sflags, &state->fep_state, list);
1297 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1299 ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list);
1301 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1303 ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list);
1306 ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list);
1309 ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list);
1312 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list);
1315 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list);
1318 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list);
1321 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list);
1324 ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list);
1327 ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list);
1329 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1330 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1332 ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list);
1335 ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list);
1337 /* The RNG entries are no longer written,
1338 * the next 4 lines are only for reading old files.
1339 * It's OK that three case statements fall through.
1341 case estLD_RNG_NOTSUPPORTED:
1342 case estLD_RNGI_NOTSUPPORTED:
1343 case estMC_RNG_NOTSUPPORTED:
1344 case estMC_RNGI_NOTSUPPORTED:
1345 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1347 case estDISRE_INITF:
1348 ret = do_cpte_real(xd, part, i, sflags, &state->hist.disre_initf, list);
1350 case estDISRE_RM3TAV:
1351 ret = do_cpte_n_reals(
1352 xd, part, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list);
1354 case estORIRE_INITF:
1355 ret = do_cpte_real(xd, part, i, sflags, &state->hist.orire_initf, list);
1358 ret = do_cpte_n_reals(
1359 xd, part, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list);
1361 case estPULLCOMPREVSTEP:
1362 ret = doVector<double>(xd, part, i, sflags, &state->pull_com_prev_step, list);
1366 "Unknown state entry %d\n"
1367 "You are reading a checkpoint file written by different code, which "
1376 static int do_cpt_ekinstate(XDR* xd, int fflags, ekinstate_t* ekins, FILE* list)
1380 const StatePart part = StatePart::kineticEnergy;
1381 for (int i = 0; (i < eeksNR && ret == 0); i++)
1383 if (fflags & (1 << i))
1389 ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list);
1392 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list);
1395 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list);
1398 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list);
1401 ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list);
1403 case eeksEKINSCALEF:
1404 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list);
1407 ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list);
1409 case eeksEKINSCALEH:
1410 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list);
1413 ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list);
1415 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1418 "Unknown ekin data state entry %d\n"
1419 "You are probably reading a new checkpoint file with old code",
1429 static int do_cpt_swapstate(XDR* xd, gmx_bool bRead, int eSwapCoords, swaphistory_t* swapstate, FILE* list)
1431 int swap_cpt_version = 2;
1433 if (eSwapCoords == eswapNO)
1438 swapstate->bFromCpt = bRead;
1439 swapstate->eSwapCoords = eSwapCoords;
1441 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1442 if (bRead && swap_cpt_version < 2)
1445 "Cannot read checkpoint files that were written with old versions"
1446 "of the ion/water position swapping protocol.\n");
1449 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1451 /* When reading, init_swapcoords has not been called yet,
1452 * so we have to allocate memory first. */
1453 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1456 snew(swapstate->ionType, swapstate->nIonTypes);
1459 for (int ic = 0; ic < eCompNR; ic++)
1461 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1463 swapstateIons_t* gs = &swapstate->ionType[ii];
1467 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1471 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1476 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1480 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1483 if (bRead && (nullptr == gs->nMolPast[ic]))
1485 snew(gs->nMolPast[ic], swapstate->nAverage);
1488 for (int j = 0; j < swapstate->nAverage; j++)
1492 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1496 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1502 /* Ion flux per channel */
1503 for (int ic = 0; ic < eChanNR; ic++)
1505 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1507 swapstateIons_t* gs = &swapstate->ionType[ii];
1511 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1515 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1520 /* Ion flux leakage */
1523 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1527 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1531 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1533 swapstateIons_t* gs = &swapstate->ionType[ii];
1535 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1539 snew(gs->channel_label, gs->nMol);
1540 snew(gs->comp_from, gs->nMol);
1543 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1544 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1547 /* Save the last known whole positions to checkpoint
1548 * file to be able to also make multimeric channels whole in PBC */
1549 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1550 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1553 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1554 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1555 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1556 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1561 xd, "Ch0 whole x", swapstate->nat[eChan0], *swapstate->xc_old_whole_p[eChan0], list);
1563 xd, "Ch1 whole x", swapstate->nat[eChan1], *swapstate->xc_old_whole_p[eChan1], list);
1570 static int do_cpt_enerhist(XDR* xd, gmx_bool bRead, int fflags, energyhistory_t* enerhist, FILE* list)
1579 GMX_RELEASE_ASSERT(enerhist != nullptr, "With energy history, we need a valid enerhist pointer");
1581 /* This is stored/read for backward compatibility */
1582 int energyHistoryNumEnergies = 0;
1585 enerhist->nsteps = 0;
1587 enerhist->nsteps_sim = 0;
1588 enerhist->nsum_sim = 0;
1590 else if (enerhist != nullptr)
1592 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1595 delta_h_history_t* deltaH = enerhist->deltaHForeignLambdas.get();
1596 const StatePart part = StatePart::energyHistory;
1597 for (int i = 0; (i < eenhNR && ret == 0); i++)
1599 if (fflags & (1 << i))
1604 ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list);
1606 case eenhENERGY_AVER:
1607 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list);
1609 case eenhENERGY_SUM:
1610 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list);
1612 case eenhENERGY_NSUM:
1613 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list);
1615 case eenhENERGY_SUM_SIM:
1616 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list);
1618 case eenhENERGY_NSUM_SIM:
1619 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list);
1621 case eenhENERGY_NSTEPS:
1622 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list);
1624 case eenhENERGY_NSTEPS_SIM:
1625 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list);
1627 case eenhENERGY_DELTA_H_NN:
1630 if (!bRead && deltaH != nullptr)
1632 numDeltaH = deltaH->dh.size();
1634 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1637 if (deltaH == nullptr)
1639 enerhist->deltaHForeignLambdas = std::make_unique<delta_h_history_t>();
1640 deltaH = enerhist->deltaHForeignLambdas.get();
1642 deltaH->dh.resize(numDeltaH);
1643 deltaH->start_lambda_set = FALSE;
1647 case eenhENERGY_DELTA_H_LIST:
1648 for (auto dh : deltaH->dh)
1650 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1653 case eenhENERGY_DELTA_H_STARTTIME:
1654 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list);
1656 case eenhENERGY_DELTA_H_STARTLAMBDA:
1657 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list);
1661 "Unknown energy history entry %d\n"
1662 "You are probably reading a new checkpoint file with old code",
1668 if ((fflags & (1 << eenhENERGY_SUM)) && !(fflags & (1 << eenhENERGY_SUM_SIM)))
1670 /* Assume we have an old file format and copy sum to sum_sim */
1671 enerhist->ener_sum_sim = enerhist->ener_sum;
1674 if ((fflags & (1 << eenhENERGY_NSUM)) && !(fflags & (1 << eenhENERGY_NSTEPS)))
1676 /* Assume we have an old file format and copy nsum to nsteps */
1677 enerhist->nsteps = enerhist->nsum;
1679 if ((fflags & (1 << eenhENERGY_NSUM_SIM)) && !(fflags & (1 << eenhENERGY_NSTEPS_SIM)))
1681 /* Assume we have an old file format and copy nsum to nsteps */
1682 enerhist->nsteps_sim = enerhist->nsum_sim;
1688 static int doCptPullCoordHist(XDR* xd, PullCoordinateHistory* pullCoordHist, const StatePart part, FILE* list)
1693 flags |= ((1 << epullcoordh_VALUE_REF_SUM) | (1 << epullcoordh_VALUE_SUM)
1694 | (1 << epullcoordh_DR01_SUM) | (1 << epullcoordh_DR23_SUM)
1695 | (1 << epullcoordh_DR45_SUM) | (1 << epullcoordh_FSCAL_SUM));
1697 for (int i = 0; i < epullcoordh_NR && ret == 0; i++)
1701 case epullcoordh_VALUE_REF_SUM:
1702 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->valueRef), list);
1704 case epullcoordh_VALUE_SUM:
1705 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->value), list);
1707 case epullcoordh_DR01_SUM:
1708 for (int j = 0; j < DIM && ret == 0; j++)
1710 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr01[j]), list);
1713 case epullcoordh_DR23_SUM:
1714 for (int j = 0; j < DIM && ret == 0; j++)
1716 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr23[j]), list);
1719 case epullcoordh_DR45_SUM:
1720 for (int j = 0; j < DIM && ret == 0; j++)
1722 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr45[j]), list);
1725 case epullcoordh_FSCAL_SUM:
1726 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->scalarForce), list);
1728 case epullcoordh_DYNAX_SUM:
1729 for (int j = 0; j < DIM && ret == 0; j++)
1731 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dynaX[j]), list);
1740 static int doCptPullGroupHist(XDR* xd, PullGroupHistory* pullGroupHist, const StatePart part, FILE* list)
1745 flags |= ((1 << epullgrouph_X_SUM));
1747 for (int i = 0; i < epullgrouph_NR; i++)
1751 case epullgrouph_X_SUM:
1752 for (int j = 0; j < DIM && ret == 0; j++)
1754 ret = do_cpte_double(xd, part, i, flags, &(pullGroupHist->x[j]), list);
1764 static int doCptPullHist(XDR* xd, gmx_bool bRead, int fflags, PullHistory* pullHist, const StatePart part, FILE* list)
1767 int pullHistoryNumCoordinates = 0;
1768 int pullHistoryNumGroups = 0;
1770 /* Retain the number of terms in the sum and the number of coordinates (used for writing
1771 * average pull forces and coordinates) in the pull history, in temporary variables,
1772 * in case they cannot be read from the checkpoint, in order to have backward compatibility */
1775 pullHist->numValuesInXSum = 0;
1776 pullHist->numValuesInFSum = 0;
1778 else if (pullHist != nullptr)
1780 pullHistoryNumCoordinates = pullHist->pullCoordinateSums.size();
1781 pullHistoryNumGroups = pullHist->pullGroupSums.size();
1785 GMX_RELEASE_ASSERT(fflags == 0, "Without pull history, all flags should be off");
1788 for (int i = 0; (i < epullhNR && ret == 0); i++)
1790 if (fflags & (1 << i))
1794 case epullhPULL_NUMCOORDINATES:
1795 ret = do_cpte_int(xd, part, i, fflags, &pullHistoryNumCoordinates, list);
1797 case epullhPULL_NUMGROUPS:
1798 do_cpt_int_err(xd, eenh_names[i], &pullHistoryNumGroups, list);
1800 case epullhPULL_NUMVALUESINXSUM:
1801 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInXSum, list);
1803 case epullhPULL_NUMVALUESINFSUM:
1804 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInFSum, list);
1808 "Unknown pull history entry %d\n"
1809 "You are probably reading a new checkpoint file with old code",
1816 pullHist->pullCoordinateSums.resize(pullHistoryNumCoordinates);
1817 pullHist->pullGroupSums.resize(pullHistoryNumGroups);
1819 if (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0)
1821 for (size_t i = 0; i < pullHist->pullCoordinateSums.size() && ret == 0; i++)
1823 ret = doCptPullCoordHist(xd, &(pullHist->pullCoordinateSums[i]), part, list);
1825 for (size_t i = 0; i < pullHist->pullGroupSums.size() && ret == 0; i++)
1827 ret = doCptPullGroupHist(xd, &(pullHist->pullGroupSums[i]), part, list);
1834 static int do_cpt_df_hist(XDR* xd, int fflags, int nlambda, df_history_t** dfhistPtr, FILE* list)
1843 if (*dfhistPtr == nullptr)
1845 snew(*dfhistPtr, 1);
1846 (*dfhistPtr)->nlambda = nlambda;
1847 init_df_history(*dfhistPtr, nlambda);
1849 df_history_t* dfhist = *dfhistPtr;
1851 const StatePart part = StatePart::freeEnergyHistory;
1852 for (int i = 0; (i < edfhNR && ret == 0); i++)
1854 if (fflags & (1 << i))
1859 ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list);
1862 ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list);
1865 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list);
1868 ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list);
1870 case edfhSUMWEIGHTS:
1871 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list);
1874 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list);
1877 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list);
1880 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list);
1883 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list);
1886 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list);
1889 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list);
1892 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list);
1895 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list);
1898 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list);
1903 "Unknown df history entry %d\n"
1904 "You are probably reading a new checkpoint file with old code",
1914 /* This function stores the last whole configuration of the reference and
1915 * average structure in the .cpt file
1917 static int do_cpt_EDstate(XDR* xd, gmx_bool bRead, int nED, edsamhistory_t* EDstate, FILE* list)
1924 EDstate->bFromCpt = bRead;
1927 /* When reading, init_edsam has not been called yet,
1928 * so we have to allocate memory first. */
1931 snew(EDstate->nref, EDstate->nED);
1932 snew(EDstate->old_sref, EDstate->nED);
1933 snew(EDstate->nav, EDstate->nED);
1934 snew(EDstate->old_sav, EDstate->nED);
1937 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1938 for (int i = 0; i < EDstate->nED; i++)
1942 /* Reference structure SREF */
1943 sprintf(buf, "ED%d # of atoms in reference structure", i + 1);
1944 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1945 sprintf(buf, "ED%d x_ref", i + 1);
1948 snew(EDstate->old_sref[i], EDstate->nref[i]);
1949 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1953 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1956 /* Average structure SAV */
1957 sprintf(buf, "ED%d # of atoms in average structure", i + 1);
1958 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1959 sprintf(buf, "ED%d x_av", i + 1);
1962 snew(EDstate->old_sav[i], EDstate->nav[i]);
1963 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1967 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1974 static int do_cpt_correlation_grid(XDR* xd,
1976 gmx_unused int fflags,
1977 gmx::CorrelationGridHistory* corrGrid,
1983 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1984 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1985 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1989 initCorrelationGridHistory(
1990 corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize, corrGrid->blockDataListSize);
1993 for (gmx::CorrelationBlockDataHistory& blockData : corrGrid->blockDataBuffer)
1995 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1996 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1997 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1998 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1999 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
2000 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
2001 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
2002 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
2003 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
2004 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
2005 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
2011 static int do_cpt_awh_bias(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhBiasHistory* biasHistory, FILE* list)
2015 gmx::AwhBiasStateHistory* state = &biasHistory->state;
2016 for (int i = 0; (i < eawhhNR && ret == 0); i++)
2018 if (fflags & (1 << i))
2022 case eawhhIN_INITIAL:
2023 do_cpt_bool_err(xd, eawhh_names[i], &state->in_initial, list);
2025 case eawhhEQUILIBRATEHISTOGRAM:
2026 do_cpt_bool_err(xd, eawhh_names[i], &state->equilibrateHistogram, list);
2029 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list);
2036 numPoints = biasHistory->pointState.size();
2038 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
2041 biasHistory->pointState.resize(numPoints);
2045 case eawhhCOORDPOINT:
2046 for (auto& psh : biasHistory->pointState)
2048 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
2049 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
2050 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
2051 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
2052 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
2053 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
2054 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
2055 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
2056 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
2057 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
2058 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
2061 case eawhhUMBRELLAGRIDPOINT:
2062 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list);
2064 case eawhhUPDATELIST:
2065 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
2066 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
2068 case eawhhLOGSCALEDSAMPLEWEIGHT:
2069 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
2070 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
2072 case eawhhNUMUPDATES:
2073 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
2075 case eawhhFORCECORRELATIONGRID:
2076 ret = do_cpt_correlation_grid(
2077 xd, bRead, fflags, &biasHistory->forceCorrelationGrid, list, i);
2079 default: gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
2087 static int do_cpt_awh(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhHistory* awhHistory, FILE* list)
2093 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
2095 if (awhHistory == nullptr)
2097 GMX_RELEASE_ASSERT(bRead,
2098 "do_cpt_awh should not be called for writing without an AwhHistory");
2100 awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
2101 awhHistory = awhHistoryLocal.get();
2104 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
2105 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
2106 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
2107 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
2108 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
2109 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
2111 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
2112 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
2117 numBias = awhHistory->bias.size();
2119 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
2123 awhHistory->bias.resize(numBias);
2125 for (auto& bias : awhHistory->bias)
2127 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
2133 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
2138 static void do_cpt_mdmodules(int fileVersion,
2139 t_fileio* checkpointFileHandle,
2140 const gmx::MdModulesNotifier& mdModulesNotifier)
2142 if (fileVersion >= cptv_MdModules)
2144 gmx::FileIOXdrSerializer serializer(checkpointFileHandle);
2145 gmx::KeyValueTreeObject mdModuleCheckpointParameterTree =
2146 gmx::deserializeKeyValueTree(&serializer);
2147 gmx::MdModulesCheckpointReadingDataOnMaster mdModuleCheckpointReadingDataOnMaster = {
2148 mdModuleCheckpointParameterTree, fileVersion
2150 mdModulesNotifier.checkpointingNotifications_.notify(mdModuleCheckpointReadingDataOnMaster);
2154 static int do_cpt_files(XDR* xd, gmx_bool bRead, std::vector<gmx_file_position_t>* outputfiles, FILE* list, int file_version)
2157 gmx_off_t mask = 0xFFFFFFFFL;
2158 int offset_high, offset_low;
2159 std::array<char, CPTSTRLEN> buf;
2160 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
2162 // Ensure that reading pre-allocates outputfiles, while writing
2163 // writes what is already there.
2164 int nfiles = outputfiles->size();
2165 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
2171 outputfiles->resize(nfiles);
2174 for (auto& outputfile : *outputfiles)
2176 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
2179 do_cpt_string_err(xd, "output filename", buf, list);
2180 std::copy(std::begin(buf), std::end(buf), std::begin(outputfile.filename));
2182 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2186 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2190 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32)
2191 | (static_cast<gmx_off_t>(offset_low) & mask);
2195 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
2197 offset = outputfile.offset;
2205 offset_low = static_cast<int>(offset & mask);
2206 offset_high = static_cast<int>((offset >> 32) & mask);
2208 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2212 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2217 if (file_version >= 8)
2219 if (do_cpt_int(xd, "file_checksum_size", &outputfile.checksumSize, list) != 0)
2223 if (do_cpt_u_chars(xd, "file_checksum", outputfile.checksum.size(), outputfile.checksum.data(), list)
2231 outputfile.checksumSize = -1;
2237 void write_checkpoint_data(t_fileio* fp,
2238 CheckpointHeaderContents headerContents,
2242 ObservablesHistory* observablesHistory,
2243 const gmx::MdModulesNotifier& mdModulesNotifier,
2244 std::vector<gmx_file_position_t>* outputfiles,
2245 gmx::WriteCheckpointDataHolder* modularSimulatorCheckpointData)
2247 headerContents.flags_eks = 0;
2248 if (state->ekinstate.bUpToDate)
2250 headerContents.flags_eks = ((1 << eeksEKIN_N) | (1 << eeksEKINH) | (1 << eeksEKINF)
2251 | (1 << eeksEKINO) | (1 << eeksEKINSCALEF) | (1 << eeksEKINSCALEH)
2252 | (1 << eeksVSCALE) | (1 << eeksDEKINDL) | (1 << eeksMVCOS));
2254 headerContents.isModularSimulatorCheckpoint = !modularSimulatorCheckpointData->empty();
2256 energyhistory_t* enerhist = observablesHistory->energyHistory.get();
2257 headerContents.flags_enh = 0;
2258 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
2260 headerContents.flags_enh |=
2261 (1 << eenhENERGY_N) | (1 << eenhENERGY_NSTEPS) | (1 << eenhENERGY_NSTEPS_SIM);
2262 if (enerhist->nsum > 0)
2264 headerContents.flags_enh |=
2265 ((1 << eenhENERGY_AVER) | (1 << eenhENERGY_SUM) | (1 << eenhENERGY_NSUM));
2267 if (enerhist->nsum_sim > 0)
2269 headerContents.flags_enh |= ((1 << eenhENERGY_SUM_SIM) | (1 << eenhENERGY_NSUM_SIM));
2271 if (enerhist->deltaHForeignLambdas != nullptr)
2273 headerContents.flags_enh |=
2274 ((1 << eenhENERGY_DELTA_H_NN) | (1 << eenhENERGY_DELTA_H_LIST)
2275 | (1 << eenhENERGY_DELTA_H_STARTTIME) | (1 << eenhENERGY_DELTA_H_STARTLAMBDA));
2279 PullHistory* pullHist = observablesHistory->pullHistory.get();
2280 headerContents.flagsPullHistory = 0;
2281 if (pullHist != nullptr && (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0))
2283 headerContents.flagsPullHistory |= (1 << epullhPULL_NUMCOORDINATES);
2284 headerContents.flagsPullHistory |= ((1 << epullhPULL_NUMGROUPS) | (1 << epullhPULL_NUMVALUESINXSUM)
2285 | (1 << epullhPULL_NUMVALUESINFSUM));
2288 headerContents.flags_dfh = 0;
2291 headerContents.flags_dfh = ((1 << edfhBEQUIL) | (1 << edfhNATLAMBDA) | (1 << edfhSUMWEIGHTS)
2292 | (1 << edfhSUMDG) | (1 << edfhTIJ) | (1 << edfhTIJEMP));
2295 headerContents.flags_dfh |= ((1 << edfhWLDELTA) | (1 << edfhWLHISTO));
2297 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER)
2298 || (elamstats == elamstatsMETROPOLIS))
2300 headerContents.flags_dfh |= ((1 << edfhACCUMP) | (1 << edfhACCUMM) | (1 << edfhACCUMP2)
2301 | (1 << edfhACCUMM2) | (1 << edfhSUMMINVAR) | (1 << edfhSUMVAR));
2305 headerContents.flags_awhh = 0;
2306 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
2308 headerContents.flags_awhh |=
2309 ((1 << eawhhIN_INITIAL) | (1 << eawhhEQUILIBRATEHISTOGRAM) | (1 << eawhhHISTSIZE)
2310 | (1 << eawhhNPOINTS) | (1 << eawhhCOORDPOINT) | (1 << eawhhUMBRELLAGRIDPOINT)
2311 | (1 << eawhhUPDATELIST) | (1 << eawhhLOGSCALEDSAMPLEWEIGHT)
2312 | (1 << eawhhNUMUPDATES) | (1 << eawhhFORCECORRELATIONGRID));
2315 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
2317 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0)
2318 || (do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr) < 0)
2319 || (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, headerContents.flags_enh, enerhist, nullptr) < 0)
2320 || (doCptPullHist(gmx_fio_getxdr(fp), FALSE, headerContents.flagsPullHistory, pullHist, StatePart::pullHistory, nullptr)
2322 || (do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda, &state->dfhist, nullptr)
2325 gmx_fio_getxdr(fp), FALSE, headerContents.nED, observablesHistory->edsamHistory.get(), nullptr)
2327 || (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, headerContents.flags_awhh, state->awhHistory.get(), nullptr) < 0)
2328 || (do_cpt_swapstate(gmx_fio_getxdr(fp),
2330 headerContents.eSwapCoords,
2331 observablesHistory->swapHistory.get(),
2334 || (do_cpt_files(gmx_fio_getxdr(fp), FALSE, outputfiles, nullptr, headerContents.file_version) < 0))
2336 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2339 // Checkpointing MdModules
2341 gmx::KeyValueTreeBuilder builder;
2342 gmx::MdModulesWriteCheckpointData mdModulesWriteCheckpoint = { builder.rootObject(),
2343 headerContents.file_version };
2344 mdModulesNotifier.checkpointingNotifications_.notify(mdModulesWriteCheckpoint);
2345 auto tree = builder.build();
2346 gmx::FileIOXdrSerializer serializer(fp);
2347 gmx::serializeKeyValueTree(tree, &serializer);
2350 // Checkpointing modular simulator
2352 gmx::FileIOXdrSerializer serializer(fp);
2353 modularSimulatorCheckpointData->serialize(&serializer);
2356 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2358 /* Always FAH checkpoint immediately after a Gromacs checkpoint.
2360 * Note that it is critical that we save a FAH checkpoint directly
2361 * after writing a Gromacs checkpoint. If the program dies, either
2362 * by the machine powering off suddenly or the process being,
2363 * killed, FAH can recover files that have only appended data by
2364 * truncating them to the last recorded length. The Gromacs
2365 * checkpoint does not just append data, it is fully rewritten each
2366 * time so a crash between moving the new Gromacs checkpoint file in
2367 * to place and writing a FAH checkpoint is not recoverable. Thus
2368 * the time between these operations must be kept as short a
2375 static void check_int(FILE* fplog, const char* type, int p, int f, gmx_bool* mm)
2377 bool foundMismatch = (p != f);
2385 fprintf(fplog, " %s mismatch,\n", type);
2386 fprintf(fplog, " current program: %d\n", p);
2387 fprintf(fplog, " checkpoint file: %d\n", f);
2388 fprintf(fplog, "\n");
2392 static void check_string(FILE* fplog, const char* type, const char* p, const char* f, gmx_bool* mm)
2394 bool foundMismatch = (std::strcmp(p, f) != 0);
2402 fprintf(fplog, " %s mismatch,\n", type);
2403 fprintf(fplog, " current program: %s\n", p);
2404 fprintf(fplog, " checkpoint file: %s\n", f);
2405 fprintf(fplog, "\n");
2409 static void check_match(FILE* fplog,
2410 const t_commrec* cr,
2412 const CheckpointHeaderContents& headerContents,
2413 gmx_bool reproducibilityRequested)
2415 /* Note that this check_string on the version will also print a message
2416 * when only the minor version differs. But we only print a warning
2417 * message further down with reproducibilityRequested=TRUE.
2419 gmx_bool versionDiffers = FALSE;
2420 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2422 gmx_bool precisionDiffers = FALSE;
2423 check_int(fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2424 if (precisionDiffers)
2426 const char msg_precision_difference[] =
2427 "You are continuing a simulation with a different precision. Not matching\n"
2428 "single/double precision will lead to precision or performance loss.\n";
2431 fprintf(fplog, "%s\n", msg_precision_difference);
2435 gmx_bool mm = (versionDiffers || precisionDiffers);
2437 if (reproducibilityRequested)
2440 fplog, "Program name", gmx::getProgramContext().fullBinaryPath(), headerContents.fprog, &mm);
2442 check_int(fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2445 if (cr->sizeOfDefaultCommunicator > 1 && reproducibilityRequested)
2447 // TODO: These checks are incorrect (see redmine #3309)
2448 check_int(fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2450 int npp = cr->sizeOfDefaultCommunicator;
2451 if (cr->npmenodes >= 0)
2453 npp -= cr->npmenodes;
2455 int npp_f = headerContents.nnodes;
2456 if (headerContents.npme >= 0)
2458 npp_f -= headerContents.npme;
2462 check_int(fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2463 check_int(fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2464 check_int(fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2470 /* Gromacs should be able to continue from checkpoints between
2471 * different patch level versions, but we do not guarantee
2472 * compatibility between different major/minor versions - check this.
2476 sscanf(gmx_version(), "%5d", &gmx_major);
2477 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2478 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2480 const char msg_major_version_difference[] =
2481 "The current GROMACS major version is not identical to the one that\n"
2482 "generated the checkpoint file. In principle GROMACS does not support\n"
2483 "continuation from checkpoints between different versions, so we advise\n"
2484 "against this. If you still want to try your luck we recommend that you use\n"
2485 "the -noappend flag to keep your output files from the two versions separate.\n"
2486 "This might also work around errors where the output fields in the energy\n"
2487 "file have changed between the different versions.\n";
2489 const char msg_mismatch_notice[] =
2490 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2491 "Continuation is exact, but not guaranteed to be binary identical.\n";
2493 if (majorVersionDiffers)
2497 fprintf(fplog, "%s\n", msg_major_version_difference);
2500 else if (reproducibilityRequested)
2502 /* Major & minor versions match at least, but something is different. */
2505 fprintf(fplog, "%s\n", msg_mismatch_notice);
2511 static void read_checkpoint(const char* fn,
2513 const t_commrec* cr,
2516 int* init_fep_state,
2517 CheckpointHeaderContents* headerContents,
2519 ObservablesHistory* observablesHistory,
2520 gmx_bool reproducibilityRequested,
2521 const gmx::MdModulesNotifier& mdModulesNotifier,
2522 gmx::ReadCheckpointDataHolder* modularSimulatorCheckpointData,
2523 bool useModularSimulator)
2526 char buf[STEPSTRSIZE];
2529 fp = gmx_fio_open(fn, "r");
2530 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2532 // If we are appending, then we don't want write to the open log
2533 // file because we still need to compute a checksum for it. In
2534 // that case, the filehandle will be nullptr. Otherwise, we report
2535 // to the new log file about the checkpoint file that we are
2537 FILE* fplog = gmx_fio_getfp(logfio);
2540 fprintf(fplog, "\n");
2541 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2542 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2543 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2544 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2545 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2546 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2547 fprintf(fplog, " time: %f\n", headerContents->t);
2548 fprintf(fplog, "\n");
2551 if (headerContents->natoms != state->natoms)
2554 "Checkpoint file is for a system of %d atoms, while the current system consists "
2556 headerContents->natoms,
2559 if (headerContents->ngtc != state->ngtc)
2562 "Checkpoint file is for a system of %d T-coupling groups, while the current "
2563 "system consists of %d T-coupling groups",
2564 headerContents->ngtc,
2567 if (headerContents->nnhpres != state->nnhpres)
2570 "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the "
2571 "current system consists of %d NH-pressure-coupling variables",
2572 headerContents->nnhpres,
2576 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2577 if (headerContents->nlambda != nlambdaHistory)
2580 "Checkpoint file is for a system with %d lambda states, while the current system "
2581 "consists of %d lambda states",
2582 headerContents->nlambda,
2586 init_gtc_state(state,
2589 headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2590 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2592 if (headerContents->eIntegrator != eIntegrator)
2595 "Cannot change integrator during a checkpoint restart. Perhaps you should make a "
2596 "new .tpr with grompp -f new.mdp -t %s",
2600 // For modular simulator, no state object is populated, so we cannot do this check here!
2601 if (headerContents->flags_state != state->flags && !useModularSimulator)
2604 "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you "
2605 "should make a new .tpr with grompp -f new.mdp -t %s",
2609 GMX_ASSERT(!(headerContents->isModularSimulatorCheckpoint && !useModularSimulator),
2610 "Checkpoint file was written by modular simulator, but the current simulation uses "
2611 "the legacy simulator.");
2612 GMX_ASSERT(!(!headerContents->isModularSimulatorCheckpoint && useModularSimulator),
2613 "Checkpoint file was written by legacy simulator, but the current simulation uses "
2614 "the modular simulator.");
2618 check_match(fplog, cr, dd_nc, *headerContents, reproducibilityRequested);
2621 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2622 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it
2623 here. Investigate for 5.0. */
2628 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2633 state->ekinstate.hasReadEkinState = (((headerContents->flags_eks & (1 << eeksEKINH)) != 0)
2634 || ((headerContents->flags_eks & (1 << eeksEKINF)) != 0)
2635 || ((headerContents->flags_eks & (1 << eeksEKINO)) != 0)
2636 || (((headerContents->flags_eks & (1 << eeksEKINSCALEF))
2637 | (headerContents->flags_eks & (1 << eeksEKINSCALEH))
2638 | (headerContents->flags_eks & (1 << eeksVSCALE)))
2641 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2643 observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
2645 ret = do_cpt_enerhist(
2646 gmx_fio_getxdr(fp), TRUE, headerContents->flags_enh, observablesHistory->energyHistory.get(), nullptr);
2652 if (headerContents->flagsPullHistory)
2654 if (observablesHistory->pullHistory == nullptr)
2656 observablesHistory->pullHistory = std::make_unique<PullHistory>();
2658 ret = doCptPullHist(gmx_fio_getxdr(fp),
2660 headerContents->flagsPullHistory,
2661 observablesHistory->pullHistory.get(),
2662 StatePart::pullHistory,
2670 if (headerContents->file_version < 6)
2673 "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2676 ret = do_cpt_df_hist(
2677 gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda, &state->dfhist, nullptr);
2683 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2685 observablesHistory->edsamHistory = std::make_unique<edsamhistory_t>(edsamhistory_t{});
2687 ret = do_cpt_EDstate(
2688 gmx_fio_getxdr(fp), TRUE, headerContents->nED, observablesHistory->edsamHistory.get(), nullptr);
2694 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2696 state->awhHistory = std::make_shared<gmx::AwhHistory>();
2698 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2704 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2706 observablesHistory->swapHistory = std::make_unique<swaphistory_t>(swaphistory_t{});
2708 ret = do_cpt_swapstate(
2709 gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords, observablesHistory->swapHistory.get(), nullptr);
2715 std::vector<gmx_file_position_t> outputfiles;
2716 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2721 do_cpt_mdmodules(headerContents->file_version, fp, mdModulesNotifier);
2722 if (headerContents->file_version >= cptv_ModularSimulator)
2724 gmx::FileIOXdrSerializer serializer(fp);
2725 modularSimulatorCheckpointData->deserialize(&serializer);
2727 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2732 if (gmx_fio_close(fp) != 0)
2734 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2739 void load_checkpoint(const char* fn,
2741 const t_commrec* cr,
2745 ObservablesHistory* observablesHistory,
2746 gmx_bool reproducibilityRequested,
2747 const gmx::MdModulesNotifier& mdModulesNotifier,
2748 gmx::ReadCheckpointDataHolder* modularSimulatorCheckpointData,
2749 bool useModularSimulator)
2751 CheckpointHeaderContents headerContents;
2754 /* Read the state from the checkpoint file */
2760 &(ir->fepvals->init_fep_state),
2764 reproducibilityRequested,
2766 modularSimulatorCheckpointData,
2767 useModularSimulator);
2771 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr->mpiDefaultCommunicator);
2772 gmx::MdModulesCheckpointReadingBroadcast broadcastCheckPointData = {
2773 cr->mpiDefaultCommunicator, PAR(cr), headerContents.file_version
2775 mdModulesNotifier.checkpointingNotifications_.notify(broadcastCheckPointData);
2777 ir->bContinuation = TRUE;
2778 if (ir->nsteps >= 0)
2780 // TODO Should the following condition be <=? Currently if you
2781 // pass a checkpoint written by an normal completion to a restart,
2782 // mdrun will read all input, does some work but no steps, and
2783 // write successful output. But perhaps that is not desirable.
2784 // Note that we do not intend to support the use of mdrun
2785 // -nsteps to circumvent this condition.
2786 if (ir->nsteps + ir->init_step < headerContents.step)
2788 char buf[STEPSTRSIZE];
2789 std::string message =
2790 gmx::formatString("The input requested %s steps, ", gmx_step_str(ir->nsteps, buf));
2791 if (ir->init_step > 0)
2793 message += gmx::formatString("starting from step %s, ", gmx_step_str(ir->init_step, buf));
2795 message += gmx::formatString(
2796 "however the checkpoint "
2797 "file has already reached step %s. The simulation will not "
2798 "proceed, because either your simulation is already complete, "
2799 "or your combination of input files don't match.",
2800 gmx_step_str(headerContents.step, buf));
2801 gmx_fatal(FARGS, "%s", message.c_str());
2803 ir->nsteps += ir->init_step - headerContents.step;
2805 ir->init_step = headerContents.step;
2806 ir->simulation_part = headerContents.simulation_part + 1;
2809 void read_checkpoint_part_and_step(const char* filename, int* simulation_part, int64_t* step)
2813 if (filename == nullptr || !gmx_fexist(filename) || ((fp = gmx_fio_open(filename, "r")) == nullptr))
2815 *simulation_part = 0;
2820 CheckpointHeaderContents headerContents;
2821 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2823 *simulation_part = headerContents.simulation_part;
2824 *step = headerContents.step;
2827 static CheckpointHeaderContents read_checkpoint_data(t_fileio* fp,
2829 std::vector<gmx_file_position_t>* outputfiles)
2831 CheckpointHeaderContents headerContents;
2832 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2833 state->natoms = headerContents.natoms;
2834 state->ngtc = headerContents.ngtc;
2835 state->nnhpres = headerContents.nnhpres;
2836 state->nhchainlength = headerContents.nhchainlength;
2837 state->flags = headerContents.flags_state;
2838 int ret = do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2843 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2849 energyhistory_t enerhist;
2850 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, nullptr);
2855 PullHistory pullHist = {};
2856 ret = doCptPullHist(
2857 gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist, StatePart::pullHistory, nullptr);
2863 ret = do_cpt_df_hist(
2864 gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda, &state->dfhist, nullptr);
2870 edsamhistory_t edsamhist = {};
2871 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2877 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2883 swaphistory_t swaphist = {};
2884 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2890 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, outputfiles, nullptr, headerContents.file_version);
2896 gmx::MdModulesNotifier mdModuleNotifier;
2897 do_cpt_mdmodules(headerContents.file_version, fp, mdModuleNotifier);
2898 if (headerContents.file_version >= cptv_ModularSimulator)
2900 // In the scope of the current function, we can just throw away the content
2901 // of the modular checkpoint, but we need to read it to move the file pointer
2902 gmx::FileIOXdrSerializer serializer(fp);
2903 gmx::ReadCheckpointDataHolder modularSimulatorCheckpointData;
2904 modularSimulatorCheckpointData.deserialize(&serializer);
2906 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2911 return headerContents;
2914 void read_checkpoint_trxframe(t_fileio* fp, t_trxframe* fr)
2917 std::vector<gmx_file_position_t> outputfiles;
2918 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, &outputfiles);
2920 fr->natoms = state.natoms;
2922 fr->step = int64_to_int(headerContents.step, "conversion of checkpoint to trajectory");
2924 fr->time = headerContents.t;
2926 fr->lambda = state.lambda[efptFEP];
2927 fr->fep_state = state.fep_state;
2929 fr->bX = ((state.flags & (1 << estX)) != 0);
2932 fr->x = makeRvecArray(state.x, state.natoms);
2934 fr->bV = ((state.flags & (1 << estV)) != 0);
2937 fr->v = makeRvecArray(state.v, state.natoms);
2940 fr->bBox = ((state.flags & (1 << estBOX)) != 0);
2943 copy_mat(state.box, fr->box);
2947 void list_checkpoint(const char* fn, FILE* out)
2954 fp = gmx_fio_open(fn, "r");
2955 CheckpointHeaderContents headerContents;
2956 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
2957 state.natoms = headerContents.natoms;
2958 state.ngtc = headerContents.ngtc;
2959 state.nnhpres = headerContents.nnhpres;
2960 state.nhchainlength = headerContents.nhchainlength;
2961 state.flags = headerContents.flags_state;
2962 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
2967 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
2973 energyhistory_t enerhist;
2974 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, out);
2978 PullHistory pullHist = {};
2979 ret = doCptPullHist(
2980 gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist, StatePart::pullHistory, out);
2985 ret = do_cpt_df_hist(
2986 gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda, &state.dfhist, out);
2991 edsamhistory_t edsamhist = {};
2992 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
2997 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state.awhHistory.get(), out);
3002 swaphistory_t swaphist = {};
3003 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
3008 std::vector<gmx_file_position_t> outputfiles;
3009 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
3014 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
3021 if (gmx_fio_close(fp) != 0)
3023 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3027 /* This routine cannot print tons of data, since it is called before the log file is opened. */
3028 CheckpointHeaderContents read_checkpoint_simulation_part_and_filenames(t_fileio* fp,
3029 std::vector<gmx_file_position_t>* outputfiles)
3032 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, outputfiles);
3033 if (gmx_fio_close(fp) != 0)
3035 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3037 return headerContents;