2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012 by the GROMACS development team.
5 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
6 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /* The source code in this file should be thread-safe.
39 Please keep it that way. */
42 #include "checkpoint.h"
53 #include "buildinfo.h"
54 #include "gromacs/fileio/filetypes.h"
55 #include "gromacs/fileio/gmxfio.h"
56 #include "gromacs/fileio/gmxfio_xdr.h"
57 #include "gromacs/fileio/xdr_datatype.h"
58 #include "gromacs/fileio/xdrf.h"
59 #include "gromacs/gmxlib/network.h"
60 #include "gromacs/math/vec.h"
61 #include "gromacs/math/vecdump.h"
62 #include "gromacs/math/vectypes.h"
63 #include "gromacs/mdtypes/awh_correlation_history.h"
64 #include "gromacs/mdtypes/awh_history.h"
65 #include "gromacs/mdtypes/commrec.h"
66 #include "gromacs/mdtypes/df_history.h"
67 #include "gromacs/mdtypes/edsamhistory.h"
68 #include "gromacs/mdtypes/energyhistory.h"
69 #include "gromacs/mdtypes/inputrec.h"
70 #include "gromacs/mdtypes/md_enums.h"
71 #include "gromacs/mdtypes/mdrunoptions.h"
72 #include "gromacs/mdtypes/observableshistory.h"
73 #include "gromacs/mdtypes/pullhistory.h"
74 #include "gromacs/mdtypes/state.h"
75 #include "gromacs/mdtypes/swaphistory.h"
76 #include "gromacs/trajectory/trajectoryframe.h"
77 #include "gromacs/utility/arrayref.h"
78 #include "gromacs/utility/baseversion.h"
79 #include "gromacs/utility/cstringutil.h"
80 #include "gromacs/utility/fatalerror.h"
81 #include "gromacs/utility/futil.h"
82 #include "gromacs/utility/gmxassert.h"
83 #include "gromacs/utility/int64_to_int.h"
84 #include "gromacs/utility/keyvaluetree.h"
85 #include "gromacs/utility/keyvaluetreebuilder.h"
86 #include "gromacs/utility/keyvaluetreeserializer.h"
87 #include "gromacs/utility/mdmodulenotification.h"
88 #include "gromacs/utility/programcontext.h"
89 #include "gromacs/utility/smalloc.h"
90 #include "gromacs/utility/sysinfo.h"
91 #include "gromacs/utility/txtdump.h"
94 # include "corewrap.h"
97 #define CPT_MAGIC1 171817
98 #define CPT_MAGIC2 171819
100 /*! \brief Enum of values that describe the contents of a cpt file
101 * whose format matches a version number
103 * The enum helps the code be more self-documenting and ensure merges
104 * do not silently resolve when two patches make the same bump. When
105 * adding new functionality, add a new element just above cptv_Count
106 * in this enumeration, and write code below that does the right thing
107 * according to the value of file_version.
111 cptv_Unknown = 17, /**< Version before numbering scheme */
112 cptv_RemoveBuildMachineInformation, /**< remove functionality that makes mdrun builds non-reproducible */
113 cptv_ComPrevStepAsPullGroupReference, /**< Allow using COM of previous step as pull group PBC reference */
114 cptv_PullAverage, /**< Added possibility to output average pull force and position */
115 cptv_MdModules, /**< Added checkpointing for MdModules */
116 cptv_Count /**< the total number of cptv versions */
119 /*! \brief Version number of the file format written to checkpoint
120 * files by this version of the code.
122 * cpt_version should normally only be changed, via adding a new field
123 * to cptv enumeration, when the header or footer format changes.
125 * The state data format itself is backward and forward compatible.
126 * But old code can not read a new entry that is present in the file
127 * (but can read a new format when new entries are not present).
129 * The cpt_version increases whenever the file format in the main
130 * development branch changes, due to an extension of the cptv enum above.
131 * Backward compatibility for reading old run input files is maintained
132 * by checking this version number against that of the file and then using
133 * the correct code path. */
134 static const int cpt_version = cptv_Count - 1;
137 const char* est_names[estNR] = { "FE-lambda",
143 "thermostat-integral",
148 "LD-rng-unsupported",
149 "LD-rng-i-unsupported",
162 "MC-rng-unsupported",
163 "MC-rng-i-unsupported",
164 "barostat-integral" };
181 static const char* eeks_names[eeksNR] = { "Ekin_n", "Ekinh", "dEkindlambda",
182 "mv_cos", "Ekinf", "Ekinh_old",
183 "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC",
195 eenhENERGY_NSTEPS_SIM,
196 eenhENERGY_DELTA_H_NN,
197 eenhENERGY_DELTA_H_LIST,
198 eenhENERGY_DELTA_H_STARTTIME,
199 eenhENERGY_DELTA_H_STARTLAMBDA,
205 epullhPULL_NUMCOORDINATES,
206 epullhPULL_NUMGROUPS,
207 epullhPULL_NUMVALUESINXSUM,
208 epullhPULL_NUMVALUESINFSUM,
214 epullcoordh_VALUE_REF_SUM,
215 epullcoordh_VALUE_SUM,
216 epullcoordh_DR01_SUM,
217 epullcoordh_DR23_SUM,
218 epullcoordh_DR45_SUM,
219 epullcoordh_FSCAL_SUM,
220 epullcoordh_DYNAX_SUM,
230 static const char* eenh_names[eenhNR] = { "energy_n",
239 "energy_delta_h_list",
240 "energy_delta_h_start_time",
241 "energy_delta_h_start_lambda" };
243 static const char* ePullhNames[epullhNR] = { "pullhistory_numcoordinates", "pullhistory_numgroups",
244 "pullhistory_numvaluesinxsum",
245 "pullhistory_numvaluesinfsum" };
247 /* free energy history variables -- need to be preserved over checkpoint */
266 /* free energy history variable names */
267 static const char* edfh_names[edfhNR] = { "bEquilibrated",
269 "Wang-Landau Histogram",
277 "accumulated_plus_2",
278 "accumulated_minus_2",
282 /* AWH biasing history variables */
286 eawhhEQUILIBRATEHISTOGRAM,
290 eawhhUMBRELLAGRIDPOINT,
292 eawhhLOGSCALEDSAMPLEWEIGHT,
294 eawhhFORCECORRELATIONGRID,
298 static const char* eawhh_names[eawhhNR] = { "awh_in_initial", "awh_equilibrateHistogram",
299 "awh_histsize", "awh_npoints",
300 "awh_coordpoint", "awh_umbrellaGridpoint",
301 "awh_updatelist", "awh_logScaledSampleWeight",
302 "awh_numupdates", "awh_forceCorrelationGrid" };
310 static const char* epull_prev_step_com_names[epullsNR] = { "Pull groups prev step COM" };
313 //! Higher level vector element type, only used for formatting checkpoint dumps
314 enum class CptElementType
316 integer, //!< integer
317 real, //!< float or double, not linked to precision of type real
318 real3, //!< float[3] or double[3], not linked to precision of type real
319 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
322 //! \brief Parts of the checkpoint state, only used for reporting
325 microState, //!< The microstate of the simulated system
326 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
327 energyHistory, //!< Energy observable statistics
328 freeEnergyHistory, //!< Free-energy state and observable statistics
329 accWeightHistogram, //!< Accelerated weight histogram method state
330 pullState, //!< COM of previous step.
331 pullHistory //!< Pull history statistics (sums since last written output)
334 //! \brief Return the name of a checkpoint entry based on part and part entry
335 static const char* entryName(StatePart part, int ecpt)
339 case StatePart::microState: return est_names[ecpt];
340 case StatePart::kineticEnergy: return eeks_names[ecpt];
341 case StatePart::energyHistory: return eenh_names[ecpt];
342 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
343 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
344 case StatePart::pullState: return epull_prev_step_com_names[ecpt];
345 case StatePart::pullHistory: return ePullhNames[ecpt];
351 static void cp_warning(FILE* fp)
353 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
356 [[noreturn]] static void cp_error()
358 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
361 static void do_cpt_string_err(XDR* xd, const char* desc, gmx::ArrayRef<char> s, FILE* list)
363 char* data = s.data();
364 if (xdr_string(xd, &data, s.size()) == 0)
370 fprintf(list, "%s = %s\n", desc, data);
374 static int do_cpt_int(XDR* xd, const char* desc, int* i, FILE* list)
376 if (xdr_int(xd, i) == 0)
382 fprintf(list, "%s = %d\n", desc, *i);
387 static int do_cpt_u_chars(XDR* xd, const char* desc, int n, unsigned char* i, FILE* list)
391 fprintf(list, "%s = ", desc);
394 for (int j = 0; j < n && res; j++)
396 res &= xdr_u_char(xd, &i[j]);
399 fprintf(list, "%02x", i[j]);
414 static void do_cpt_int_err(XDR* xd, const char* desc, int* i, FILE* list)
416 if (do_cpt_int(xd, desc, i, list) < 0)
422 static void do_cpt_bool_err(XDR* xd, const char* desc, bool* b, FILE* list)
424 int i = static_cast<int>(*b);
426 if (do_cpt_int(xd, desc, &i, list) < 0)
434 static void do_cpt_step_err(XDR* xd, const char* desc, int64_t* i, FILE* list)
436 char buf[STEPSTRSIZE];
438 if (xdr_int64(xd, i) == 0)
444 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
448 static void do_cpt_double_err(XDR* xd, const char* desc, double* f, FILE* list)
450 if (xdr_double(xd, f) == 0)
456 fprintf(list, "%s = %f\n", desc, *f);
460 static void do_cpt_real_err(XDR* xd, real* f)
463 bool_t res = xdr_double(xd, f);
465 bool_t res = xdr_float(xd, f);
473 static void do_cpt_n_rvecs_err(XDR* xd, const char* desc, int n, rvec f[], FILE* list)
475 for (int i = 0; i < n; i++)
477 for (int j = 0; j < DIM; j++)
479 do_cpt_real_err(xd, &f[i][j]);
485 pr_rvecs(list, 0, desc, f, n);
497 static const int value = xdr_datatype_int;
501 struct xdr_type<float>
503 static const int value = xdr_datatype_float;
507 struct xdr_type<double>
509 static const int value = xdr_datatype_double;
512 //! \brief Returns size in byte of an xdr_datatype
513 static inline unsigned int sizeOfXdrType(int xdrType)
517 case xdr_datatype_int: return sizeof(int);
518 case xdr_datatype_float: return sizeof(float);
519 case xdr_datatype_double: return sizeof(double);
520 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
526 //! \brief Returns the XDR process function for i/o of an XDR type
527 static inline xdrproc_t xdrProc(int xdrType)
531 case xdr_datatype_int: return reinterpret_cast<xdrproc_t>(xdr_int);
532 case xdr_datatype_float: return reinterpret_cast<xdrproc_t>(xdr_float);
533 case xdr_datatype_double: return reinterpret_cast<xdrproc_t>(xdr_double);
534 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
540 /*! \brief Lists or only reads an xdr vector from checkpoint file
542 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
543 * The header for the print is set by \p part and \p ecpt.
544 * The formatting of the printing is set by \p cptElementType.
545 * When list==NULL only reads the elements.
547 static bool_t listXdrVector(XDR* xd, StatePart part, int ecpt, int nf, int xdrType, FILE* list, CptElementType cptElementType)
551 const unsigned int elemSize = sizeOfXdrType(xdrType);
552 std::vector<char> data(nf * elemSize);
553 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
559 case xdr_datatype_int:
560 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int*>(data.data()), nf, TRUE);
562 case xdr_datatype_float:
564 if (cptElementType == CptElementType::real3)
566 pr_rvecs(list, 0, entryName(part, ecpt),
567 reinterpret_cast<const rvec*>(data.data()), nf / 3);
572 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
573 pr_fvec(list, 0, entryName(part, ecpt),
574 reinterpret_cast<const float*>(data.data()), nf, TRUE);
577 case xdr_datatype_double:
579 if (cptElementType == CptElementType::real3)
581 pr_rvecs(list, 0, entryName(part, ecpt),
582 reinterpret_cast<const rvec*>(data.data()), nf / 3);
587 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
588 pr_dvec(list, 0, entryName(part, ecpt),
589 reinterpret_cast<const double*>(data.data()), nf, TRUE);
592 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
599 //! \brief Convert a double array, typed char*, to float
600 gmx_unused static void convertArrayRealPrecision(const char* c, float* v, int n)
602 const double* d = reinterpret_cast<const double*>(c);
603 for (int i = 0; i < n; i++)
605 v[i] = static_cast<float>(d[i]);
609 //! \brief Convert a float array, typed char*, to double
610 static void convertArrayRealPrecision(const char* c, double* v, int n)
612 const float* f = reinterpret_cast<const float*>(c);
613 for (int i = 0; i < n; i++)
615 v[i] = static_cast<double>(f[i]);
619 //! \brief Generate an error for trying to convert to integer
620 static void convertArrayRealPrecision(const char gmx_unused* c, int gmx_unused* v, int gmx_unused n)
622 GMX_RELEASE_ASSERT(false,
623 "We only expect type mismatches between float and double, not integer");
626 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
628 * This is the only routine that does the actually i/o of real vector,
629 * all other routines are intermediate level routines for specific real
630 * data types, calling this routine.
631 * Currently this routine is (too) complex, since it handles both real *
632 * and std::vector<real>. Using real * is deprecated and this routine
633 * will simplify a lot when only std::vector needs to be supported.
635 * The routine is generic to vectors with different allocators,
636 * because as part of reading a checkpoint there are vectors whose
637 * size is not known until reading has progressed far enough, so a
638 * resize method must be called.
640 * When not listing, we use either v or vector, depending on which is !=NULL.
641 * If nval >= 0, nval is used; on read this should match the passed value.
642 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
643 * the value is stored in nptr
645 template<typename T, typename AllocatorType>
646 static int doVectorLow(XDR* xd,
653 std::vector<T, AllocatorType>* vector,
655 CptElementType cptElementType)
657 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr)
658 || (v == nullptr && vector != nullptr),
659 "Without list, we should have exactly one of v and vector != NULL");
663 int numElemInTheFile;
668 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
669 numElemInTheFile = nval;
675 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
676 numElemInTheFile = *nptr;
680 numElemInTheFile = vector->size();
684 /* Read/write the vector element count */
685 res = xdr_int(xd, &numElemInTheFile);
690 /* Read/write the element data type */
691 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
692 int xdrTypeInTheFile = xdrTypeInTheCode;
693 res = xdr_int(xd, &xdrTypeInTheFile);
699 if (list == nullptr && (sflags & (1 << ecpt)))
703 if (numElemInTheFile != nval)
706 "Count mismatch for state entry %s, code count is %d, file count is %d\n",
707 entryName(part, ecpt), nval, numElemInTheFile);
710 else if (nptr != nullptr)
712 *nptr = numElemInTheFile;
715 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
719 sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
720 entryName(part, ecpt), xdr_datatype_names[xdrTypeInTheCode],
721 xdr_datatype_names[xdrTypeInTheFile]);
723 /* Matching int and real should never occur, but check anyhow */
724 if (xdrTypeInTheFile == xdr_datatype_int || xdrTypeInTheCode == xdr_datatype_int)
727 "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
736 snew(*v, numElemInTheFile);
742 /* This conditional ensures that we don't resize on write.
743 * In particular in the state where this code was written
744 * vector has a size of numElemInThefile and we
745 * don't want to lose that padding here.
747 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
749 vector->resize(numElemInTheFile);
757 vChar = reinterpret_cast<char*>(vp);
761 snew(vChar, numElemInTheFile * sizeOfXdrType(xdrTypeInTheFile));
763 res = xdr_vector(xd, vChar, numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
764 xdrProc(xdrTypeInTheFile));
772 /* In the old code float-double conversion came for free.
773 * In the new code we still support it, mainly because
774 * the tip4p_continue regression test makes use of this.
775 * It's an open question if we do or don't want to allow this.
777 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
783 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile, list, cptElementType);
789 //! \brief Read/Write a std::vector, on read checks the number of elements matches \p numElements, if specified.
792 doVector(XDR* xd, StatePart part, int ecpt, int sflags, std::vector<T>* vector, FILE* list, int numElements = -1)
794 return doVectorLow<T>(xd, part, ecpt, sflags, numElements, nullptr, nullptr, vector, list,
795 CptElementType::real);
798 //! \brief Read/Write an ArrayRef<real>.
799 static int doRealArrayRef(XDR* xd, StatePart part, int ecpt, int sflags, gmx::ArrayRef<real> vector, FILE* list)
801 real* v_real = vector.data();
802 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, vector.size(), nullptr,
803 &v_real, nullptr, list, CptElementType::real);
806 //! Convert from view of RVec to view of real.
807 static gmx::ArrayRef<real> realArrayRefFromRVecArrayRef(gmx::ArrayRef<gmx::RVec> ofRVecs)
809 return gmx::arrayRefFromArray<real>(reinterpret_cast<real*>(ofRVecs.data()), ofRVecs.size() * DIM);
812 //! \brief Read/Write a PaddedVector whose value_type is RVec.
813 template<typename PaddedVectorOfRVecType>
815 doRvecVector(XDR* xd, StatePart part, int ecpt, int sflags, PaddedVectorOfRVecType* v, int numAtoms, FILE* list)
817 const int numReals = numAtoms * DIM;
822 sflags & (1 << ecpt),
823 "When not listing, the flag for the entry should be set when requesting i/o");
824 GMX_RELEASE_ASSERT(v->size() == numAtoms, "v should have sufficient size for numAtoms");
826 return doRealArrayRef(xd, part, ecpt, sflags, realArrayRefFromRVecArrayRef(makeArrayRef(*v)), list);
830 // Use the rebind facility to change the value_type of the
831 // allocator from RVec to real.
832 using realAllocator =
833 typename std::allocator_traits<typename PaddedVectorOfRVecType::allocator_type>::template rebind_alloc<real>;
834 return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr,
835 nullptr, list, CptElementType::real);
839 /* This function stores n along with the reals for reading,
840 * but on reading it assumes that n matches the value in the checkpoint file,
841 * a fatal error is generated when this is not the case.
843 static int do_cpte_reals(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
845 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr,
846 list, CptElementType::real);
849 /* This function does the same as do_cpte_reals,
850 * except that on reading it ignores the passed value of *n
851 * and stores the value read from the checkpoint file in *n.
853 static int do_cpte_n_reals(XDR* xd, StatePart part, int ecpt, int sflags, int* n, real** v, FILE* list)
855 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list,
856 CptElementType::real);
859 static int do_cpte_real(XDR* xd, StatePart part, int ecpt, int sflags, real* r, FILE* list)
861 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr,
862 list, CptElementType::real);
865 static int do_cpte_ints(XDR* xd, StatePart part, int ecpt, int sflags, int n, int** v, FILE* list)
867 return doVectorLow<int, std::allocator<int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr,
868 list, CptElementType::integer);
871 static int do_cpte_int(XDR* xd, StatePart part, int ecpt, int sflags, int* i, FILE* list)
873 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
876 static int do_cpte_bool(XDR* xd, StatePart part, int ecpt, int sflags, bool* b, FILE* list)
878 int i = static_cast<int>(*b);
879 int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
884 static int do_cpte_doubles(XDR* xd, StatePart part, int ecpt, int sflags, int n, double** v, FILE* list)
886 return doVectorLow<double, std::allocator<double>>(xd, part, ecpt, sflags, n, nullptr, v,
887 nullptr, list, CptElementType::real);
890 static int do_cpte_double(XDR* xd, StatePart part, int ecpt, int sflags, double* r, FILE* list)
892 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
895 static int do_cpte_matrix(XDR* xd, StatePart part, int ecpt, int sflags, matrix v, FILE* list)
901 ret = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, DIM * DIM, nullptr, &vr,
902 nullptr, nullptr, CptElementType::matrix3x3);
904 if (list && ret == 0)
906 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
913 static int do_cpte_nmatrix(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
917 char name[CPTSTRLEN];
924 for (i = 0; i < n; i++)
926 reti = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]),
927 nullptr, nullptr, CptElementType::matrix3x3);
928 if (list && reti == 0)
930 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
931 pr_reals(list, 0, name, v[i], n);
941 static int do_cpte_matrices(XDR* xd, StatePart part, int ecpt, int sflags, int n, matrix** v, FILE* list)
944 matrix *vp, *va = nullptr;
950 res = xdr_int(xd, &nf);
955 if (list == nullptr && nf != n)
957 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n",
958 entryName(part, ecpt), n, nf);
960 if (list || !(sflags & (1 << ecpt)))
973 snew(vr, nf * DIM * DIM);
974 for (i = 0; i < nf; i++)
976 for (j = 0; j < DIM; j++)
978 for (k = 0; k < DIM; k++)
980 vr[(i * DIM + j) * DIM + k] = vp[i][j][k];
984 ret = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, nf * DIM * DIM, nullptr,
985 &vr, nullptr, nullptr, CptElementType::matrix3x3);
986 for (i = 0; i < nf; i++)
988 for (j = 0; j < DIM; j++)
990 for (k = 0; k < DIM; k++)
992 vp[i][j][k] = vr[(i * DIM + j) * DIM + k];
998 if (list && ret == 0)
1000 for (i = 0; i < nf; i++)
1002 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
1013 static void do_cpt_header(XDR* xd, gmx_bool bRead, FILE* list, CheckpointHeaderContents* contents)
1026 res = xdr_int(xd, &magic);
1030 "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
1032 if (magic != CPT_MAGIC1)
1035 "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
1036 "The checkpoint file is corrupted or not a checkpoint file",
1042 gmx_gethostname(fhost, 255);
1044 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
1045 // The following fields are no longer ever written with meaningful
1046 // content, but because they precede the file version, there is no
1047 // good way for new code to read the old and new formats, nor a
1048 // good way for old code to avoid giving an error while reading a
1049 // new format. So we read and write a field that no longer has a
1051 do_cpt_string_err(xd, "GROMACS build time UNUSED", contents->btime_UNUSED, list);
1052 do_cpt_string_err(xd, "GROMACS build user UNUSED", contents->buser_UNUSED, list);
1053 do_cpt_string_err(xd, "GROMACS build host UNUSED", contents->bhost_UNUSED, list);
1054 do_cpt_string_err(xd, "generating program", contents->fprog, list);
1055 do_cpt_string_err(xd, "generation time", contents->ftime, list);
1056 contents->file_version = cpt_version;
1057 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
1058 if (contents->file_version > cpt_version)
1061 "Attempting to read a checkpoint file of version %d with code of version %d\n",
1062 contents->file_version, cpt_version);
1064 if (contents->file_version >= 13)
1066 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
1070 contents->double_prec = -1;
1072 if (contents->file_version >= 12)
1074 do_cpt_string_err(xd, "generating host", fhost, list);
1076 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
1077 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
1078 if (contents->file_version >= 10)
1080 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
1084 contents->nhchainlength = 1;
1086 if (contents->file_version >= 11)
1088 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
1092 contents->nnhpres = 0;
1094 if (contents->file_version >= 14)
1096 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
1100 contents->nlambda = 0;
1102 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
1103 if (contents->file_version >= 3)
1105 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1109 contents->simulation_part = 1;
1111 if (contents->file_version >= 5)
1113 do_cpt_step_err(xd, "step", &contents->step, list);
1118 do_cpt_int_err(xd, "step", &idum, list);
1119 contents->step = static_cast<int64_t>(idum);
1121 do_cpt_double_err(xd, "t", &contents->t, list);
1122 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1123 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1124 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1125 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1126 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1127 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1128 if (contents->file_version >= 4)
1130 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1131 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1135 contents->flags_eks = 0;
1136 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV + 1));
1137 contents->flags_state = (contents->flags_state
1138 & ~((1 << (estORIRE_DTAV + 1)) | (1 << (estORIRE_DTAV + 2))
1139 | (1 << (estORIRE_DTAV + 3))));
1141 if (contents->file_version >= 14)
1143 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1147 contents->flags_dfh = 0;
1150 if (contents->file_version >= 15)
1152 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1159 if (contents->file_version >= 16)
1161 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1165 contents->eSwapCoords = eswapNO;
1168 if (contents->file_version >= 17)
1170 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1174 contents->flags_awhh = 0;
1177 if (contents->file_version >= 18)
1179 do_cpt_int_err(xd, "pull history flags", &contents->flagsPullHistory, list);
1183 contents->flagsPullHistory = 0;
1187 static int do_cpt_footer(XDR* xd, int file_version)
1192 if (file_version >= 2)
1195 res = xdr_int(xd, &magic);
1200 if (magic != CPT_MAGIC2)
1209 static int do_cpt_state(XDR* xd, int fflags, t_state* state, FILE* list)
1212 const StatePart part = StatePart::microState;
1213 const int sflags = state->flags;
1214 // If reading, state->natoms was probably just read, so
1215 // allocations need to be managed. If writing, this won't change
1216 // anything that matters.
1217 state_change_natoms(state, state->natoms);
1218 for (int i = 0; (i < estNR && ret == 0); i++)
1220 if (fflags & (1 << i))
1225 ret = doRealArrayRef(
1226 xd, part, i, sflags,
1227 gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()),
1231 ret = do_cpte_int(xd, part, i, sflags, &state->fep_state, list);
1233 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1235 ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list);
1237 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1239 ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list);
1242 ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list);
1245 ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list);
1248 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list);
1251 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list);
1254 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list);
1257 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list);
1260 ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list);
1263 ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list);
1265 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1266 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1268 ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list);
1271 ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list);
1273 /* The RNG entries are no longer written,
1274 * the next 4 lines are only for reading old files.
1276 case estLD_RNG_NOTSUPPORTED:
1277 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1279 case estLD_RNGI_NOTSUPPORTED:
1280 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1282 case estMC_RNG_NOTSUPPORTED:
1283 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1285 case estMC_RNGI_NOTSUPPORTED:
1286 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1288 case estDISRE_INITF:
1289 ret = do_cpte_real(xd, part, i, sflags, &state->hist.disre_initf, list);
1291 case estDISRE_RM3TAV:
1292 ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs,
1293 &state->hist.disre_rm3tav, list);
1295 case estORIRE_INITF:
1296 ret = do_cpte_real(xd, part, i, sflags, &state->hist.orire_initf, list);
1299 ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav,
1300 &state->hist.orire_Dtav, list);
1302 case estPULLCOMPREVSTEP:
1303 ret = doVector<double>(xd, part, i, sflags, &state->pull_com_prev_step, list);
1307 "Unknown state entry %d\n"
1308 "You are reading a checkpoint file written by different code, which "
1317 static int do_cpt_ekinstate(XDR* xd, int fflags, ekinstate_t* ekins, FILE* list)
1321 const StatePart part = StatePart::kineticEnergy;
1322 for (int i = 0; (i < eeksNR && ret == 0); i++)
1324 if (fflags & (1 << i))
1330 ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list);
1333 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list);
1336 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list);
1339 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list);
1342 ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list);
1344 case eeksEKINSCALEF:
1345 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list);
1348 ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list);
1350 case eeksEKINSCALEH:
1351 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list);
1354 ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list);
1356 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1359 "Unknown ekin data state entry %d\n"
1360 "You are probably reading a new checkpoint file with old code",
1370 static int do_cpt_swapstate(XDR* xd, gmx_bool bRead, int eSwapCoords, swaphistory_t* swapstate, FILE* list)
1372 int swap_cpt_version = 2;
1374 if (eSwapCoords == eswapNO)
1379 swapstate->bFromCpt = bRead;
1380 swapstate->eSwapCoords = eSwapCoords;
1382 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1383 if (bRead && swap_cpt_version < 2)
1386 "Cannot read checkpoint files that were written with old versions"
1387 "of the ion/water position swapping protocol.\n");
1390 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1392 /* When reading, init_swapcoords has not been called yet,
1393 * so we have to allocate memory first. */
1394 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1397 snew(swapstate->ionType, swapstate->nIonTypes);
1400 for (int ic = 0; ic < eCompNR; ic++)
1402 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1404 swapstateIons_t* gs = &swapstate->ionType[ii];
1408 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1412 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1417 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1421 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1424 if (bRead && (nullptr == gs->nMolPast[ic]))
1426 snew(gs->nMolPast[ic], swapstate->nAverage);
1429 for (int j = 0; j < swapstate->nAverage; j++)
1433 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1437 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1443 /* Ion flux per channel */
1444 for (int ic = 0; ic < eChanNR; ic++)
1446 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1448 swapstateIons_t* gs = &swapstate->ionType[ii];
1452 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1456 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1461 /* Ion flux leakage */
1464 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1468 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1472 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1474 swapstateIons_t* gs = &swapstate->ionType[ii];
1476 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1480 snew(gs->channel_label, gs->nMol);
1481 snew(gs->comp_from, gs->nMol);
1484 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1485 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1488 /* Save the last known whole positions to checkpoint
1489 * file to be able to also make multimeric channels whole in PBC */
1490 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1491 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1494 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1495 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1496 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1497 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1501 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0],
1502 *swapstate->xc_old_whole_p[eChan0], list);
1503 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1],
1504 *swapstate->xc_old_whole_p[eChan1], list);
1511 static int do_cpt_enerhist(XDR* xd, gmx_bool bRead, int fflags, energyhistory_t* enerhist, FILE* list)
1520 GMX_RELEASE_ASSERT(enerhist != nullptr,
1521 "With energy history, we need a valid enerhist pointer");
1523 /* This is stored/read for backward compatibility */
1524 int energyHistoryNumEnergies = 0;
1527 enerhist->nsteps = 0;
1529 enerhist->nsteps_sim = 0;
1530 enerhist->nsum_sim = 0;
1532 else if (enerhist != nullptr)
1534 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1537 delta_h_history_t* deltaH = enerhist->deltaHForeignLambdas.get();
1538 const StatePart part = StatePart::energyHistory;
1539 for (int i = 0; (i < eenhNR && ret == 0); i++)
1541 if (fflags & (1 << i))
1546 ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list);
1548 case eenhENERGY_AVER:
1549 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list);
1551 case eenhENERGY_SUM:
1552 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list);
1554 case eenhENERGY_NSUM:
1555 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list);
1557 case eenhENERGY_SUM_SIM:
1558 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list);
1560 case eenhENERGY_NSUM_SIM:
1561 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list);
1563 case eenhENERGY_NSTEPS:
1564 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list);
1566 case eenhENERGY_NSTEPS_SIM:
1567 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list);
1569 case eenhENERGY_DELTA_H_NN:
1572 if (!bRead && deltaH != nullptr)
1574 numDeltaH = deltaH->dh.size();
1576 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1579 if (deltaH == nullptr)
1581 enerhist->deltaHForeignLambdas = std::make_unique<delta_h_history_t>();
1582 deltaH = enerhist->deltaHForeignLambdas.get();
1584 deltaH->dh.resize(numDeltaH);
1585 deltaH->start_lambda_set = FALSE;
1589 case eenhENERGY_DELTA_H_LIST:
1590 for (auto dh : deltaH->dh)
1592 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1595 case eenhENERGY_DELTA_H_STARTTIME:
1596 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list);
1598 case eenhENERGY_DELTA_H_STARTLAMBDA:
1599 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list);
1603 "Unknown energy history entry %d\n"
1604 "You are probably reading a new checkpoint file with old code",
1610 if ((fflags & (1 << eenhENERGY_SUM)) && !(fflags & (1 << eenhENERGY_SUM_SIM)))
1612 /* Assume we have an old file format and copy sum to sum_sim */
1613 enerhist->ener_sum_sim = enerhist->ener_sum;
1616 if ((fflags & (1 << eenhENERGY_NSUM)) && !(fflags & (1 << eenhENERGY_NSTEPS)))
1618 /* Assume we have an old file format and copy nsum to nsteps */
1619 enerhist->nsteps = enerhist->nsum;
1621 if ((fflags & (1 << eenhENERGY_NSUM_SIM)) && !(fflags & (1 << eenhENERGY_NSTEPS_SIM)))
1623 /* Assume we have an old file format and copy nsum to nsteps */
1624 enerhist->nsteps_sim = enerhist->nsum_sim;
1630 static int doCptPullCoordHist(XDR* xd, PullCoordinateHistory* pullCoordHist, const StatePart part, FILE* list)
1635 flags |= ((1 << epullcoordh_VALUE_REF_SUM) | (1 << epullcoordh_VALUE_SUM)
1636 | (1 << epullcoordh_DR01_SUM) | (1 << epullcoordh_DR23_SUM)
1637 | (1 << epullcoordh_DR45_SUM) | (1 << epullcoordh_FSCAL_SUM));
1639 for (int i = 0; i < epullcoordh_NR && ret == 0; i++)
1643 case epullcoordh_VALUE_REF_SUM:
1644 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->valueRef), list);
1646 case epullcoordh_VALUE_SUM:
1647 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->value), list);
1649 case epullcoordh_DR01_SUM:
1650 for (int j = 0; j < DIM && ret == 0; j++)
1652 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr01[j]), list);
1655 case epullcoordh_DR23_SUM:
1656 for (int j = 0; j < DIM && ret == 0; j++)
1658 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr23[j]), list);
1661 case epullcoordh_DR45_SUM:
1662 for (int j = 0; j < DIM && ret == 0; j++)
1664 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr45[j]), list);
1667 case epullcoordh_FSCAL_SUM:
1668 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->scalarForce), list);
1670 case epullcoordh_DYNAX_SUM:
1671 for (int j = 0; j < DIM && ret == 0; j++)
1673 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dynaX[j]), list);
1682 static int doCptPullGroupHist(XDR* xd, PullGroupHistory* pullGroupHist, const StatePart part, FILE* list)
1687 flags |= ((1 << epullgrouph_X_SUM));
1689 for (int i = 0; i < epullgrouph_NR; i++)
1693 case epullgrouph_X_SUM:
1694 for (int j = 0; j < DIM && ret == 0; j++)
1696 ret = do_cpte_double(xd, part, i, flags, &(pullGroupHist->x[j]), list);
1706 static int doCptPullHist(XDR* xd, gmx_bool bRead, int fflags, PullHistory* pullHist, const StatePart part, FILE* list)
1709 int pullHistoryNumCoordinates = 0;
1710 int pullHistoryNumGroups = 0;
1712 /* Retain the number of terms in the sum and the number of coordinates (used for writing
1713 * average pull forces and coordinates) in the pull history, in temporary variables,
1714 * in case they cannot be read from the checkpoint, in order to have backward compatibility */
1717 pullHist->numValuesInXSum = 0;
1718 pullHist->numValuesInFSum = 0;
1720 else if (pullHist != nullptr)
1722 pullHistoryNumCoordinates = pullHist->pullCoordinateSums.size();
1723 pullHistoryNumGroups = pullHist->pullGroupSums.size();
1727 GMX_RELEASE_ASSERT(fflags == 0, "Without pull history, all flags should be off");
1730 for (int i = 0; (i < epullhNR && ret == 0); i++)
1732 if (fflags & (1 << i))
1736 case epullhPULL_NUMCOORDINATES:
1737 ret = do_cpte_int(xd, part, i, fflags, &pullHistoryNumCoordinates, list);
1739 case epullhPULL_NUMGROUPS:
1740 do_cpt_int_err(xd, eenh_names[i], &pullHistoryNumGroups, list);
1742 case epullhPULL_NUMVALUESINXSUM:
1743 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInXSum, list);
1745 case epullhPULL_NUMVALUESINFSUM:
1746 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInFSum, list);
1750 "Unknown pull history entry %d\n"
1751 "You are probably reading a new checkpoint file with old code",
1758 pullHist->pullCoordinateSums.resize(pullHistoryNumCoordinates);
1759 pullHist->pullGroupSums.resize(pullHistoryNumGroups);
1761 if (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0)
1763 for (size_t i = 0; i < pullHist->pullCoordinateSums.size() && ret == 0; i++)
1765 ret = doCptPullCoordHist(xd, &(pullHist->pullCoordinateSums[i]), part, list);
1767 for (size_t i = 0; i < pullHist->pullGroupSums.size() && ret == 0; i++)
1769 ret = doCptPullGroupHist(xd, &(pullHist->pullGroupSums[i]), part, list);
1776 static int do_cpt_df_hist(XDR* xd, int fflags, int nlambda, df_history_t** dfhistPtr, FILE* list)
1785 if (*dfhistPtr == nullptr)
1787 snew(*dfhistPtr, 1);
1788 (*dfhistPtr)->nlambda = nlambda;
1789 init_df_history(*dfhistPtr, nlambda);
1791 df_history_t* dfhist = *dfhistPtr;
1793 const StatePart part = StatePart::freeEnergyHistory;
1794 for (int i = 0; (i < edfhNR && ret == 0); i++)
1796 if (fflags & (1 << i))
1801 ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list);
1804 ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list);
1807 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list);
1810 ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list);
1812 case edfhSUMWEIGHTS:
1813 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list);
1816 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list);
1819 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list);
1822 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list);
1825 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list);
1828 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list);
1831 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list);
1834 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list);
1837 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list);
1840 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list);
1845 "Unknown df history entry %d\n"
1846 "You are probably reading a new checkpoint file with old code",
1856 /* This function stores the last whole configuration of the reference and
1857 * average structure in the .cpt file
1859 static int do_cpt_EDstate(XDR* xd, gmx_bool bRead, int nED, edsamhistory_t* EDstate, FILE* list)
1866 EDstate->bFromCpt = bRead;
1869 /* When reading, init_edsam has not been called yet,
1870 * so we have to allocate memory first. */
1873 snew(EDstate->nref, EDstate->nED);
1874 snew(EDstate->old_sref, EDstate->nED);
1875 snew(EDstate->nav, EDstate->nED);
1876 snew(EDstate->old_sav, EDstate->nED);
1879 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1880 for (int i = 0; i < EDstate->nED; i++)
1884 /* Reference structure SREF */
1885 sprintf(buf, "ED%d # of atoms in reference structure", i + 1);
1886 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1887 sprintf(buf, "ED%d x_ref", i + 1);
1890 snew(EDstate->old_sref[i], EDstate->nref[i]);
1891 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1895 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1898 /* Average structure SAV */
1899 sprintf(buf, "ED%d # of atoms in average structure", i + 1);
1900 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1901 sprintf(buf, "ED%d x_av", i + 1);
1904 snew(EDstate->old_sav[i], EDstate->nav[i]);
1905 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1909 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1916 static int do_cpt_correlation_grid(XDR* xd,
1918 gmx_unused int fflags,
1919 gmx::CorrelationGridHistory* corrGrid,
1925 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1926 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1927 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1931 initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize,
1932 corrGrid->blockDataListSize);
1935 for (gmx::CorrelationBlockDataHistory& blockData : corrGrid->blockDataBuffer)
1937 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1938 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1939 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1940 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1941 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1942 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1943 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1944 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1945 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1946 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1947 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1953 static int do_cpt_awh_bias(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhBiasHistory* biasHistory, FILE* list)
1957 gmx::AwhBiasStateHistory* state = &biasHistory->state;
1958 for (int i = 0; (i < eawhhNR && ret == 0); i++)
1960 if (fflags & (1 << i))
1964 case eawhhIN_INITIAL:
1965 do_cpt_bool_err(xd, eawhh_names[i], &state->in_initial, list);
1967 case eawhhEQUILIBRATEHISTOGRAM:
1968 do_cpt_bool_err(xd, eawhh_names[i], &state->equilibrateHistogram, list);
1971 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list);
1978 numPoints = biasHistory->pointState.size();
1980 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1983 biasHistory->pointState.resize(numPoints);
1987 case eawhhCOORDPOINT:
1988 for (auto& psh : biasHistory->pointState)
1990 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1991 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1992 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1993 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1994 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1995 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1996 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1997 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1998 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1999 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
2000 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
2003 case eawhhUMBRELLAGRIDPOINT:
2004 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list);
2006 case eawhhUPDATELIST:
2007 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
2008 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
2010 case eawhhLOGSCALEDSAMPLEWEIGHT:
2011 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
2012 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
2014 case eawhhNUMUPDATES:
2015 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
2017 case eawhhFORCECORRELATIONGRID:
2018 ret = do_cpt_correlation_grid(xd, bRead, fflags,
2019 &biasHistory->forceCorrelationGrid, list, i);
2021 default: gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
2029 static int do_cpt_awh(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhHistory* awhHistory, FILE* list)
2035 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
2037 if (awhHistory == nullptr)
2039 GMX_RELEASE_ASSERT(bRead,
2040 "do_cpt_awh should not be called for writing without an AwhHistory");
2042 awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
2043 awhHistory = awhHistoryLocal.get();
2046 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
2047 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
2048 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
2049 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
2050 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
2051 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
2053 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
2054 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
2059 numBias = awhHistory->bias.size();
2061 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
2065 awhHistory->bias.resize(numBias);
2067 for (auto& bias : awhHistory->bias)
2069 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
2075 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
2080 static void do_cpt_mdmodules(int fileVersion,
2081 t_fileio* checkpointFileHandle,
2082 const gmx::MdModulesNotifier& mdModulesNotifier)
2084 if (fileVersion >= cptv_MdModules)
2086 gmx::FileIOXdrSerializer serializer(checkpointFileHandle);
2087 gmx::KeyValueTreeObject mdModuleCheckpointParameterTree =
2088 gmx::deserializeKeyValueTree(&serializer);
2089 gmx::MdModulesCheckpointReadingDataOnMaster mdModuleCheckpointReadingDataOnMaster = {
2090 mdModuleCheckpointParameterTree, fileVersion
2092 mdModulesNotifier.checkpointingNotifications_.notify(mdModuleCheckpointReadingDataOnMaster);
2096 static int do_cpt_files(XDR* xd, gmx_bool bRead, std::vector<gmx_file_position_t>* outputfiles, FILE* list, int file_version)
2099 gmx_off_t mask = 0xFFFFFFFFL;
2100 int offset_high, offset_low;
2101 std::array<char, CPTSTRLEN> buf;
2102 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
2104 // Ensure that reading pre-allocates outputfiles, while writing
2105 // writes what is already there.
2106 int nfiles = outputfiles->size();
2107 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
2113 outputfiles->resize(nfiles);
2116 for (auto& outputfile : *outputfiles)
2118 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
2121 do_cpt_string_err(xd, "output filename", buf, list);
2122 std::copy(std::begin(buf), std::end(buf), std::begin(outputfile.filename));
2124 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2128 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2132 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32)
2133 | (static_cast<gmx_off_t>(offset_low) & mask);
2137 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
2139 offset = outputfile.offset;
2147 offset_low = static_cast<int>(offset & mask);
2148 offset_high = static_cast<int>((offset >> 32) & mask);
2150 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2154 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2159 if (file_version >= 8)
2161 if (do_cpt_int(xd, "file_checksum_size", &outputfile.checksumSize, list) != 0)
2165 if (do_cpt_u_chars(xd, "file_checksum", outputfile.checksum.size(),
2166 outputfile.checksum.data(), list)
2174 outputfile.checksumSize = -1;
2181 void write_checkpoint(const char* fn,
2182 gmx_bool bNumberAndKeep,
2184 const t_commrec* cr,
2188 int simulation_part,
2194 ObservablesHistory* observablesHistory,
2195 const gmx::MdModulesNotifier& mdModulesNotifier)
2198 char* fntemp; /* the temporary checkpoint file name */
2200 char buf[1024], suffix[5 + STEPSTRSIZE], sbuf[STEPSTRSIZE];
2203 if (DOMAINDECOMP(cr))
2205 npmenodes = cr->npmenodes;
2213 /* make the new temporary filename */
2214 snew(fntemp, std::strlen(fn) + 5 + STEPSTRSIZE);
2215 std::strcpy(fntemp, fn);
2216 fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2217 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
2218 std::strcat(fntemp, suffix);
2219 std::strcat(fntemp, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2221 /* if we can't rename, we just overwrite the cpt file.
2222 * dangerous if interrupted.
2224 snew(fntemp, std::strlen(fn));
2225 std::strcpy(fntemp, fn);
2227 std::string timebuf = gmx_format_current_time();
2231 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n", gmx_step_str(step, buf), timebuf.c_str());
2234 /* Get offsets for open files */
2235 auto outputfiles = gmx_fio_get_output_file_positions();
2237 fp = gmx_fio_open(fntemp, "w");
2240 if (state->ekinstate.bUpToDate)
2242 flags_eks = ((1 << eeksEKIN_N) | (1 << eeksEKINH) | (1 << eeksEKINF) | (1 << eeksEKINO)
2243 | (1 << eeksEKINSCALEF) | (1 << eeksEKINSCALEH) | (1 << eeksVSCALE)
2244 | (1 << eeksDEKINDL) | (1 << eeksMVCOS));
2251 energyhistory_t* enerhist = observablesHistory->energyHistory.get();
2253 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
2255 flags_enh |= (1 << eenhENERGY_N) | (1 << eenhENERGY_NSTEPS) | (1 << eenhENERGY_NSTEPS_SIM);
2256 if (enerhist->nsum > 0)
2258 flags_enh |= ((1 << eenhENERGY_AVER) | (1 << eenhENERGY_SUM) | (1 << eenhENERGY_NSUM));
2260 if (enerhist->nsum_sim > 0)
2262 flags_enh |= ((1 << eenhENERGY_SUM_SIM) | (1 << eenhENERGY_NSUM_SIM));
2264 if (enerhist->deltaHForeignLambdas != nullptr)
2266 flags_enh |= ((1 << eenhENERGY_DELTA_H_NN) | (1 << eenhENERGY_DELTA_H_LIST)
2267 | (1 << eenhENERGY_DELTA_H_STARTTIME) | (1 << eenhENERGY_DELTA_H_STARTLAMBDA));
2271 PullHistory* pullHist = observablesHistory->pullHistory.get();
2272 int flagsPullHistory = 0;
2273 if (pullHist != nullptr && (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0))
2275 flagsPullHistory |= (1 << epullhPULL_NUMCOORDINATES);
2276 flagsPullHistory |= ((1 << epullhPULL_NUMGROUPS) | (1 << epullhPULL_NUMVALUESINXSUM)
2277 | (1 << epullhPULL_NUMVALUESINFSUM));
2283 flags_dfh = ((1 << edfhBEQUIL) | (1 << edfhNATLAMBDA) | (1 << edfhSUMWEIGHTS)
2284 | (1 << edfhSUMDG) | (1 << edfhTIJ) | (1 << edfhTIJEMP));
2287 flags_dfh |= ((1 << edfhWLDELTA) | (1 << edfhWLHISTO));
2289 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER)
2290 || (elamstats == elamstatsMETROPOLIS))
2292 flags_dfh |= ((1 << edfhACCUMP) | (1 << edfhACCUMM) | (1 << edfhACCUMP2)
2293 | (1 << edfhACCUMM2) | (1 << edfhSUMMINVAR) | (1 << edfhSUMVAR));
2302 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
2304 flags_awhh |= ((1 << eawhhIN_INITIAL) | (1 << eawhhEQUILIBRATEHISTOGRAM) | (1 << eawhhHISTSIZE)
2305 | (1 << eawhhNPOINTS) | (1 << eawhhCOORDPOINT) | (1 << eawhhUMBRELLAGRIDPOINT)
2306 | (1 << eawhhUPDATELIST) | (1 << eawhhLOGSCALEDSAMPLEWEIGHT)
2307 | (1 << eawhhNUMUPDATES) | (1 << eawhhFORCECORRELATIONGRID));
2310 /* We can check many more things now (CPU, acceleration, etc), but
2311 * it is highly unlikely to have two separate builds with exactly
2312 * the same version, user, time, and build host!
2315 int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
2317 edsamhistory_t* edsamhist = observablesHistory->edsamHistory.get();
2318 int nED = (edsamhist ? edsamhist->nED : 0);
2320 swaphistory_t* swaphist = observablesHistory->swapHistory.get();
2321 int eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
2323 CheckpointHeaderContents headerContents = { 0,
2341 state->nhchainlength,
2351 std::strcpy(headerContents.version, gmx_version());
2352 std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
2353 std::strcpy(headerContents.ftime, timebuf.c_str());
2354 if (DOMAINDECOMP(cr))
2356 copy_ivec(domdecCells, headerContents.dd_nc);
2359 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
2361 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0)
2362 || (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0)
2363 || (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0)
2364 || (doCptPullHist(gmx_fio_getxdr(fp), FALSE, flagsPullHistory, pullHist, StatePart::pullHistory, nullptr)
2366 || (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0)
2367 || (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0)
2368 || (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0)
2369 || (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0)
2370 || (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr, headerContents.file_version) < 0))
2372 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2375 // Checkpointing MdModules
2377 gmx::KeyValueTreeBuilder builder;
2378 gmx::MdModulesWriteCheckpointData mdModulesWriteCheckpoint = { builder.rootObject(),
2379 headerContents.file_version };
2380 mdModulesNotifier.checkpointingNotifications_.notify(mdModulesWriteCheckpoint);
2381 auto tree = builder.build();
2382 gmx::FileIOXdrSerializer serializer(fp);
2383 gmx::serializeKeyValueTree(tree, &serializer);
2386 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2388 /* we really, REALLY, want to make sure to physically write the checkpoint,
2389 and all the files it depends on, out to disk. Because we've
2390 opened the checkpoint with gmx_fio_open(), it's in our list
2392 ret = gmx_fio_all_output_fsync();
2397 sprintf(buf, "Cannot fsync '%s'; maybe you are out of disk space?", gmx_fio_getname(ret));
2399 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
2405 gmx_warning("%s", buf);
2409 if (gmx_fio_close(fp) != 0)
2411 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2414 /* we don't move the checkpoint if the user specified they didn't want it,
2415 or if the fsyncs failed */
2417 if (!bNumberAndKeep && !ret)
2421 /* Rename the previous checkpoint file */
2422 std::strcpy(buf, fn);
2423 buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2424 std::strcat(buf, "_prev");
2425 std::strcat(buf, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2428 /* we copy here so that if something goes wrong between now and
2429 * the rename below, there's always a state.cpt.
2430 * If renames are atomic (such as in POSIX systems),
2431 * this copying should be unneccesary.
2433 gmx_file_copy(fn, buf, FALSE);
2434 /* We don't really care if this fails:
2435 * there's already a new checkpoint.
2440 gmx_file_rename(fn, buf);
2443 if (gmx_file_rename(fntemp, fn) != 0)
2445 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2448 #endif /* GMX_NO_RENAME */
2453 /*code for alternate checkpointing scheme. moved from top of loop over
2455 fcRequestCheckPoint();
2456 if (fcCheckPointParallel(cr->nodeid, NULL, 0) == 0)
2458 gmx_fatal(3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step);
2460 #endif /* end GMX_FAHCORE block */
2463 static void check_int(FILE* fplog, const char* type, int p, int f, gmx_bool* mm)
2465 bool foundMismatch = (p != f);
2473 fprintf(fplog, " %s mismatch,\n", type);
2474 fprintf(fplog, " current program: %d\n", p);
2475 fprintf(fplog, " checkpoint file: %d\n", f);
2476 fprintf(fplog, "\n");
2480 static void check_string(FILE* fplog, const char* type, const char* p, const char* f, gmx_bool* mm)
2482 bool foundMismatch = (std::strcmp(p, f) != 0);
2490 fprintf(fplog, " %s mismatch,\n", type);
2491 fprintf(fplog, " current program: %s\n", p);
2492 fprintf(fplog, " checkpoint file: %s\n", f);
2493 fprintf(fplog, "\n");
2497 static void check_match(FILE* fplog,
2498 const t_commrec* cr,
2500 const CheckpointHeaderContents& headerContents,
2501 gmx_bool reproducibilityRequested)
2503 /* Note that this check_string on the version will also print a message
2504 * when only the minor version differs. But we only print a warning
2505 * message further down with reproducibilityRequested=TRUE.
2507 gmx_bool versionDiffers = FALSE;
2508 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2510 gmx_bool precisionDiffers = FALSE;
2511 check_int(fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2512 if (precisionDiffers)
2514 const char msg_precision_difference[] =
2515 "You are continuing a simulation with a different precision. Not matching\n"
2516 "single/double precision will lead to precision or performance loss.\n";
2519 fprintf(fplog, "%s\n", msg_precision_difference);
2523 gmx_bool mm = (versionDiffers || precisionDiffers);
2525 if (reproducibilityRequested)
2527 check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(),
2528 headerContents.fprog, &mm);
2530 check_int(fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2533 if (cr->nnodes > 1 && reproducibilityRequested)
2535 check_int(fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2537 int npp = cr->nnodes;
2538 if (cr->npmenodes >= 0)
2540 npp -= cr->npmenodes;
2542 int npp_f = headerContents.nnodes;
2543 if (headerContents.npme >= 0)
2545 npp_f -= headerContents.npme;
2549 check_int(fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2550 check_int(fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2551 check_int(fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2557 /* Gromacs should be able to continue from checkpoints between
2558 * different patch level versions, but we do not guarantee
2559 * compatibility between different major/minor versions - check this.
2563 sscanf(gmx_version(), "%5d", &gmx_major);
2564 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2565 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2567 const char msg_major_version_difference[] =
2568 "The current GROMACS major version is not identical to the one that\n"
2569 "generated the checkpoint file. In principle GROMACS does not support\n"
2570 "continuation from checkpoints between different versions, so we advise\n"
2571 "against this. If you still want to try your luck we recommend that you use\n"
2572 "the -noappend flag to keep your output files from the two versions separate.\n"
2573 "This might also work around errors where the output fields in the energy\n"
2574 "file have changed between the different versions.\n";
2576 const char msg_mismatch_notice[] =
2577 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2578 "Continuation is exact, but not guaranteed to be binary identical.\n";
2580 if (majorVersionDiffers)
2584 fprintf(fplog, "%s\n", msg_major_version_difference);
2587 else if (reproducibilityRequested)
2589 /* Major & minor versions match at least, but something is different. */
2592 fprintf(fplog, "%s\n", msg_mismatch_notice);
2598 static void read_checkpoint(const char* fn,
2600 const t_commrec* cr,
2603 int* init_fep_state,
2604 CheckpointHeaderContents* headerContents,
2606 ObservablesHistory* observablesHistory,
2607 gmx_bool reproducibilityRequested,
2608 const gmx::MdModulesNotifier& mdModulesNotifier)
2611 char buf[STEPSTRSIZE];
2614 fp = gmx_fio_open(fn, "r");
2615 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2617 // If we are appending, then we don't want write to the open log
2618 // file because we still need to compute a checksum for it. In
2619 // that case, the filehandle will be nullptr. Otherwise, we report
2620 // to the new log file about the checkpoint file that we are
2622 FILE* fplog = gmx_fio_getfp(logfio);
2625 fprintf(fplog, "\n");
2626 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2627 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2628 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2629 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2630 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2631 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2632 fprintf(fplog, " time: %f\n", headerContents->t);
2633 fprintf(fplog, "\n");
2636 if (headerContents->natoms != state->natoms)
2639 "Checkpoint file is for a system of %d atoms, while the current system consists "
2641 headerContents->natoms, state->natoms);
2643 if (headerContents->ngtc != state->ngtc)
2646 "Checkpoint file is for a system of %d T-coupling groups, while the current "
2647 "system consists of %d T-coupling groups",
2648 headerContents->ngtc, state->ngtc);
2650 if (headerContents->nnhpres != state->nnhpres)
2653 "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the "
2654 "current system consists of %d NH-pressure-coupling variables",
2655 headerContents->nnhpres, state->nnhpres);
2658 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2659 if (headerContents->nlambda != nlambdaHistory)
2662 "Checkpoint file is for a system with %d lambda states, while the current system "
2663 "consists of %d lambda states",
2664 headerContents->nlambda, nlambdaHistory);
2667 init_gtc_state(state, state->ngtc, state->nnhpres,
2668 headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2669 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2671 if (headerContents->eIntegrator != eIntegrator)
2674 "Cannot change integrator during a checkpoint restart. Perhaps you should make a "
2675 "new .tpr with grompp -f new.mdp -t %s",
2679 if (headerContents->flags_state != state->flags)
2682 "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you "
2683 "should make a new .tpr with grompp -f new.mdp -t %s",
2689 check_match(fplog, cr, dd_nc, *headerContents, reproducibilityRequested);
2692 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2693 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it
2694 here. Investigate for 5.0. */
2699 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2704 state->ekinstate.hasReadEkinState = (((headerContents->flags_eks & (1 << eeksEKINH)) != 0)
2705 || ((headerContents->flags_eks & (1 << eeksEKINF)) != 0)
2706 || ((headerContents->flags_eks & (1 << eeksEKINO)) != 0)
2707 || (((headerContents->flags_eks & (1 << eeksEKINSCALEF))
2708 | (headerContents->flags_eks & (1 << eeksEKINSCALEH))
2709 | (headerContents->flags_eks & (1 << eeksVSCALE)))
2712 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2714 observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
2716 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents->flags_enh,
2717 observablesHistory->energyHistory.get(), nullptr);
2723 if (headerContents->flagsPullHistory)
2725 if (observablesHistory->pullHistory == nullptr)
2727 observablesHistory->pullHistory = std::make_unique<PullHistory>();
2729 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents->flagsPullHistory,
2730 observablesHistory->pullHistory.get(), StatePart::pullHistory, nullptr);
2737 if (headerContents->file_version < 6)
2740 "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2743 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda,
2744 &state->dfhist, nullptr);
2750 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2752 observablesHistory->edsamHistory = std::make_unique<edsamhistory_t>(edsamhistory_t{});
2754 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED,
2755 observablesHistory->edsamHistory.get(), nullptr);
2761 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2763 state->awhHistory = std::make_shared<gmx::AwhHistory>();
2765 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2771 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2773 observablesHistory->swapHistory = std::make_unique<swaphistory_t>(swaphistory_t{});
2775 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords,
2776 observablesHistory->swapHistory.get(), nullptr);
2782 std::vector<gmx_file_position_t> outputfiles;
2783 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2788 do_cpt_mdmodules(headerContents->file_version, fp, mdModulesNotifier);
2789 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2794 if (gmx_fio_close(fp) != 0)
2796 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2801 void load_checkpoint(const char* fn,
2803 const t_commrec* cr,
2807 ObservablesHistory* observablesHistory,
2808 gmx_bool reproducibilityRequested,
2809 const gmx::MdModulesNotifier& mdModulesNotifier)
2811 CheckpointHeaderContents headerContents;
2814 /* Read the state from the checkpoint file */
2815 read_checkpoint(fn, logfio, cr, dd_nc, ir->eI, &(ir->fepvals->init_fep_state), &headerContents,
2816 state, observablesHistory, reproducibilityRequested, mdModulesNotifier);
2820 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
2821 gmx::MdModulesCheckpointReadingBroadcast broadcastCheckPointData = { *cr, headerContents.file_version };
2822 mdModulesNotifier.checkpointingNotifications_.notify(broadcastCheckPointData);
2824 ir->bContinuation = TRUE;
2825 // TODO Should the following condition be <=? Currently if you
2826 // pass a checkpoint written by an normal completion to a restart,
2827 // mdrun will read all input, does some work but no steps, and
2828 // write successful output. But perhaps that is not desirable.
2829 if ((ir->nsteps >= 0) && (ir->nsteps < headerContents.step))
2831 // Note that we do not intend to support the use of mdrun
2832 // -nsteps to circumvent this condition.
2833 char nstepsString[STEPSTRSIZE], stepString[STEPSTRSIZE];
2834 gmx_step_str(ir->nsteps, nstepsString);
2835 gmx_step_str(headerContents.step, stepString);
2837 "The input requested %s steps, however the checkpoint "
2838 "file has already reached step %s. The simulation will not "
2839 "proceed, because either your simulation is already complete, "
2840 "or your combination of input files don't match.",
2841 nstepsString, stepString);
2843 if (ir->nsteps >= 0)
2845 ir->nsteps += ir->init_step - headerContents.step;
2847 ir->init_step = headerContents.step;
2848 ir->simulation_part = headerContents.simulation_part + 1;
2851 void read_checkpoint_part_and_step(const char* filename, int* simulation_part, int64_t* step)
2855 if (filename == nullptr || !gmx_fexist(filename) || ((fp = gmx_fio_open(filename, "r")) == nullptr))
2857 *simulation_part = 0;
2862 CheckpointHeaderContents headerContents;
2863 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2865 *simulation_part = headerContents.simulation_part;
2866 *step = headerContents.step;
2869 static CheckpointHeaderContents read_checkpoint_data(t_fileio* fp,
2871 std::vector<gmx_file_position_t>* outputfiles)
2873 CheckpointHeaderContents headerContents;
2874 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2875 state->natoms = headerContents.natoms;
2876 state->ngtc = headerContents.ngtc;
2877 state->nnhpres = headerContents.nnhpres;
2878 state->nhchainlength = headerContents.nhchainlength;
2879 state->flags = headerContents.flags_state;
2880 int ret = do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2885 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2891 energyhistory_t enerhist;
2892 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, nullptr);
2897 PullHistory pullHist = {};
2898 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist,
2899 StatePart::pullHistory, nullptr);
2905 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda,
2906 &state->dfhist, nullptr);
2912 edsamhistory_t edsamhist = {};
2913 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2919 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2925 swaphistory_t swaphist = {};
2926 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2932 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, outputfiles, nullptr, headerContents.file_version);
2938 gmx::MdModulesNotifier mdModuleNotifier;
2939 do_cpt_mdmodules(headerContents.file_version, fp, mdModuleNotifier);
2940 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2945 return headerContents;
2948 void read_checkpoint_trxframe(t_fileio* fp, t_trxframe* fr)
2951 std::vector<gmx_file_position_t> outputfiles;
2952 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, &outputfiles);
2954 fr->natoms = state.natoms;
2956 fr->step = int64_to_int(headerContents.step, "conversion of checkpoint to trajectory");
2958 fr->time = headerContents.t;
2960 fr->lambda = state.lambda[efptFEP];
2961 fr->fep_state = state.fep_state;
2963 fr->bX = ((state.flags & (1 << estX)) != 0);
2966 fr->x = makeRvecArray(state.x, state.natoms);
2968 fr->bV = ((state.flags & (1 << estV)) != 0);
2971 fr->v = makeRvecArray(state.v, state.natoms);
2974 fr->bBox = ((state.flags & (1 << estBOX)) != 0);
2977 copy_mat(state.box, fr->box);
2981 void list_checkpoint(const char* fn, FILE* out)
2988 fp = gmx_fio_open(fn, "r");
2989 CheckpointHeaderContents headerContents;
2990 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
2991 state.natoms = headerContents.natoms;
2992 state.ngtc = headerContents.ngtc;
2993 state.nnhpres = headerContents.nnhpres;
2994 state.nhchainlength = headerContents.nhchainlength;
2995 state.flags = headerContents.flags_state;
2996 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
3001 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
3007 energyhistory_t enerhist;
3008 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, out);
3012 PullHistory pullHist = {};
3013 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist,
3014 StatePart::pullHistory, out);
3019 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda,
3020 &state.dfhist, out);
3025 edsamhistory_t edsamhist = {};
3026 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
3031 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state.awhHistory.get(), out);
3036 swaphistory_t swaphist = {};
3037 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
3042 std::vector<gmx_file_position_t> outputfiles;
3043 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
3048 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
3055 if (gmx_fio_close(fp) != 0)
3057 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3061 /* This routine cannot print tons of data, since it is called before the log file is opened. */
3062 CheckpointHeaderContents read_checkpoint_simulation_part_and_filenames(t_fileio* fp,
3063 std::vector<gmx_file_position_t>* outputfiles)
3066 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, outputfiles);
3067 if (gmx_fio_close(fp) != 0)
3069 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3071 return headerContents;