2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012 by the GROMACS development team.
5 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
6 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /* The source code in this file should be thread-safe.
39 Please keep it that way. */
42 #include "checkpoint.h"
53 #include "buildinfo.h"
54 #include "gromacs/fileio/filetypes.h"
55 #include "gromacs/fileio/gmxfio.h"
56 #include "gromacs/fileio/gmxfio_xdr.h"
57 #include "gromacs/fileio/xdr_datatype.h"
58 #include "gromacs/fileio/xdrf.h"
59 #include "gromacs/gmxlib/network.h"
60 #include "gromacs/math/vec.h"
61 #include "gromacs/math/vecdump.h"
62 #include "gromacs/math/vectypes.h"
63 #include "gromacs/mdtypes/awh_correlation_history.h"
64 #include "gromacs/mdtypes/awh_history.h"
65 #include "gromacs/mdtypes/commrec.h"
66 #include "gromacs/mdtypes/df_history.h"
67 #include "gromacs/mdtypes/edsamhistory.h"
68 #include "gromacs/mdtypes/energyhistory.h"
69 #include "gromacs/mdtypes/inputrec.h"
70 #include "gromacs/mdtypes/md_enums.h"
71 #include "gromacs/mdtypes/mdrunoptions.h"
72 #include "gromacs/mdtypes/observableshistory.h"
73 #include "gromacs/mdtypes/pullhistory.h"
74 #include "gromacs/mdtypes/state.h"
75 #include "gromacs/mdtypes/swaphistory.h"
76 #include "gromacs/trajectory/trajectoryframe.h"
77 #include "gromacs/utility/arrayref.h"
78 #include "gromacs/utility/baseversion.h"
79 #include "gromacs/utility/cstringutil.h"
80 #include "gromacs/utility/fatalerror.h"
81 #include "gromacs/utility/futil.h"
82 #include "gromacs/utility/gmxassert.h"
83 #include "gromacs/utility/int64_to_int.h"
84 #include "gromacs/utility/keyvaluetree.h"
85 #include "gromacs/utility/keyvaluetreebuilder.h"
86 #include "gromacs/utility/keyvaluetreeserializer.h"
87 #include "gromacs/utility/mdmodulenotification.h"
88 #include "gromacs/utility/programcontext.h"
89 #include "gromacs/utility/smalloc.h"
90 #include "gromacs/utility/sysinfo.h"
91 #include "gromacs/utility/txtdump.h"
94 # include "corewrap.h"
97 #define CPT_MAGIC1 171817
98 #define CPT_MAGIC2 171819
100 /*! \brief Enum of values that describe the contents of a cpt file
101 * whose format matches a version number
103 * The enum helps the code be more self-documenting and ensure merges
104 * do not silently resolve when two patches make the same bump. When
105 * adding new functionality, add a new element just above cptv_Count
106 * in this enumeration, and write code below that does the right thing
107 * according to the value of file_version.
111 cptv_Unknown = 17, /**< Version before numbering scheme */
112 cptv_RemoveBuildMachineInformation, /**< remove functionality that makes mdrun builds non-reproducible */
113 cptv_ComPrevStepAsPullGroupReference, /**< Allow using COM of previous step as pull group PBC reference */
114 cptv_PullAverage, /**< Added possibility to output average pull force and position */
115 cptv_MdModules, /**< Added checkpointing for MdModules */
116 cptv_Count /**< the total number of cptv versions */
119 /*! \brief Version number of the file format written to checkpoint
120 * files by this version of the code.
122 * cpt_version should normally only be changed, via adding a new field
123 * to cptv enumeration, when the header or footer format changes.
125 * The state data format itself is backward and forward compatible.
126 * But old code can not read a new entry that is present in the file
127 * (but can read a new format when new entries are not present).
129 * The cpt_version increases whenever the file format in the main
130 * development branch changes, due to an extension of the cptv enum above.
131 * Backward compatibility for reading old run input files is maintained
132 * by checking this version number against that of the file and then using
133 * the correct code path. */
134 static const int cpt_version = cptv_Count - 1;
137 const char* est_names[estNR] = { "FE-lambda",
143 "thermostat-integral",
148 "LD-rng-unsupported",
149 "LD-rng-i-unsupported",
162 "MC-rng-unsupported",
163 "MC-rng-i-unsupported",
164 "barostat-integral" };
181 static const char* eeks_names[eeksNR] = { "Ekin_n", "Ekinh", "dEkindlambda",
182 "mv_cos", "Ekinf", "Ekinh_old",
183 "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC",
195 eenhENERGY_NSTEPS_SIM,
196 eenhENERGY_DELTA_H_NN,
197 eenhENERGY_DELTA_H_LIST,
198 eenhENERGY_DELTA_H_STARTTIME,
199 eenhENERGY_DELTA_H_STARTLAMBDA,
205 epullhPULL_NUMCOORDINATES,
206 epullhPULL_NUMGROUPS,
207 epullhPULL_NUMVALUESINXSUM,
208 epullhPULL_NUMVALUESINFSUM,
214 epullcoordh_VALUE_REF_SUM,
215 epullcoordh_VALUE_SUM,
216 epullcoordh_DR01_SUM,
217 epullcoordh_DR23_SUM,
218 epullcoordh_DR45_SUM,
219 epullcoordh_FSCAL_SUM,
220 epullcoordh_DYNAX_SUM,
230 static const char* eenh_names[eenhNR] = { "energy_n",
239 "energy_delta_h_list",
240 "energy_delta_h_start_time",
241 "energy_delta_h_start_lambda" };
243 static const char* ePullhNames[epullhNR] = { "pullhistory_numcoordinates", "pullhistory_numgroups",
244 "pullhistory_numvaluesinxsum",
245 "pullhistory_numvaluesinfsum" };
247 /* free energy history variables -- need to be preserved over checkpoint */
266 /* free energy history variable names */
267 static const char* edfh_names[edfhNR] = { "bEquilibrated",
269 "Wang-Landau Histogram",
277 "accumulated_plus_2",
278 "accumulated_minus_2",
282 /* AWH biasing history variables */
286 eawhhEQUILIBRATEHISTOGRAM,
290 eawhhUMBRELLAGRIDPOINT,
292 eawhhLOGSCALEDSAMPLEWEIGHT,
294 eawhhFORCECORRELATIONGRID,
298 static const char* eawhh_names[eawhhNR] = { "awh_in_initial", "awh_equilibrateHistogram",
299 "awh_histsize", "awh_npoints",
300 "awh_coordpoint", "awh_umbrellaGridpoint",
301 "awh_updatelist", "awh_logScaledSampleWeight",
302 "awh_numupdates", "awh_forceCorrelationGrid" };
310 static const char* epull_prev_step_com_names[epullsNR] = { "Pull groups prev step COM" };
313 //! Higher level vector element type, only used for formatting checkpoint dumps
314 enum class CptElementType
316 integer, //!< integer
317 real, //!< float or double, not linked to precision of type real
318 real3, //!< float[3] or double[3], not linked to precision of type real
319 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
322 //! \brief Parts of the checkpoint state, only used for reporting
325 microState, //!< The microstate of the simulated system
326 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
327 energyHistory, //!< Energy observable statistics
328 freeEnergyHistory, //!< Free-energy state and observable statistics
329 accWeightHistogram, //!< Accelerated weight histogram method state
330 pullState, //!< COM of previous step.
331 pullHistory //!< Pull history statistics (sums since last written output)
334 //! \brief Return the name of a checkpoint entry based on part and part entry
335 static const char* entryName(StatePart part, int ecpt)
339 case StatePart::microState: return est_names[ecpt];
340 case StatePart::kineticEnergy: return eeks_names[ecpt];
341 case StatePart::energyHistory: return eenh_names[ecpt];
342 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
343 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
344 case StatePart::pullState: return epull_prev_step_com_names[ecpt];
345 case StatePart::pullHistory: return ePullhNames[ecpt];
351 static void cp_warning(FILE* fp)
353 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
356 [[noreturn]] static void cp_error()
358 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
361 static void do_cpt_string_err(XDR* xd, const char* desc, gmx::ArrayRef<char> s, FILE* list)
363 char* data = s.data();
364 if (xdr_string(xd, &data, s.size()) == 0)
370 fprintf(list, "%s = %s\n", desc, data);
374 static int do_cpt_int(XDR* xd, const char* desc, int* i, FILE* list)
376 if (xdr_int(xd, i) == 0)
382 fprintf(list, "%s = %d\n", desc, *i);
387 static int do_cpt_u_chars(XDR* xd, const char* desc, int n, unsigned char* i, FILE* list)
391 fprintf(list, "%s = ", desc);
394 for (int j = 0; j < n && res; j++)
396 res &= xdr_u_char(xd, &i[j]);
399 fprintf(list, "%02x", i[j]);
414 static void do_cpt_int_err(XDR* xd, const char* desc, int* i, FILE* list)
416 if (do_cpt_int(xd, desc, i, list) < 0)
422 static void do_cpt_bool_err(XDR* xd, const char* desc, bool* b, FILE* list)
424 int i = static_cast<int>(*b);
426 if (do_cpt_int(xd, desc, &i, list) < 0)
434 static void do_cpt_step_err(XDR* xd, const char* desc, int64_t* i, FILE* list)
436 char buf[STEPSTRSIZE];
438 if (xdr_int64(xd, i) == 0)
444 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
448 static void do_cpt_double_err(XDR* xd, const char* desc, double* f, FILE* list)
450 if (xdr_double(xd, f) == 0)
456 fprintf(list, "%s = %f\n", desc, *f);
460 static void do_cpt_real_err(XDR* xd, real* f)
463 bool_t res = xdr_double(xd, f);
465 bool_t res = xdr_float(xd, f);
473 static void do_cpt_n_rvecs_err(XDR* xd, const char* desc, int n, rvec f[], FILE* list)
475 for (int i = 0; i < n; i++)
477 for (int j = 0; j < DIM; j++)
479 do_cpt_real_err(xd, &f[i][j]);
485 pr_rvecs(list, 0, desc, f, n);
497 static const int value = xdr_datatype_int;
501 struct xdr_type<float>
503 static const int value = xdr_datatype_float;
507 struct xdr_type<double>
509 static const int value = xdr_datatype_double;
512 //! \brief Returns size in byte of an xdr_datatype
513 static inline unsigned int sizeOfXdrType(int xdrType)
517 case xdr_datatype_int: return sizeof(int);
518 case xdr_datatype_float: return sizeof(float);
519 case xdr_datatype_double: return sizeof(double);
520 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
526 //! \brief Returns the XDR process function for i/o of an XDR type
527 static inline xdrproc_t xdrProc(int xdrType)
531 case xdr_datatype_int: return reinterpret_cast<xdrproc_t>(xdr_int);
532 case xdr_datatype_float: return reinterpret_cast<xdrproc_t>(xdr_float);
533 case xdr_datatype_double: return reinterpret_cast<xdrproc_t>(xdr_double);
534 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
540 /*! \brief Lists or only reads an xdr vector from checkpoint file
542 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
543 * The header for the print is set by \p part and \p ecpt.
544 * The formatting of the printing is set by \p cptElementType.
545 * When list==NULL only reads the elements.
547 static bool_t listXdrVector(XDR* xd, StatePart part, int ecpt, int nf, int xdrType, FILE* list, CptElementType cptElementType)
551 const unsigned int elemSize = sizeOfXdrType(xdrType);
552 std::vector<char> data(nf * elemSize);
553 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
559 case xdr_datatype_int:
560 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int*>(data.data()), nf, TRUE);
562 case xdr_datatype_float:
564 if (cptElementType == CptElementType::real3)
566 pr_rvecs(list, 0, entryName(part, ecpt),
567 reinterpret_cast<const rvec*>(data.data()), nf / 3);
572 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
573 pr_fvec(list, 0, entryName(part, ecpt),
574 reinterpret_cast<const float*>(data.data()), nf, TRUE);
577 case xdr_datatype_double:
579 if (cptElementType == CptElementType::real3)
581 pr_rvecs(list, 0, entryName(part, ecpt),
582 reinterpret_cast<const rvec*>(data.data()), nf / 3);
587 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
588 pr_dvec(list, 0, entryName(part, ecpt),
589 reinterpret_cast<const double*>(data.data()), nf, TRUE);
592 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
599 //! \brief Convert a double array, typed char*, to float
600 gmx_unused static void convertArrayRealPrecision(const char* c, float* v, int n)
602 const double* d = reinterpret_cast<const double*>(c);
603 for (int i = 0; i < n; i++)
605 v[i] = static_cast<float>(d[i]);
609 //! \brief Convert a float array, typed char*, to double
610 static void convertArrayRealPrecision(const char* c, double* v, int n)
612 const float* f = reinterpret_cast<const float*>(c);
613 for (int i = 0; i < n; i++)
615 v[i] = static_cast<double>(f[i]);
619 //! \brief Generate an error for trying to convert to integer
620 static void convertArrayRealPrecision(const char gmx_unused* c, int gmx_unused* v, int gmx_unused n)
622 GMX_RELEASE_ASSERT(false,
623 "We only expect type mismatches between float and double, not integer");
626 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
628 * This is the only routine that does the actually i/o of real vector,
629 * all other routines are intermediate level routines for specific real
630 * data types, calling this routine.
631 * Currently this routine is (too) complex, since it handles both real *
632 * and std::vector<real>. Using real * is deprecated and this routine
633 * will simplify a lot when only std::vector needs to be supported.
635 * The routine is generic to vectors with different allocators,
636 * because as part of reading a checkpoint there are vectors whose
637 * size is not known until reading has progressed far enough, so a
638 * resize method must be called.
640 * When not listing, we use either v or vector, depending on which is !=NULL.
641 * If nval >= 0, nval is used; on read this should match the passed value.
642 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
643 * the value is stored in nptr
645 template<typename T, typename AllocatorType>
646 static int doVectorLow(XDR* xd,
653 std::vector<T, AllocatorType>* vector,
655 CptElementType cptElementType)
657 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr)
658 || (v == nullptr && vector != nullptr),
659 "Without list, we should have exactly one of v and vector != NULL");
663 int numElemInTheFile;
668 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
669 numElemInTheFile = nval;
675 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
676 numElemInTheFile = *nptr;
680 numElemInTheFile = vector->size();
684 /* Read/write the vector element count */
685 res = xdr_int(xd, &numElemInTheFile);
690 /* Read/write the element data type */
691 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
692 int xdrTypeInTheFile = xdrTypeInTheCode;
693 res = xdr_int(xd, &xdrTypeInTheFile);
699 if (list == nullptr && (sflags & (1 << ecpt)))
703 if (numElemInTheFile != nval)
706 "Count mismatch for state entry %s, code count is %d, file count is %d\n",
707 entryName(part, ecpt), nval, numElemInTheFile);
710 else if (nptr != nullptr)
712 *nptr = numElemInTheFile;
715 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
719 sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
720 entryName(part, ecpt), xdr_datatype_names[xdrTypeInTheCode],
721 xdr_datatype_names[xdrTypeInTheFile]);
723 /* Matching int and real should never occur, but check anyhow */
724 if (xdrTypeInTheFile == xdr_datatype_int || xdrTypeInTheCode == xdr_datatype_int)
727 "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
736 snew(*v, numElemInTheFile);
742 /* This conditional ensures that we don't resize on write.
743 * In particular in the state where this code was written
744 * vector has a size of numElemInThefile and we
745 * don't want to lose that padding here.
747 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
749 vector->resize(numElemInTheFile);
757 vChar = reinterpret_cast<char*>(vp);
761 snew(vChar, numElemInTheFile * sizeOfXdrType(xdrTypeInTheFile));
763 res = xdr_vector(xd, vChar, numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
764 xdrProc(xdrTypeInTheFile));
772 /* In the old code float-double conversion came for free.
773 * In the new code we still support it, mainly because
774 * the tip4p_continue regression test makes use of this.
775 * It's an open question if we do or don't want to allow this.
777 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
783 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile, list, cptElementType);
789 //! \brief Read/Write a std::vector, on read checks the number of elements matches \p numElements, if specified.
792 doVector(XDR* xd, StatePart part, int ecpt, int sflags, std::vector<T>* vector, FILE* list, int numElements = -1)
794 return doVectorLow<T>(xd, part, ecpt, sflags, numElements, nullptr, nullptr, vector, list,
795 CptElementType::real);
798 //! \brief Read/Write an ArrayRef<real>.
799 static int doRealArrayRef(XDR* xd, StatePart part, int ecpt, int sflags, gmx::ArrayRef<real> vector, FILE* list)
801 real* v_real = vector.data();
802 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, vector.size(), nullptr,
803 &v_real, nullptr, list, CptElementType::real);
806 //! Convert from view of RVec to view of real.
807 static gmx::ArrayRef<real> realArrayRefFromRVecArrayRef(gmx::ArrayRef<gmx::RVec> ofRVecs)
809 return gmx::arrayRefFromArray<real>(reinterpret_cast<real*>(ofRVecs.data()), ofRVecs.size() * DIM);
812 //! \brief Read/Write a PaddedVector whose value_type is RVec.
813 template<typename PaddedVectorOfRVecType>
815 doRvecVector(XDR* xd, StatePart part, int ecpt, int sflags, PaddedVectorOfRVecType* v, int numAtoms, FILE* list)
817 const int numReals = numAtoms * DIM;
822 sflags & (1 << ecpt),
823 "When not listing, the flag for the entry should be set when requesting i/o");
824 GMX_RELEASE_ASSERT(v->size() == numAtoms, "v should have sufficient size for numAtoms");
826 return doRealArrayRef(xd, part, ecpt, sflags, realArrayRefFromRVecArrayRef(makeArrayRef(*v)), list);
830 // Use the rebind facility to change the value_type of the
831 // allocator from RVec to real.
832 using realAllocator =
833 typename std::allocator_traits<typename PaddedVectorOfRVecType::allocator_type>::template rebind_alloc<real>;
834 return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr,
835 nullptr, list, CptElementType::real);
839 /* This function stores n along with the reals for reading,
840 * but on reading it assumes that n matches the value in the checkpoint file,
841 * a fatal error is generated when this is not the case.
843 static int do_cpte_reals(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
845 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr,
846 list, CptElementType::real);
849 /* This function does the same as do_cpte_reals,
850 * except that on reading it ignores the passed value of *n
851 * and stores the value read from the checkpoint file in *n.
853 static int do_cpte_n_reals(XDR* xd, StatePart part, int ecpt, int sflags, int* n, real** v, FILE* list)
855 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list,
856 CptElementType::real);
859 static int do_cpte_real(XDR* xd, StatePart part, int ecpt, int sflags, real* r, FILE* list)
861 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr,
862 list, CptElementType::real);
865 static int do_cpte_ints(XDR* xd, StatePart part, int ecpt, int sflags, int n, int** v, FILE* list)
867 return doVectorLow<int, std::allocator<int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr,
868 list, CptElementType::integer);
871 static int do_cpte_int(XDR* xd, StatePart part, int ecpt, int sflags, int* i, FILE* list)
873 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
876 static int do_cpte_bool(XDR* xd, StatePart part, int ecpt, int sflags, bool* b, FILE* list)
878 int i = static_cast<int>(*b);
879 int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
884 static int do_cpte_doubles(XDR* xd, StatePart part, int ecpt, int sflags, int n, double** v, FILE* list)
886 return doVectorLow<double, std::allocator<double>>(xd, part, ecpt, sflags, n, nullptr, v,
887 nullptr, list, CptElementType::real);
890 static int do_cpte_double(XDR* xd, StatePart part, int ecpt, int sflags, double* r, FILE* list)
892 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
895 static int do_cpte_matrix(XDR* xd, StatePart part, int ecpt, int sflags, matrix v, FILE* list)
901 ret = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, DIM * DIM, nullptr, &vr,
902 nullptr, nullptr, CptElementType::matrix3x3);
904 if (list && ret == 0)
906 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
913 static int do_cpte_nmatrix(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
917 char name[CPTSTRLEN];
924 for (i = 0; i < n; i++)
926 reti = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]),
927 nullptr, nullptr, CptElementType::matrix3x3);
928 if (list && reti == 0)
930 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
931 pr_reals(list, 0, name, v[i], n);
941 static int do_cpte_matrices(XDR* xd, StatePart part, int ecpt, int sflags, int n, matrix** v, FILE* list)
944 matrix *vp, *va = nullptr;
950 res = xdr_int(xd, &nf);
955 if (list == nullptr && nf != n)
957 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n",
958 entryName(part, ecpt), n, nf);
960 if (list || !(sflags & (1 << ecpt)))
973 snew(vr, nf * DIM * DIM);
974 for (i = 0; i < nf; i++)
976 for (j = 0; j < DIM; j++)
978 for (k = 0; k < DIM; k++)
980 vr[(i * DIM + j) * DIM + k] = vp[i][j][k];
984 ret = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, nf * DIM * DIM, nullptr,
985 &vr, nullptr, nullptr, CptElementType::matrix3x3);
986 for (i = 0; i < nf; i++)
988 for (j = 0; j < DIM; j++)
990 for (k = 0; k < DIM; k++)
992 vp[i][j][k] = vr[(i * DIM + j) * DIM + k];
998 if (list && ret == 0)
1000 for (i = 0; i < nf; i++)
1002 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
1013 static void do_cpt_header(XDR* xd, gmx_bool bRead, FILE* list, CheckpointHeaderContents* contents)
1026 res = xdr_int(xd, &magic);
1030 "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
1032 if (magic != CPT_MAGIC1)
1035 "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
1036 "The checkpoint file is corrupted or not a checkpoint file",
1042 gmx_gethostname(fhost, 255);
1044 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
1045 // The following fields are no longer ever written with meaningful
1046 // content, but because they precede the file version, there is no
1047 // good way for new code to read the old and new formats, nor a
1048 // good way for old code to avoid giving an error while reading a
1049 // new format. So we read and write a field that no longer has a
1051 do_cpt_string_err(xd, "GROMACS build time UNUSED", contents->btime_UNUSED, list);
1052 do_cpt_string_err(xd, "GROMACS build user UNUSED", contents->buser_UNUSED, list);
1053 do_cpt_string_err(xd, "GROMACS build host UNUSED", contents->bhost_UNUSED, list);
1054 do_cpt_string_err(xd, "generating program", contents->fprog, list);
1055 do_cpt_string_err(xd, "generation time", contents->ftime, list);
1056 contents->file_version = cpt_version;
1057 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
1058 if (contents->file_version > cpt_version)
1061 "Attempting to read a checkpoint file of version %d with code of version %d\n",
1062 contents->file_version, cpt_version);
1064 if (contents->file_version >= 13)
1066 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
1070 contents->double_prec = -1;
1072 if (contents->file_version >= 12)
1074 do_cpt_string_err(xd, "generating host", fhost, list);
1076 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
1077 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
1078 if (contents->file_version >= 10)
1080 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
1084 contents->nhchainlength = 1;
1086 if (contents->file_version >= 11)
1088 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
1092 contents->nnhpres = 0;
1094 if (contents->file_version >= 14)
1096 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
1100 contents->nlambda = 0;
1102 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
1103 if (contents->file_version >= 3)
1105 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1109 contents->simulation_part = 1;
1111 if (contents->file_version >= 5)
1113 do_cpt_step_err(xd, "step", &contents->step, list);
1118 do_cpt_int_err(xd, "step", &idum, list);
1119 contents->step = static_cast<int64_t>(idum);
1121 do_cpt_double_err(xd, "t", &contents->t, list);
1122 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1123 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1124 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1125 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1126 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1127 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1128 if (contents->file_version >= 4)
1130 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1131 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1135 contents->flags_eks = 0;
1136 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV + 1));
1137 contents->flags_state = (contents->flags_state
1138 & ~((1 << (estORIRE_DTAV + 1)) | (1 << (estORIRE_DTAV + 2))
1139 | (1 << (estORIRE_DTAV + 3))));
1141 if (contents->file_version >= 14)
1143 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1147 contents->flags_dfh = 0;
1150 if (contents->file_version >= 15)
1152 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1159 if (contents->file_version >= 16)
1161 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1165 contents->eSwapCoords = eswapNO;
1168 if (contents->file_version >= 17)
1170 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1174 contents->flags_awhh = 0;
1177 if (contents->file_version >= 18)
1179 do_cpt_int_err(xd, "pull history flags", &contents->flagsPullHistory, list);
1183 contents->flagsPullHistory = 0;
1187 static int do_cpt_footer(XDR* xd, int file_version)
1192 if (file_version >= 2)
1195 res = xdr_int(xd, &magic);
1200 if (magic != CPT_MAGIC2)
1209 static int do_cpt_state(XDR* xd, int fflags, t_state* state, FILE* list)
1212 const StatePart part = StatePart::microState;
1213 const int sflags = state->flags;
1214 // If reading, state->natoms was probably just read, so
1215 // allocations need to be managed. If writing, this won't change
1216 // anything that matters.
1217 state_change_natoms(state, state->natoms);
1218 for (int i = 0; (i < estNR && ret == 0); i++)
1220 if (fflags & (1 << i))
1225 ret = doRealArrayRef(
1226 xd, part, i, sflags,
1227 gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()),
1231 ret = do_cpte_int(xd, part, i, sflags, &state->fep_state, list);
1233 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1235 ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list);
1237 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1239 ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list);
1242 ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list);
1245 ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list);
1248 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list);
1251 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list);
1254 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list);
1257 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list);
1260 ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list);
1263 ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list);
1265 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1266 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1268 ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list);
1271 ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list);
1273 /* The RNG entries are no longer written,
1274 * the next 4 lines are only for reading old files.
1276 case estLD_RNG_NOTSUPPORTED:
1277 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1279 case estLD_RNGI_NOTSUPPORTED:
1280 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1282 case estMC_RNG_NOTSUPPORTED:
1283 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1285 case estMC_RNGI_NOTSUPPORTED:
1286 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1288 case estDISRE_INITF:
1289 ret = do_cpte_real(xd, part, i, sflags, &state->hist.disre_initf, list);
1291 case estDISRE_RM3TAV:
1292 ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs,
1293 &state->hist.disre_rm3tav, list);
1295 case estORIRE_INITF:
1296 ret = do_cpte_real(xd, part, i, sflags, &state->hist.orire_initf, list);
1299 ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav,
1300 &state->hist.orire_Dtav, list);
1302 case estPULLCOMPREVSTEP:
1303 ret = doVector<double>(xd, part, i, sflags, &state->pull_com_prev_step, list);
1307 "Unknown state entry %d\n"
1308 "You are reading a checkpoint file written by different code, which "
1317 static int do_cpt_ekinstate(XDR* xd, int fflags, ekinstate_t* ekins, FILE* list)
1321 const StatePart part = StatePart::kineticEnergy;
1322 for (int i = 0; (i < eeksNR && ret == 0); i++)
1324 if (fflags & (1 << i))
1330 ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list);
1333 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list);
1336 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list);
1339 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list);
1342 ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list);
1344 case eeksEKINSCALEF:
1345 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list);
1348 ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list);
1350 case eeksEKINSCALEH:
1351 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list);
1354 ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list);
1356 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1359 "Unknown ekin data state entry %d\n"
1360 "You are probably reading a new checkpoint file with old code",
1370 static int do_cpt_swapstate(XDR* xd, gmx_bool bRead, int eSwapCoords, swaphistory_t* swapstate, FILE* list)
1372 int swap_cpt_version = 2;
1374 if (eSwapCoords == eswapNO)
1379 swapstate->bFromCpt = bRead;
1380 swapstate->eSwapCoords = eSwapCoords;
1382 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1383 if (bRead && swap_cpt_version < 2)
1386 "Cannot read checkpoint files that were written with old versions"
1387 "of the ion/water position swapping protocol.\n");
1390 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1392 /* When reading, init_swapcoords has not been called yet,
1393 * so we have to allocate memory first. */
1394 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1397 snew(swapstate->ionType, swapstate->nIonTypes);
1400 for (int ic = 0; ic < eCompNR; ic++)
1402 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1404 swapstateIons_t* gs = &swapstate->ionType[ii];
1408 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1412 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1417 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1421 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1424 if (bRead && (nullptr == gs->nMolPast[ic]))
1426 snew(gs->nMolPast[ic], swapstate->nAverage);
1429 for (int j = 0; j < swapstate->nAverage; j++)
1433 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1437 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1443 /* Ion flux per channel */
1444 for (int ic = 0; ic < eChanNR; ic++)
1446 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1448 swapstateIons_t* gs = &swapstate->ionType[ii];
1452 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1456 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1461 /* Ion flux leakage */
1464 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1468 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1472 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1474 swapstateIons_t* gs = &swapstate->ionType[ii];
1476 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1480 snew(gs->channel_label, gs->nMol);
1481 snew(gs->comp_from, gs->nMol);
1484 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1485 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1488 /* Save the last known whole positions to checkpoint
1489 * file to be able to also make multimeric channels whole in PBC */
1490 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1491 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1494 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1495 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1496 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1497 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1501 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0],
1502 *swapstate->xc_old_whole_p[eChan0], list);
1503 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1],
1504 *swapstate->xc_old_whole_p[eChan1], list);
1511 static int do_cpt_enerhist(XDR* xd, gmx_bool bRead, int fflags, energyhistory_t* enerhist, FILE* list)
1520 GMX_RELEASE_ASSERT(enerhist != nullptr,
1521 "With energy history, we need a valid enerhist pointer");
1523 /* This is stored/read for backward compatibility */
1524 int energyHistoryNumEnergies = 0;
1527 enerhist->nsteps = 0;
1529 enerhist->nsteps_sim = 0;
1530 enerhist->nsum_sim = 0;
1532 else if (enerhist != nullptr)
1534 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1537 delta_h_history_t* deltaH = enerhist->deltaHForeignLambdas.get();
1538 const StatePart part = StatePart::energyHistory;
1539 for (int i = 0; (i < eenhNR && ret == 0); i++)
1541 if (fflags & (1 << i))
1546 ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list);
1548 case eenhENERGY_AVER:
1549 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list);
1551 case eenhENERGY_SUM:
1552 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list);
1554 case eenhENERGY_NSUM:
1555 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list);
1557 case eenhENERGY_SUM_SIM:
1558 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list);
1560 case eenhENERGY_NSUM_SIM:
1561 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list);
1563 case eenhENERGY_NSTEPS:
1564 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list);
1566 case eenhENERGY_NSTEPS_SIM:
1567 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list);
1569 case eenhENERGY_DELTA_H_NN:
1572 if (!bRead && deltaH != nullptr)
1574 numDeltaH = deltaH->dh.size();
1576 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1579 if (deltaH == nullptr)
1581 enerhist->deltaHForeignLambdas = std::make_unique<delta_h_history_t>();
1582 deltaH = enerhist->deltaHForeignLambdas.get();
1584 deltaH->dh.resize(numDeltaH);
1585 deltaH->start_lambda_set = FALSE;
1589 case eenhENERGY_DELTA_H_LIST:
1590 for (auto dh : deltaH->dh)
1592 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1595 case eenhENERGY_DELTA_H_STARTTIME:
1596 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list);
1598 case eenhENERGY_DELTA_H_STARTLAMBDA:
1599 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list);
1603 "Unknown energy history entry %d\n"
1604 "You are probably reading a new checkpoint file with old code",
1610 if ((fflags & (1 << eenhENERGY_SUM)) && !(fflags & (1 << eenhENERGY_SUM_SIM)))
1612 /* Assume we have an old file format and copy sum to sum_sim */
1613 enerhist->ener_sum_sim = enerhist->ener_sum;
1616 if ((fflags & (1 << eenhENERGY_NSUM)) && !(fflags & (1 << eenhENERGY_NSTEPS)))
1618 /* Assume we have an old file format and copy nsum to nsteps */
1619 enerhist->nsteps = enerhist->nsum;
1621 if ((fflags & (1 << eenhENERGY_NSUM_SIM)) && !(fflags & (1 << eenhENERGY_NSTEPS_SIM)))
1623 /* Assume we have an old file format and copy nsum to nsteps */
1624 enerhist->nsteps_sim = enerhist->nsum_sim;
1630 static int doCptPullCoordHist(XDR* xd, PullCoordinateHistory* pullCoordHist, const StatePart part, FILE* list)
1635 flags |= ((1 << epullcoordh_VALUE_REF_SUM) | (1 << epullcoordh_VALUE_SUM)
1636 | (1 << epullcoordh_DR01_SUM) | (1 << epullcoordh_DR23_SUM)
1637 | (1 << epullcoordh_DR45_SUM) | (1 << epullcoordh_FSCAL_SUM));
1639 for (int i = 0; i < epullcoordh_NR && ret == 0; i++)
1643 case epullcoordh_VALUE_REF_SUM:
1644 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->valueRef), list);
1646 case epullcoordh_VALUE_SUM:
1647 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->value), list);
1649 case epullcoordh_DR01_SUM:
1650 for (int j = 0; j < DIM && ret == 0; j++)
1652 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr01[j]), list);
1655 case epullcoordh_DR23_SUM:
1656 for (int j = 0; j < DIM && ret == 0; j++)
1658 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr23[j]), list);
1661 case epullcoordh_DR45_SUM:
1662 for (int j = 0; j < DIM && ret == 0; j++)
1664 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr45[j]), list);
1667 case epullcoordh_FSCAL_SUM:
1668 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->scalarForce), list);
1670 case epullcoordh_DYNAX_SUM:
1671 for (int j = 0; j < DIM && ret == 0; j++)
1673 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dynaX[j]), list);
1682 static int doCptPullGroupHist(XDR* xd, PullGroupHistory* pullGroupHist, const StatePart part, FILE* list)
1687 flags |= ((1 << epullgrouph_X_SUM));
1689 for (int i = 0; i < epullgrouph_NR; i++)
1693 case epullgrouph_X_SUM:
1694 for (int j = 0; j < DIM && ret == 0; j++)
1696 ret = do_cpte_double(xd, part, i, flags, &(pullGroupHist->x[j]), list);
1706 static int doCptPullHist(XDR* xd, gmx_bool bRead, int fflags, PullHistory* pullHist, const StatePart part, FILE* list)
1709 int pullHistoryNumCoordinates = 0;
1710 int pullHistoryNumGroups = 0;
1712 /* Retain the number of terms in the sum and the number of coordinates (used for writing
1713 * average pull forces and coordinates) in the pull history, in temporary variables,
1714 * in case they cannot be read from the checkpoint, in order to have backward compatibility */
1717 pullHist->numValuesInXSum = 0;
1718 pullHist->numValuesInFSum = 0;
1720 else if (pullHist != nullptr)
1722 pullHistoryNumCoordinates = pullHist->pullCoordinateSums.size();
1723 pullHistoryNumGroups = pullHist->pullGroupSums.size();
1727 GMX_RELEASE_ASSERT(fflags == 0, "Without pull history, all flags should be off");
1730 for (int i = 0; (i < epullhNR && ret == 0); i++)
1732 if (fflags & (1 << i))
1736 case epullhPULL_NUMCOORDINATES:
1737 ret = do_cpte_int(xd, part, i, fflags, &pullHistoryNumCoordinates, list);
1739 case epullhPULL_NUMGROUPS:
1740 do_cpt_int_err(xd, eenh_names[i], &pullHistoryNumGroups, list);
1742 case epullhPULL_NUMVALUESINXSUM:
1743 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInXSum, list);
1745 case epullhPULL_NUMVALUESINFSUM:
1746 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInFSum, list);
1750 "Unknown pull history entry %d\n"
1751 "You are probably reading a new checkpoint file with old code",
1758 pullHist->pullCoordinateSums.resize(pullHistoryNumCoordinates);
1759 pullHist->pullGroupSums.resize(pullHistoryNumGroups);
1761 if (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0)
1763 for (size_t i = 0; i < pullHist->pullCoordinateSums.size() && ret == 0; i++)
1765 ret = doCptPullCoordHist(xd, &(pullHist->pullCoordinateSums[i]), part, list);
1767 for (size_t i = 0; i < pullHist->pullGroupSums.size() && ret == 0; i++)
1769 ret = doCptPullGroupHist(xd, &(pullHist->pullGroupSums[i]), part, list);
1776 static int do_cpt_df_hist(XDR* xd, int fflags, int nlambda, df_history_t** dfhistPtr, FILE* list)
1785 if (*dfhistPtr == nullptr)
1787 snew(*dfhistPtr, 1);
1788 (*dfhistPtr)->nlambda = nlambda;
1789 init_df_history(*dfhistPtr, nlambda);
1791 df_history_t* dfhist = *dfhistPtr;
1793 const StatePart part = StatePart::freeEnergyHistory;
1794 for (int i = 0; (i < edfhNR && ret == 0); i++)
1796 if (fflags & (1 << i))
1801 ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list);
1804 ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list);
1807 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list);
1810 ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list);
1812 case edfhSUMWEIGHTS:
1813 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list);
1816 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list);
1819 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list);
1822 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list);
1825 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list);
1828 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list);
1831 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list);
1834 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list);
1837 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list);
1840 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list);
1845 "Unknown df history entry %d\n"
1846 "You are probably reading a new checkpoint file with old code",
1856 /* This function stores the last whole configuration of the reference and
1857 * average structure in the .cpt file
1859 static int do_cpt_EDstate(XDR* xd, gmx_bool bRead, int nED, edsamhistory_t* EDstate, FILE* list)
1866 EDstate->bFromCpt = bRead;
1869 /* When reading, init_edsam has not been called yet,
1870 * so we have to allocate memory first. */
1873 snew(EDstate->nref, EDstate->nED);
1874 snew(EDstate->old_sref, EDstate->nED);
1875 snew(EDstate->nav, EDstate->nED);
1876 snew(EDstate->old_sav, EDstate->nED);
1879 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1880 for (int i = 0; i < EDstate->nED; i++)
1884 /* Reference structure SREF */
1885 sprintf(buf, "ED%d # of atoms in reference structure", i + 1);
1886 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1887 sprintf(buf, "ED%d x_ref", i + 1);
1890 snew(EDstate->old_sref[i], EDstate->nref[i]);
1891 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1895 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1898 /* Average structure SAV */
1899 sprintf(buf, "ED%d # of atoms in average structure", i + 1);
1900 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1901 sprintf(buf, "ED%d x_av", i + 1);
1904 snew(EDstate->old_sav[i], EDstate->nav[i]);
1905 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1909 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1916 static int do_cpt_correlation_grid(XDR* xd,
1918 gmx_unused int fflags,
1919 gmx::CorrelationGridHistory* corrGrid,
1925 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1926 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1927 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1931 initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize,
1932 corrGrid->blockDataListSize);
1935 for (gmx::CorrelationBlockDataHistory& blockData : corrGrid->blockDataBuffer)
1937 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1938 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1939 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1940 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1941 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1942 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1943 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1944 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1945 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1946 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1947 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1953 static int do_cpt_awh_bias(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhBiasHistory* biasHistory, FILE* list)
1957 gmx::AwhBiasStateHistory* state = &biasHistory->state;
1958 for (int i = 0; (i < eawhhNR && ret == 0); i++)
1960 if (fflags & (1 << i))
1964 case eawhhIN_INITIAL:
1965 do_cpt_bool_err(xd, eawhh_names[i], &state->in_initial, list);
1967 case eawhhEQUILIBRATEHISTOGRAM:
1968 do_cpt_bool_err(xd, eawhh_names[i], &state->equilibrateHistogram, list);
1971 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list);
1978 numPoints = biasHistory->pointState.size();
1980 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1983 biasHistory->pointState.resize(numPoints);
1987 case eawhhCOORDPOINT:
1988 for (auto& psh : biasHistory->pointState)
1990 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1991 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1992 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1993 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1994 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1995 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1996 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1997 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1998 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1999 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
2000 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
2003 case eawhhUMBRELLAGRIDPOINT:
2004 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list);
2006 case eawhhUPDATELIST:
2007 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
2008 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
2010 case eawhhLOGSCALEDSAMPLEWEIGHT:
2011 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
2012 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
2014 case eawhhNUMUPDATES:
2015 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
2017 case eawhhFORCECORRELATIONGRID:
2018 ret = do_cpt_correlation_grid(xd, bRead, fflags,
2019 &biasHistory->forceCorrelationGrid, list, i);
2021 default: gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
2029 static int do_cpt_awh(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhHistory* awhHistory, FILE* list)
2035 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
2037 if (awhHistory == nullptr)
2039 GMX_RELEASE_ASSERT(bRead,
2040 "do_cpt_awh should not be called for writing without an AwhHistory");
2042 awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
2043 awhHistory = awhHistoryLocal.get();
2046 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
2047 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
2048 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
2049 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
2050 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
2051 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
2053 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
2054 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
2059 numBias = awhHistory->bias.size();
2061 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
2065 awhHistory->bias.resize(numBias);
2067 for (auto& bias : awhHistory->bias)
2069 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
2075 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
2080 static void do_cpt_mdmodules(int fileVersion,
2081 t_fileio* checkpointFileHandle,
2082 const gmx::MdModulesNotifier& mdModulesNotifier)
2084 if (fileVersion >= cptv_MdModules)
2086 gmx::FileIOXdrSerializer serializer(checkpointFileHandle);
2087 gmx::KeyValueTreeObject mdModuleCheckpointParameterTree =
2088 gmx::deserializeKeyValueTree(&serializer);
2089 gmx::MdModulesCheckpointReadingDataOnMaster mdModuleCheckpointReadingDataOnMaster = {
2090 mdModuleCheckpointParameterTree, fileVersion
2092 mdModulesNotifier.notifier_.notify(mdModuleCheckpointReadingDataOnMaster);
2096 static int do_cpt_files(XDR* xd, gmx_bool bRead, std::vector<gmx_file_position_t>* outputfiles, FILE* list, int file_version)
2099 gmx_off_t mask = 0xFFFFFFFFL;
2100 int offset_high, offset_low;
2101 std::array<char, CPTSTRLEN> buf;
2102 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
2104 // Ensure that reading pre-allocates outputfiles, while writing
2105 // writes what is already there.
2106 int nfiles = outputfiles->size();
2107 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
2113 outputfiles->resize(nfiles);
2116 for (auto& outputfile : *outputfiles)
2118 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
2121 do_cpt_string_err(xd, "output filename", buf, list);
2122 std::copy(std::begin(buf), std::end(buf), std::begin(outputfile.filename));
2124 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2128 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2132 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32)
2133 | (static_cast<gmx_off_t>(offset_low) & mask);
2137 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
2139 offset = outputfile.offset;
2147 offset_low = static_cast<int>(offset & mask);
2148 offset_high = static_cast<int>((offset >> 32) & mask);
2150 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2154 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2159 if (file_version >= 8)
2161 if (do_cpt_int(xd, "file_checksum_size", &outputfile.checksumSize, list) != 0)
2165 if (do_cpt_u_chars(xd, "file_checksum", outputfile.checksum.size(),
2166 outputfile.checksum.data(), list)
2174 outputfile.checksumSize = -1;
2180 static void mpiBarrierBeforeRename(const bool applyMpiBarrierBeforeRename, MPI_Comm mpiBarrierCommunicator)
2182 if (applyMpiBarrierBeforeRename)
2185 MPI_Barrier(mpiBarrierCommunicator);
2187 GMX_RELEASE_ASSERT(false, "Should not request a barrier without MPI");
2188 GMX_UNUSED_VALUE(mpiBarrierCommunicator);
2193 void write_checkpoint(const char* fn,
2194 gmx_bool bNumberAndKeep,
2196 const t_commrec* cr,
2200 int simulation_part,
2206 ObservablesHistory* observablesHistory,
2207 const gmx::MdModulesNotifier& mdModulesNotifier,
2208 bool applyMpiBarrierBeforeRename,
2209 MPI_Comm mpiBarrierCommunicator)
2212 char* fntemp; /* the temporary checkpoint file name */
2214 char buf[1024], suffix[5 + STEPSTRSIZE], sbuf[STEPSTRSIZE];
2217 if (DOMAINDECOMP(cr))
2219 npmenodes = cr->npmenodes;
2227 /* make the new temporary filename */
2228 snew(fntemp, std::strlen(fn) + 5 + STEPSTRSIZE);
2229 std::strcpy(fntemp, fn);
2230 fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2231 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
2232 std::strcat(fntemp, suffix);
2233 std::strcat(fntemp, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2235 /* if we can't rename, we just overwrite the cpt file.
2236 * dangerous if interrupted.
2238 snew(fntemp, std::strlen(fn));
2239 std::strcpy(fntemp, fn);
2241 std::string timebuf = gmx_format_current_time();
2245 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n", gmx_step_str(step, buf), timebuf.c_str());
2248 /* Get offsets for open files */
2249 auto outputfiles = gmx_fio_get_output_file_positions();
2251 fp = gmx_fio_open(fntemp, "w");
2254 if (state->ekinstate.bUpToDate)
2256 flags_eks = ((1 << eeksEKIN_N) | (1 << eeksEKINH) | (1 << eeksEKINF) | (1 << eeksEKINO)
2257 | (1 << eeksEKINSCALEF) | (1 << eeksEKINSCALEH) | (1 << eeksVSCALE)
2258 | (1 << eeksDEKINDL) | (1 << eeksMVCOS));
2265 energyhistory_t* enerhist = observablesHistory->energyHistory.get();
2267 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
2269 flags_enh |= (1 << eenhENERGY_N) | (1 << eenhENERGY_NSTEPS) | (1 << eenhENERGY_NSTEPS_SIM);
2270 if (enerhist->nsum > 0)
2272 flags_enh |= ((1 << eenhENERGY_AVER) | (1 << eenhENERGY_SUM) | (1 << eenhENERGY_NSUM));
2274 if (enerhist->nsum_sim > 0)
2276 flags_enh |= ((1 << eenhENERGY_SUM_SIM) | (1 << eenhENERGY_NSUM_SIM));
2278 if (enerhist->deltaHForeignLambdas != nullptr)
2280 flags_enh |= ((1 << eenhENERGY_DELTA_H_NN) | (1 << eenhENERGY_DELTA_H_LIST)
2281 | (1 << eenhENERGY_DELTA_H_STARTTIME) | (1 << eenhENERGY_DELTA_H_STARTLAMBDA));
2285 PullHistory* pullHist = observablesHistory->pullHistory.get();
2286 int flagsPullHistory = 0;
2287 if (pullHist != nullptr && (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0))
2289 flagsPullHistory |= (1 << epullhPULL_NUMCOORDINATES);
2290 flagsPullHistory |= ((1 << epullhPULL_NUMGROUPS) | (1 << epullhPULL_NUMVALUESINXSUM)
2291 | (1 << epullhPULL_NUMVALUESINFSUM));
2297 flags_dfh = ((1 << edfhBEQUIL) | (1 << edfhNATLAMBDA) | (1 << edfhSUMWEIGHTS)
2298 | (1 << edfhSUMDG) | (1 << edfhTIJ) | (1 << edfhTIJEMP));
2301 flags_dfh |= ((1 << edfhWLDELTA) | (1 << edfhWLHISTO));
2303 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER)
2304 || (elamstats == elamstatsMETROPOLIS))
2306 flags_dfh |= ((1 << edfhACCUMP) | (1 << edfhACCUMM) | (1 << edfhACCUMP2)
2307 | (1 << edfhACCUMM2) | (1 << edfhSUMMINVAR) | (1 << edfhSUMVAR));
2316 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
2318 flags_awhh |= ((1 << eawhhIN_INITIAL) | (1 << eawhhEQUILIBRATEHISTOGRAM) | (1 << eawhhHISTSIZE)
2319 | (1 << eawhhNPOINTS) | (1 << eawhhCOORDPOINT) | (1 << eawhhUMBRELLAGRIDPOINT)
2320 | (1 << eawhhUPDATELIST) | (1 << eawhhLOGSCALEDSAMPLEWEIGHT)
2321 | (1 << eawhhNUMUPDATES) | (1 << eawhhFORCECORRELATIONGRID));
2324 /* We can check many more things now (CPU, acceleration, etc), but
2325 * it is highly unlikely to have two separate builds with exactly
2326 * the same version, user, time, and build host!
2329 int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
2331 edsamhistory_t* edsamhist = observablesHistory->edsamHistory.get();
2332 int nED = (edsamhist ? edsamhist->nED : 0);
2334 swaphistory_t* swaphist = observablesHistory->swapHistory.get();
2335 int eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
2337 CheckpointHeaderContents headerContents = { 0,
2355 state->nhchainlength,
2365 std::strcpy(headerContents.version, gmx_version());
2366 std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
2367 std::strcpy(headerContents.ftime, timebuf.c_str());
2368 if (DOMAINDECOMP(cr))
2370 copy_ivec(domdecCells, headerContents.dd_nc);
2373 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
2375 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0)
2376 || (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0)
2377 || (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0)
2378 || (doCptPullHist(gmx_fio_getxdr(fp), FALSE, flagsPullHistory, pullHist, StatePart::pullHistory, nullptr)
2380 || (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0)
2381 || (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0)
2382 || (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0)
2383 || (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0)
2384 || (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr, headerContents.file_version) < 0))
2386 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2389 // Checkpointing MdModules
2391 gmx::KeyValueTreeBuilder builder;
2392 gmx::MdModulesWriteCheckpointData mdModulesWriteCheckpoint = { builder.rootObject(),
2393 headerContents.file_version };
2394 mdModulesNotifier.notifier_.notify(mdModulesWriteCheckpoint);
2395 auto tree = builder.build();
2396 gmx::FileIOXdrSerializer serializer(fp);
2397 gmx::serializeKeyValueTree(tree, &serializer);
2400 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2402 /* we really, REALLY, want to make sure to physically write the checkpoint,
2403 and all the files it depends on, out to disk. Because we've
2404 opened the checkpoint with gmx_fio_open(), it's in our list
2406 ret = gmx_fio_all_output_fsync();
2411 sprintf(buf, "Cannot fsync '%s'; maybe you are out of disk space?", gmx_fio_getname(ret));
2413 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
2419 gmx_warning("%s", buf);
2423 if (gmx_fio_close(fp) != 0)
2425 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2428 /* we don't move the checkpoint if the user specified they didn't want it,
2429 or if the fsyncs failed */
2431 if (!bNumberAndKeep && !ret)
2435 /* Rename the previous checkpoint file */
2436 mpiBarrierBeforeRename(applyMpiBarrierBeforeRename, mpiBarrierCommunicator);
2438 std::strcpy(buf, fn);
2439 buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2440 std::strcat(buf, "_prev");
2441 std::strcat(buf, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2444 /* we copy here so that if something goes wrong between now and
2445 * the rename below, there's always a state.cpt.
2446 * If renames are atomic (such as in POSIX systems),
2447 * this copying should be unneccesary.
2449 gmx_file_copy(fn, buf, FALSE);
2450 /* We don't really care if this fails:
2451 * there's already a new checkpoint.
2456 gmx_file_rename(fn, buf);
2460 /* Rename the checkpoint file from the temporary to the final name */
2461 mpiBarrierBeforeRename(applyMpiBarrierBeforeRename, mpiBarrierCommunicator);
2463 if (gmx_file_rename(fntemp, fn) != 0)
2465 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2468 #endif /* GMX_NO_RENAME */
2473 /*code for alternate checkpointing scheme. moved from top of loop over
2475 fcRequestCheckPoint();
2476 if (fcCheckPointParallel(cr->nodeid, NULL, 0) == 0)
2478 gmx_fatal(3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step);
2480 #endif /* end GMX_FAHCORE block */
2483 static void check_int(FILE* fplog, const char* type, int p, int f, gmx_bool* mm)
2485 bool foundMismatch = (p != f);
2493 fprintf(fplog, " %s mismatch,\n", type);
2494 fprintf(fplog, " current program: %d\n", p);
2495 fprintf(fplog, " checkpoint file: %d\n", f);
2496 fprintf(fplog, "\n");
2500 static void check_string(FILE* fplog, const char* type, const char* p, const char* f, gmx_bool* mm)
2502 bool foundMismatch = (std::strcmp(p, f) != 0);
2510 fprintf(fplog, " %s mismatch,\n", type);
2511 fprintf(fplog, " current program: %s\n", p);
2512 fprintf(fplog, " checkpoint file: %s\n", f);
2513 fprintf(fplog, "\n");
2517 static void check_match(FILE* fplog,
2518 const t_commrec* cr,
2520 const CheckpointHeaderContents& headerContents,
2521 gmx_bool reproducibilityRequested)
2523 /* Note that this check_string on the version will also print a message
2524 * when only the minor version differs. But we only print a warning
2525 * message further down with reproducibilityRequested=TRUE.
2527 gmx_bool versionDiffers = FALSE;
2528 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2530 gmx_bool precisionDiffers = FALSE;
2531 check_int(fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2532 if (precisionDiffers)
2534 const char msg_precision_difference[] =
2535 "You are continuing a simulation with a different precision. Not matching\n"
2536 "single/double precision will lead to precision or performance loss.\n";
2539 fprintf(fplog, "%s\n", msg_precision_difference);
2543 gmx_bool mm = (versionDiffers || precisionDiffers);
2545 if (reproducibilityRequested)
2547 check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(),
2548 headerContents.fprog, &mm);
2550 check_int(fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2553 if (cr->nnodes > 1 && reproducibilityRequested)
2555 check_int(fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2557 int npp = cr->nnodes;
2558 if (cr->npmenodes >= 0)
2560 npp -= cr->npmenodes;
2562 int npp_f = headerContents.nnodes;
2563 if (headerContents.npme >= 0)
2565 npp_f -= headerContents.npme;
2569 check_int(fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2570 check_int(fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2571 check_int(fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2577 /* Gromacs should be able to continue from checkpoints between
2578 * different patch level versions, but we do not guarantee
2579 * compatibility between different major/minor versions - check this.
2583 sscanf(gmx_version(), "%5d", &gmx_major);
2584 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2585 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2587 const char msg_major_version_difference[] =
2588 "The current GROMACS major version is not identical to the one that\n"
2589 "generated the checkpoint file. In principle GROMACS does not support\n"
2590 "continuation from checkpoints between different versions, so we advise\n"
2591 "against this. If you still want to try your luck we recommend that you use\n"
2592 "the -noappend flag to keep your output files from the two versions separate.\n"
2593 "This might also work around errors where the output fields in the energy\n"
2594 "file have changed between the different versions.\n";
2596 const char msg_mismatch_notice[] =
2597 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2598 "Continuation is exact, but not guaranteed to be binary identical.\n";
2600 if (majorVersionDiffers)
2604 fprintf(fplog, "%s\n", msg_major_version_difference);
2607 else if (reproducibilityRequested)
2609 /* Major & minor versions match at least, but something is different. */
2612 fprintf(fplog, "%s\n", msg_mismatch_notice);
2618 static void read_checkpoint(const char* fn,
2620 const t_commrec* cr,
2623 int* init_fep_state,
2624 CheckpointHeaderContents* headerContents,
2626 ObservablesHistory* observablesHistory,
2627 gmx_bool reproducibilityRequested,
2628 const gmx::MdModulesNotifier& mdModulesNotifier)
2631 char buf[STEPSTRSIZE];
2634 fp = gmx_fio_open(fn, "r");
2635 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2637 // If we are appending, then we don't want write to the open log
2638 // file because we still need to compute a checksum for it. In
2639 // that case, the filehandle will be nullptr. Otherwise, we report
2640 // to the new log file about the checkpoint file that we are
2642 FILE* fplog = gmx_fio_getfp(logfio);
2645 fprintf(fplog, "\n");
2646 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2647 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2648 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2649 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2650 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2651 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2652 fprintf(fplog, " time: %f\n", headerContents->t);
2653 fprintf(fplog, "\n");
2656 if (headerContents->natoms != state->natoms)
2659 "Checkpoint file is for a system of %d atoms, while the current system consists "
2661 headerContents->natoms, state->natoms);
2663 if (headerContents->ngtc != state->ngtc)
2666 "Checkpoint file is for a system of %d T-coupling groups, while the current "
2667 "system consists of %d T-coupling groups",
2668 headerContents->ngtc, state->ngtc);
2670 if (headerContents->nnhpres != state->nnhpres)
2673 "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the "
2674 "current system consists of %d NH-pressure-coupling variables",
2675 headerContents->nnhpres, state->nnhpres);
2678 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2679 if (headerContents->nlambda != nlambdaHistory)
2682 "Checkpoint file is for a system with %d lambda states, while the current system "
2683 "consists of %d lambda states",
2684 headerContents->nlambda, nlambdaHistory);
2687 init_gtc_state(state, state->ngtc, state->nnhpres,
2688 headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2689 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2691 if (headerContents->eIntegrator != eIntegrator)
2694 "Cannot change integrator during a checkpoint restart. Perhaps you should make a "
2695 "new .tpr with grompp -f new.mdp -t %s",
2699 if (headerContents->flags_state != state->flags)
2702 "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you "
2703 "should make a new .tpr with grompp -f new.mdp -t %s",
2709 check_match(fplog, cr, dd_nc, *headerContents, reproducibilityRequested);
2712 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2713 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it
2714 here. Investigate for 5.0. */
2719 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2724 state->ekinstate.hasReadEkinState = (((headerContents->flags_eks & (1 << eeksEKINH)) != 0)
2725 || ((headerContents->flags_eks & (1 << eeksEKINF)) != 0)
2726 || ((headerContents->flags_eks & (1 << eeksEKINO)) != 0)
2727 || (((headerContents->flags_eks & (1 << eeksEKINSCALEF))
2728 | (headerContents->flags_eks & (1 << eeksEKINSCALEH))
2729 | (headerContents->flags_eks & (1 << eeksVSCALE)))
2732 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2734 observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
2736 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents->flags_enh,
2737 observablesHistory->energyHistory.get(), nullptr);
2743 if (headerContents->flagsPullHistory)
2745 if (observablesHistory->pullHistory == nullptr)
2747 observablesHistory->pullHistory = std::make_unique<PullHistory>();
2749 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents->flagsPullHistory,
2750 observablesHistory->pullHistory.get(), StatePart::pullHistory, nullptr);
2757 if (headerContents->file_version < 6)
2760 "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2763 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda,
2764 &state->dfhist, nullptr);
2770 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2772 observablesHistory->edsamHistory = std::make_unique<edsamhistory_t>(edsamhistory_t{});
2774 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED,
2775 observablesHistory->edsamHistory.get(), nullptr);
2781 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2783 state->awhHistory = std::make_shared<gmx::AwhHistory>();
2785 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2791 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2793 observablesHistory->swapHistory = std::make_unique<swaphistory_t>(swaphistory_t{});
2795 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords,
2796 observablesHistory->swapHistory.get(), nullptr);
2802 std::vector<gmx_file_position_t> outputfiles;
2803 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2808 do_cpt_mdmodules(headerContents->file_version, fp, mdModulesNotifier);
2809 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2814 if (gmx_fio_close(fp) != 0)
2816 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2821 void load_checkpoint(const char* fn,
2823 const t_commrec* cr,
2827 ObservablesHistory* observablesHistory,
2828 gmx_bool reproducibilityRequested,
2829 const gmx::MdModulesNotifier& mdModulesNotifier)
2831 CheckpointHeaderContents headerContents;
2834 /* Read the state from the checkpoint file */
2835 read_checkpoint(fn, logfio, cr, dd_nc, ir->eI, &(ir->fepvals->init_fep_state), &headerContents,
2836 state, observablesHistory, reproducibilityRequested, mdModulesNotifier);
2840 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
2841 gmx::MdModulesCheckpointReadingBroadcast broadcastCheckPointData = { *cr, headerContents.file_version };
2842 mdModulesNotifier.notifier_.notify(broadcastCheckPointData);
2844 ir->bContinuation = TRUE;
2845 if (ir->nsteps >= 0)
2847 // TODO Should the following condition be <=? Currently if you
2848 // pass a checkpoint written by an normal completion to a restart,
2849 // mdrun will read all input, does some work but no steps, and
2850 // write successful output. But perhaps that is not desirable.
2851 // Note that we do not intend to support the use of mdrun
2852 // -nsteps to circumvent this condition.
2853 if (ir->nsteps + ir->init_step < headerContents.step)
2855 char buf[STEPSTRSIZE];
2856 std::string message =
2857 gmx::formatString("The input requested %s steps, ", gmx_step_str(ir->nsteps, buf));
2858 if (ir->init_step > 0)
2860 message += gmx::formatString("starting from step %s, ", gmx_step_str(ir->init_step, buf));
2862 message += gmx::formatString(
2863 "however the checkpoint "
2864 "file has already reached step %s. The simulation will not "
2865 "proceed, because either your simulation is already complete, "
2866 "or your combination of input files don't match.",
2867 gmx_step_str(headerContents.step, buf));
2868 gmx_fatal(FARGS, "%s", message.c_str());
2870 ir->nsteps += ir->init_step - headerContents.step;
2872 ir->init_step = headerContents.step;
2873 ir->simulation_part = headerContents.simulation_part + 1;
2876 void read_checkpoint_part_and_step(const char* filename, int* simulation_part, int64_t* step)
2880 if (filename == nullptr || !gmx_fexist(filename) || ((fp = gmx_fio_open(filename, "r")) == nullptr))
2882 *simulation_part = 0;
2887 CheckpointHeaderContents headerContents;
2888 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2890 *simulation_part = headerContents.simulation_part;
2891 *step = headerContents.step;
2894 static CheckpointHeaderContents read_checkpoint_data(t_fileio* fp,
2896 std::vector<gmx_file_position_t>* outputfiles)
2898 CheckpointHeaderContents headerContents;
2899 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2900 state->natoms = headerContents.natoms;
2901 state->ngtc = headerContents.ngtc;
2902 state->nnhpres = headerContents.nnhpres;
2903 state->nhchainlength = headerContents.nhchainlength;
2904 state->flags = headerContents.flags_state;
2905 int ret = do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2910 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2916 energyhistory_t enerhist;
2917 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, nullptr);
2922 PullHistory pullHist = {};
2923 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist,
2924 StatePart::pullHistory, nullptr);
2930 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda,
2931 &state->dfhist, nullptr);
2937 edsamhistory_t edsamhist = {};
2938 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2944 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2950 swaphistory_t swaphist = {};
2951 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2957 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, outputfiles, nullptr, headerContents.file_version);
2963 gmx::MdModulesNotifier mdModuleNotifier;
2964 do_cpt_mdmodules(headerContents.file_version, fp, mdModuleNotifier);
2965 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2970 return headerContents;
2973 void read_checkpoint_trxframe(t_fileio* fp, t_trxframe* fr)
2976 std::vector<gmx_file_position_t> outputfiles;
2977 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, &outputfiles);
2979 fr->natoms = state.natoms;
2981 fr->step = int64_to_int(headerContents.step, "conversion of checkpoint to trajectory");
2983 fr->time = headerContents.t;
2985 fr->lambda = state.lambda[efptFEP];
2986 fr->fep_state = state.fep_state;
2988 fr->bX = ((state.flags & (1 << estX)) != 0);
2991 fr->x = makeRvecArray(state.x, state.natoms);
2993 fr->bV = ((state.flags & (1 << estV)) != 0);
2996 fr->v = makeRvecArray(state.v, state.natoms);
2999 fr->bBox = ((state.flags & (1 << estBOX)) != 0);
3002 copy_mat(state.box, fr->box);
3006 void list_checkpoint(const char* fn, FILE* out)
3013 fp = gmx_fio_open(fn, "r");
3014 CheckpointHeaderContents headerContents;
3015 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
3016 state.natoms = headerContents.natoms;
3017 state.ngtc = headerContents.ngtc;
3018 state.nnhpres = headerContents.nnhpres;
3019 state.nhchainlength = headerContents.nhchainlength;
3020 state.flags = headerContents.flags_state;
3021 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
3026 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
3032 energyhistory_t enerhist;
3033 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, out);
3037 PullHistory pullHist = {};
3038 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist,
3039 StatePart::pullHistory, out);
3044 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda,
3045 &state.dfhist, out);
3050 edsamhistory_t edsamhist = {};
3051 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
3056 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state.awhHistory.get(), out);
3061 swaphistory_t swaphist = {};
3062 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
3067 std::vector<gmx_file_position_t> outputfiles;
3068 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
3073 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
3080 if (gmx_fio_close(fp) != 0)
3082 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3086 /* This routine cannot print tons of data, since it is called before the log file is opened. */
3087 CheckpointHeaderContents read_checkpoint_simulation_part_and_filenames(t_fileio* fp,
3088 std::vector<gmx_file_position_t>* outputfiles)
3091 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, outputfiles);
3092 if (gmx_fio_close(fp) != 0)
3094 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3096 return headerContents;