2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012 by the GROMACS development team.
5 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
6 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /* The source code in this file should be thread-safe.
39 Please keep it that way. */
42 #include "checkpoint.h"
53 #include "buildinfo.h"
54 #include "gromacs/fileio/filetypes.h"
55 #include "gromacs/fileio/gmxfio.h"
56 #include "gromacs/fileio/gmxfio_xdr.h"
57 #include "gromacs/fileio/xdr_datatype.h"
58 #include "gromacs/fileio/xdrf.h"
59 #include "gromacs/gmxlib/network.h"
60 #include "gromacs/math/vec.h"
61 #include "gromacs/math/vecdump.h"
62 #include "gromacs/math/vectypes.h"
63 #include "gromacs/mdtypes/awh_correlation_history.h"
64 #include "gromacs/mdtypes/awh_history.h"
65 #include "gromacs/mdtypes/commrec.h"
66 #include "gromacs/mdtypes/df_history.h"
67 #include "gromacs/mdtypes/edsamhistory.h"
68 #include "gromacs/mdtypes/energyhistory.h"
69 #include "gromacs/mdtypes/inputrec.h"
70 #include "gromacs/mdtypes/md_enums.h"
71 #include "gromacs/mdtypes/mdrunoptions.h"
72 #include "gromacs/mdtypes/observableshistory.h"
73 #include "gromacs/mdtypes/pullhistory.h"
74 #include "gromacs/mdtypes/state.h"
75 #include "gromacs/mdtypes/swaphistory.h"
76 #include "gromacs/trajectory/trajectoryframe.h"
77 #include "gromacs/utility/arrayref.h"
78 #include "gromacs/utility/baseversion.h"
79 #include "gromacs/utility/cstringutil.h"
80 #include "gromacs/utility/fatalerror.h"
81 #include "gromacs/utility/futil.h"
82 #include "gromacs/utility/gmxassert.h"
83 #include "gromacs/utility/int64_to_int.h"
84 #include "gromacs/utility/keyvaluetree.h"
85 #include "gromacs/utility/keyvaluetreebuilder.h"
86 #include "gromacs/utility/keyvaluetreeserializer.h"
87 #include "gromacs/utility/mdmodulenotification.h"
88 #include "gromacs/utility/programcontext.h"
89 #include "gromacs/utility/smalloc.h"
90 #include "gromacs/utility/sysinfo.h"
91 #include "gromacs/utility/txtdump.h"
94 # include "corewrap.h"
97 #define CPT_MAGIC1 171817
98 #define CPT_MAGIC2 171819
100 /*! \brief Enum of values that describe the contents of a cpt file
101 * whose format matches a version number
103 * The enum helps the code be more self-documenting and ensure merges
104 * do not silently resolve when two patches make the same bump. When
105 * adding new functionality, add a new element just above cptv_Count
106 * in this enumeration, and write code below that does the right thing
107 * according to the value of file_version.
111 cptv_Unknown = 17, /**< Version before numbering scheme */
112 cptv_RemoveBuildMachineInformation, /**< remove functionality that makes mdrun builds non-reproducible */
113 cptv_ComPrevStepAsPullGroupReference, /**< Allow using COM of previous step as pull group PBC reference */
114 cptv_PullAverage, /**< Added possibility to output average pull force and position */
115 cptv_MdModules, /**< Added checkpointing for MdModules */
116 cptv_Count /**< the total number of cptv versions */
119 /*! \brief Version number of the file format written to checkpoint
120 * files by this version of the code.
122 * cpt_version should normally only be changed, via adding a new field
123 * to cptv enumeration, when the header or footer format changes.
125 * The state data format itself is backward and forward compatible.
126 * But old code can not read a new entry that is present in the file
127 * (but can read a new format when new entries are not present).
129 * The cpt_version increases whenever the file format in the main
130 * development branch changes, due to an extension of the cptv enum above.
131 * Backward compatibility for reading old run input files is maintained
132 * by checking this version number against that of the file and then using
133 * the correct code path. */
134 static const int cpt_version = cptv_Count - 1;
137 const char* est_names[estNR] = { "FE-lambda",
143 "thermostat-integral",
148 "LD-rng-unsupported",
149 "LD-rng-i-unsupported",
162 "MC-rng-unsupported",
163 "MC-rng-i-unsupported",
164 "barostat-integral" };
181 static const char* eeks_names[eeksNR] = { "Ekin_n", "Ekinh", "dEkindlambda",
182 "mv_cos", "Ekinf", "Ekinh_old",
183 "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC",
195 eenhENERGY_NSTEPS_SIM,
196 eenhENERGY_DELTA_H_NN,
197 eenhENERGY_DELTA_H_LIST,
198 eenhENERGY_DELTA_H_STARTTIME,
199 eenhENERGY_DELTA_H_STARTLAMBDA,
205 epullhPULL_NUMCOORDINATES,
206 epullhPULL_NUMGROUPS,
207 epullhPULL_NUMVALUESINXSUM,
208 epullhPULL_NUMVALUESINFSUM,
214 epullcoordh_VALUE_REF_SUM,
215 epullcoordh_VALUE_SUM,
216 epullcoordh_DR01_SUM,
217 epullcoordh_DR23_SUM,
218 epullcoordh_DR45_SUM,
219 epullcoordh_FSCAL_SUM,
220 epullcoordh_DYNAX_SUM,
230 static const char* eenh_names[eenhNR] = { "energy_n",
239 "energy_delta_h_list",
240 "energy_delta_h_start_time",
241 "energy_delta_h_start_lambda" };
243 static const char* ePullhNames[epullhNR] = { "pullhistory_numcoordinates", "pullhistory_numgroups",
244 "pullhistory_numvaluesinxsum",
245 "pullhistory_numvaluesinfsum" };
247 /* free energy history variables -- need to be preserved over checkpoint */
266 /* free energy history variable names */
267 static const char* edfh_names[edfhNR] = { "bEquilibrated",
269 "Wang-Landau Histogram",
277 "accumulated_plus_2",
278 "accumulated_minus_2",
282 /* AWH biasing history variables */
286 eawhhEQUILIBRATEHISTOGRAM,
290 eawhhUMBRELLAGRIDPOINT,
292 eawhhLOGSCALEDSAMPLEWEIGHT,
294 eawhhFORCECORRELATIONGRID,
298 static const char* eawhh_names[eawhhNR] = { "awh_in_initial", "awh_equilibrateHistogram",
299 "awh_histsize", "awh_npoints",
300 "awh_coordpoint", "awh_umbrellaGridpoint",
301 "awh_updatelist", "awh_logScaledSampleWeight",
302 "awh_numupdates", "awh_forceCorrelationGrid" };
310 static const char* epull_prev_step_com_names[epullsNR] = { "Pull groups prev step COM" };
313 //! Higher level vector element type, only used for formatting checkpoint dumps
314 enum class CptElementType
316 integer, //!< integer
317 real, //!< float or double, not linked to precision of type real
318 real3, //!< float[3] or double[3], not linked to precision of type real
319 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
322 //! \brief Parts of the checkpoint state, only used for reporting
325 microState, //!< The microstate of the simulated system
326 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
327 energyHistory, //!< Energy observable statistics
328 freeEnergyHistory, //!< Free-energy state and observable statistics
329 accWeightHistogram, //!< Accelerated weight histogram method state
330 pullState, //!< COM of previous step.
331 pullHistory //!< Pull history statistics (sums since last written output)
334 //! \brief Return the name of a checkpoint entry based on part and part entry
335 static const char* entryName(StatePart part, int ecpt)
339 case StatePart::microState: return est_names[ecpt];
340 case StatePart::kineticEnergy: return eeks_names[ecpt];
341 case StatePart::energyHistory: return eenh_names[ecpt];
342 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
343 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
344 case StatePart::pullState: return epull_prev_step_com_names[ecpt];
345 case StatePart::pullHistory: return ePullhNames[ecpt];
351 static void cp_warning(FILE* fp)
353 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
356 [[noreturn]] static void cp_error()
358 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
361 static void do_cpt_string_err(XDR* xd, const char* desc, gmx::ArrayRef<char> s, FILE* list)
363 char* data = s.data();
364 if (xdr_string(xd, &data, s.size()) == 0)
370 fprintf(list, "%s = %s\n", desc, data);
374 static int do_cpt_int(XDR* xd, const char* desc, int* i, FILE* list)
376 if (xdr_int(xd, i) == 0)
382 fprintf(list, "%s = %d\n", desc, *i);
387 static int do_cpt_u_chars(XDR* xd, const char* desc, int n, unsigned char* i, FILE* list)
391 fprintf(list, "%s = ", desc);
394 for (int j = 0; j < n && res; j++)
396 res &= xdr_u_char(xd, &i[j]);
399 fprintf(list, "%02x", i[j]);
414 static void do_cpt_int_err(XDR* xd, const char* desc, int* i, FILE* list)
416 if (do_cpt_int(xd, desc, i, list) < 0)
422 static void do_cpt_bool_err(XDR* xd, const char* desc, bool* b, FILE* list)
424 int i = static_cast<int>(*b);
426 if (do_cpt_int(xd, desc, &i, list) < 0)
434 static void do_cpt_step_err(XDR* xd, const char* desc, int64_t* i, FILE* list)
436 char buf[STEPSTRSIZE];
438 if (xdr_int64(xd, i) == 0)
444 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
448 static void do_cpt_double_err(XDR* xd, const char* desc, double* f, FILE* list)
450 if (xdr_double(xd, f) == 0)
456 fprintf(list, "%s = %f\n", desc, *f);
460 static void do_cpt_real_err(XDR* xd, real* f)
463 bool_t res = xdr_double(xd, f);
465 bool_t res = xdr_float(xd, f);
473 static void do_cpt_n_rvecs_err(XDR* xd, const char* desc, int n, rvec f[], FILE* list)
475 for (int i = 0; i < n; i++)
477 for (int j = 0; j < DIM; j++)
479 do_cpt_real_err(xd, &f[i][j]);
485 pr_rvecs(list, 0, desc, f, n);
497 static const int value = xdr_datatype_int;
501 struct xdr_type<float>
503 static const int value = xdr_datatype_float;
507 struct xdr_type<double>
509 static const int value = xdr_datatype_double;
512 //! \brief Returns size in byte of an xdr_datatype
513 static inline unsigned int sizeOfXdrType(int xdrType)
517 case xdr_datatype_int: return sizeof(int);
518 case xdr_datatype_float: return sizeof(float);
519 case xdr_datatype_double: return sizeof(double);
520 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
526 //! \brief Returns the XDR process function for i/o of an XDR type
527 static inline xdrproc_t xdrProc(int xdrType)
531 case xdr_datatype_int: return reinterpret_cast<xdrproc_t>(xdr_int);
532 case xdr_datatype_float: return reinterpret_cast<xdrproc_t>(xdr_float);
533 case xdr_datatype_double: return reinterpret_cast<xdrproc_t>(xdr_double);
534 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
540 /*! \brief Lists or only reads an xdr vector from checkpoint file
542 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
543 * The header for the print is set by \p part and \p ecpt.
544 * The formatting of the printing is set by \p cptElementType.
545 * When list==NULL only reads the elements.
547 static bool_t listXdrVector(XDR* xd, StatePart part, int ecpt, int nf, int xdrType, FILE* list, CptElementType cptElementType)
551 const unsigned int elemSize = sizeOfXdrType(xdrType);
552 std::vector<char> data(nf * elemSize);
553 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
559 case xdr_datatype_int:
560 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int*>(data.data()), nf, TRUE);
562 case xdr_datatype_float:
564 if (cptElementType == CptElementType::real3)
566 pr_rvecs(list, 0, entryName(part, ecpt),
567 reinterpret_cast<const rvec*>(data.data()), nf / 3);
572 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
573 pr_fvec(list, 0, entryName(part, ecpt),
574 reinterpret_cast<const float*>(data.data()), nf, TRUE);
577 case xdr_datatype_double:
579 if (cptElementType == CptElementType::real3)
581 pr_rvecs(list, 0, entryName(part, ecpt),
582 reinterpret_cast<const rvec*>(data.data()), nf / 3);
587 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
588 pr_dvec(list, 0, entryName(part, ecpt),
589 reinterpret_cast<const double*>(data.data()), nf, TRUE);
592 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
599 //! \brief Convert a double array, typed char*, to float
600 gmx_unused static void convertArrayRealPrecision(const char* c, float* v, int n)
602 const double* d = reinterpret_cast<const double*>(c);
603 for (int i = 0; i < n; i++)
605 v[i] = static_cast<float>(d[i]);
609 //! \brief Convert a float array, typed char*, to double
610 static void convertArrayRealPrecision(const char* c, double* v, int n)
612 const float* f = reinterpret_cast<const float*>(c);
613 for (int i = 0; i < n; i++)
615 v[i] = static_cast<double>(f[i]);
619 //! \brief Generate an error for trying to convert to integer
620 static void convertArrayRealPrecision(const char gmx_unused* c, int gmx_unused* v, int gmx_unused n)
622 GMX_RELEASE_ASSERT(false,
623 "We only expect type mismatches between float and double, not integer");
626 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
628 * This is the only routine that does the actually i/o of real vector,
629 * all other routines are intermediate level routines for specific real
630 * data types, calling this routine.
631 * Currently this routine is (too) complex, since it handles both real *
632 * and std::vector<real>. Using real * is deprecated and this routine
633 * will simplify a lot when only std::vector needs to be supported.
635 * The routine is generic to vectors with different allocators,
636 * because as part of reading a checkpoint there are vectors whose
637 * size is not known until reading has progressed far enough, so a
638 * resize method must be called.
640 * When not listing, we use either v or vector, depending on which is !=NULL.
641 * If nval >= 0, nval is used; on read this should match the passed value.
642 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
643 * the value is stored in nptr
645 template<typename T, typename AllocatorType>
646 static int doVectorLow(XDR* xd,
653 std::vector<T, AllocatorType>* vector,
655 CptElementType cptElementType)
657 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr)
658 || (v == nullptr && vector != nullptr),
659 "Without list, we should have exactly one of v and vector != NULL");
663 int numElemInTheFile;
668 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
669 numElemInTheFile = nval;
675 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
676 numElemInTheFile = *nptr;
680 numElemInTheFile = vector->size();
684 /* Read/write the vector element count */
685 res = xdr_int(xd, &numElemInTheFile);
690 /* Read/write the element data type */
691 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
692 int xdrTypeInTheFile = xdrTypeInTheCode;
693 res = xdr_int(xd, &xdrTypeInTheFile);
699 if (list == nullptr && (sflags & (1 << ecpt)))
703 if (numElemInTheFile != nval)
706 "Count mismatch for state entry %s, code count is %d, file count is %d\n",
707 entryName(part, ecpt), nval, numElemInTheFile);
710 else if (nptr != nullptr)
712 *nptr = numElemInTheFile;
715 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
719 sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
720 entryName(part, ecpt), xdr_datatype_names[xdrTypeInTheCode],
721 xdr_datatype_names[xdrTypeInTheFile]);
723 /* Matching int and real should never occur, but check anyhow */
724 if (xdrTypeInTheFile == xdr_datatype_int || xdrTypeInTheCode == xdr_datatype_int)
727 "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
736 snew(*v, numElemInTheFile);
742 /* This conditional ensures that we don't resize on write.
743 * In particular in the state where this code was written
744 * vector has a size of numElemInThefile and we
745 * don't want to lose that padding here.
747 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
749 vector->resize(numElemInTheFile);
757 vChar = reinterpret_cast<char*>(vp);
761 snew(vChar, numElemInTheFile * sizeOfXdrType(xdrTypeInTheFile));
763 res = xdr_vector(xd, vChar, numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
764 xdrProc(xdrTypeInTheFile));
772 /* In the old code float-double conversion came for free.
773 * In the new code we still support it, mainly because
774 * the tip4p_continue regression test makes use of this.
775 * It's an open question if we do or don't want to allow this.
777 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
783 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile, list, cptElementType);
789 //! \brief Read/Write a std::vector, on read checks the number of elements matches \p numElements, if specified.
792 doVector(XDR* xd, StatePart part, int ecpt, int sflags, std::vector<T>* vector, FILE* list, int numElements = -1)
794 return doVectorLow<T>(xd, part, ecpt, sflags, numElements, nullptr, nullptr, vector, list,
795 CptElementType::real);
798 //! \brief Read/Write an ArrayRef<real>.
799 static int doRealArrayRef(XDR* xd, StatePart part, int ecpt, int sflags, gmx::ArrayRef<real> vector, FILE* list)
801 real* v_real = vector.data();
802 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, vector.size(), nullptr,
803 &v_real, nullptr, list, CptElementType::real);
806 //! Convert from view of RVec to view of real.
807 static gmx::ArrayRef<real> realArrayRefFromRVecArrayRef(gmx::ArrayRef<gmx::RVec> ofRVecs)
809 return gmx::arrayRefFromArray<real>(reinterpret_cast<real*>(ofRVecs.data()), ofRVecs.size() * DIM);
812 //! \brief Read/Write a PaddedVector whose value_type is RVec.
813 template<typename PaddedVectorOfRVecType>
815 doRvecVector(XDR* xd, StatePart part, int ecpt, int sflags, PaddedVectorOfRVecType* v, int numAtoms, FILE* list)
817 const int numReals = numAtoms * DIM;
822 sflags & (1 << ecpt),
823 "When not listing, the flag for the entry should be set when requesting i/o");
824 GMX_RELEASE_ASSERT(v->size() == numAtoms, "v should have sufficient size for numAtoms");
826 return doRealArrayRef(xd, part, ecpt, sflags, realArrayRefFromRVecArrayRef(makeArrayRef(*v)), list);
830 // Use the rebind facility to change the value_type of the
831 // allocator from RVec to real.
832 using realAllocator =
833 typename std::allocator_traits<typename PaddedVectorOfRVecType::allocator_type>::template rebind_alloc<real>;
834 return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr,
835 nullptr, list, CptElementType::real);
839 /* This function stores n along with the reals for reading,
840 * but on reading it assumes that n matches the value in the checkpoint file,
841 * a fatal error is generated when this is not the case.
843 static int do_cpte_reals(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
845 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr,
846 list, CptElementType::real);
849 /* This function does the same as do_cpte_reals,
850 * except that on reading it ignores the passed value of *n
851 * and stores the value read from the checkpoint file in *n.
853 static int do_cpte_n_reals(XDR* xd, StatePart part, int ecpt, int sflags, int* n, real** v, FILE* list)
855 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list,
856 CptElementType::real);
859 static int do_cpte_real(XDR* xd, StatePart part, int ecpt, int sflags, real* r, FILE* list)
861 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr,
862 list, CptElementType::real);
865 static int do_cpte_ints(XDR* xd, StatePart part, int ecpt, int sflags, int n, int** v, FILE* list)
867 return doVectorLow<int, std::allocator<int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr,
868 list, CptElementType::integer);
871 static int do_cpte_int(XDR* xd, StatePart part, int ecpt, int sflags, int* i, FILE* list)
873 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
876 static int do_cpte_bool(XDR* xd, StatePart part, int ecpt, int sflags, bool* b, FILE* list)
878 int i = static_cast<int>(*b);
879 int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
884 static int do_cpte_doubles(XDR* xd, StatePart part, int ecpt, int sflags, int n, double** v, FILE* list)
886 return doVectorLow<double, std::allocator<double>>(xd, part, ecpt, sflags, n, nullptr, v,
887 nullptr, list, CptElementType::real);
890 static int do_cpte_double(XDR* xd, StatePart part, int ecpt, int sflags, double* r, FILE* list)
892 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
895 static int do_cpte_matrix(XDR* xd, StatePart part, int ecpt, int sflags, matrix v, FILE* list)
901 ret = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, DIM * DIM, nullptr, &vr,
902 nullptr, nullptr, CptElementType::matrix3x3);
904 if (list && ret == 0)
906 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
913 static int do_cpte_nmatrix(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
917 char name[CPTSTRLEN];
924 for (i = 0; i < n; i++)
926 reti = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]),
927 nullptr, nullptr, CptElementType::matrix3x3);
928 if (list && reti == 0)
930 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
931 pr_reals(list, 0, name, v[i], n);
941 static int do_cpte_matrices(XDR* xd, StatePart part, int ecpt, int sflags, int n, matrix** v, FILE* list)
944 matrix *vp, *va = nullptr;
950 res = xdr_int(xd, &nf);
955 if (list == nullptr && nf != n)
957 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n",
958 entryName(part, ecpt), n, nf);
960 if (list || !(sflags & (1 << ecpt)))
973 snew(vr, nf * DIM * DIM);
974 for (i = 0; i < nf; i++)
976 for (j = 0; j < DIM; j++)
978 for (k = 0; k < DIM; k++)
980 vr[(i * DIM + j) * DIM + k] = vp[i][j][k];
984 ret = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, nf * DIM * DIM, nullptr,
985 &vr, nullptr, nullptr, CptElementType::matrix3x3);
986 for (i = 0; i < nf; i++)
988 for (j = 0; j < DIM; j++)
990 for (k = 0; k < DIM; k++)
992 vp[i][j][k] = vr[(i * DIM + j) * DIM + k];
998 if (list && ret == 0)
1000 for (i = 0; i < nf; i++)
1002 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
1013 static void do_cpt_header(XDR* xd, gmx_bool bRead, FILE* list, CheckpointHeaderContents* contents)
1026 res = xdr_int(xd, &magic);
1030 "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
1032 if (magic != CPT_MAGIC1)
1035 "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
1036 "The checkpoint file is corrupted or not a checkpoint file",
1042 gmx_gethostname(fhost, 255);
1044 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
1045 // The following fields are no longer ever written with meaningful
1046 // content, but because they precede the file version, there is no
1047 // good way for new code to read the old and new formats, nor a
1048 // good way for old code to avoid giving an error while reading a
1049 // new format. So we read and write a field that no longer has a
1051 do_cpt_string_err(xd, "GROMACS build time UNUSED", contents->btime_UNUSED, list);
1052 do_cpt_string_err(xd, "GROMACS build user UNUSED", contents->buser_UNUSED, list);
1053 do_cpt_string_err(xd, "GROMACS build host UNUSED", contents->bhost_UNUSED, list);
1054 do_cpt_string_err(xd, "generating program", contents->fprog, list);
1055 do_cpt_string_err(xd, "generation time", contents->ftime, list);
1056 contents->file_version = cpt_version;
1057 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
1058 if (contents->file_version > cpt_version)
1061 "Attempting to read a checkpoint file of version %d with code of version %d\n",
1062 contents->file_version, cpt_version);
1064 if (contents->file_version >= 13)
1066 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
1070 contents->double_prec = -1;
1072 if (contents->file_version >= 12)
1074 do_cpt_string_err(xd, "generating host", fhost, list);
1076 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
1077 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
1078 if (contents->file_version >= 10)
1080 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
1084 contents->nhchainlength = 1;
1086 if (contents->file_version >= 11)
1088 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
1092 contents->nnhpres = 0;
1094 if (contents->file_version >= 14)
1096 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
1100 contents->nlambda = 0;
1102 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
1103 if (contents->file_version >= 3)
1105 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1109 contents->simulation_part = 1;
1111 if (contents->file_version >= 5)
1113 do_cpt_step_err(xd, "step", &contents->step, list);
1118 do_cpt_int_err(xd, "step", &idum, list);
1119 contents->step = static_cast<int64_t>(idum);
1121 do_cpt_double_err(xd, "t", &contents->t, list);
1122 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1123 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1124 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1125 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1126 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1127 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1128 if (contents->file_version >= 4)
1130 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1131 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1135 contents->flags_eks = 0;
1136 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV + 1));
1137 contents->flags_state = (contents->flags_state
1138 & ~((1 << (estORIRE_DTAV + 1)) | (1 << (estORIRE_DTAV + 2))
1139 | (1 << (estORIRE_DTAV + 3))));
1141 if (contents->file_version >= 14)
1143 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1147 contents->flags_dfh = 0;
1150 if (contents->file_version >= 15)
1152 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1159 if (contents->file_version >= 16)
1161 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1165 contents->eSwapCoords = eswapNO;
1168 if (contents->file_version >= 17)
1170 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1174 contents->flags_awhh = 0;
1177 if (contents->file_version >= 18)
1179 do_cpt_int_err(xd, "pull history flags", &contents->flagsPullHistory, list);
1183 contents->flagsPullHistory = 0;
1187 static int do_cpt_footer(XDR* xd, int file_version)
1192 if (file_version >= 2)
1195 res = xdr_int(xd, &magic);
1200 if (magic != CPT_MAGIC2)
1209 static int do_cpt_state(XDR* xd, int fflags, t_state* state, FILE* list)
1212 const StatePart part = StatePart::microState;
1213 const int sflags = state->flags;
1214 // If reading, state->natoms was probably just read, so
1215 // allocations need to be managed. If writing, this won't change
1216 // anything that matters.
1217 state_change_natoms(state, state->natoms);
1218 for (int i = 0; (i < estNR && ret == 0); i++)
1220 if (fflags & (1 << i))
1225 ret = doRealArrayRef(
1226 xd, part, i, sflags,
1227 gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()),
1231 ret = do_cpte_int(xd, part, i, sflags, &state->fep_state, list);
1233 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1235 ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list);
1237 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1239 ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list);
1242 ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list);
1245 ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list);
1248 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list);
1251 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list);
1254 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list);
1257 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list);
1260 ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list);
1263 ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list);
1265 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1266 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1268 ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list);
1271 ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list);
1273 /* The RNG entries are no longer written,
1274 * the next 4 lines are only for reading old files.
1275 * It's OK that three case statements fall through.
1277 case estLD_RNG_NOTSUPPORTED:
1278 case estLD_RNGI_NOTSUPPORTED:
1279 case estMC_RNG_NOTSUPPORTED:
1280 case estMC_RNGI_NOTSUPPORTED:
1281 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1283 case estDISRE_INITF:
1284 ret = do_cpte_real(xd, part, i, sflags, &state->hist.disre_initf, list);
1286 case estDISRE_RM3TAV:
1287 ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs,
1288 &state->hist.disre_rm3tav, list);
1290 case estORIRE_INITF:
1291 ret = do_cpte_real(xd, part, i, sflags, &state->hist.orire_initf, list);
1294 ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav,
1295 &state->hist.orire_Dtav, list);
1297 case estPULLCOMPREVSTEP:
1298 ret = doVector<double>(xd, part, i, sflags, &state->pull_com_prev_step, list);
1302 "Unknown state entry %d\n"
1303 "You are reading a checkpoint file written by different code, which "
1312 static int do_cpt_ekinstate(XDR* xd, int fflags, ekinstate_t* ekins, FILE* list)
1316 const StatePart part = StatePart::kineticEnergy;
1317 for (int i = 0; (i < eeksNR && ret == 0); i++)
1319 if (fflags & (1 << i))
1325 ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list);
1328 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list);
1331 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list);
1334 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list);
1337 ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list);
1339 case eeksEKINSCALEF:
1340 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list);
1343 ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list);
1345 case eeksEKINSCALEH:
1346 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list);
1349 ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list);
1351 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1354 "Unknown ekin data state entry %d\n"
1355 "You are probably reading a new checkpoint file with old code",
1365 static int do_cpt_swapstate(XDR* xd, gmx_bool bRead, int eSwapCoords, swaphistory_t* swapstate, FILE* list)
1367 int swap_cpt_version = 2;
1369 if (eSwapCoords == eswapNO)
1374 swapstate->bFromCpt = bRead;
1375 swapstate->eSwapCoords = eSwapCoords;
1377 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1378 if (bRead && swap_cpt_version < 2)
1381 "Cannot read checkpoint files that were written with old versions"
1382 "of the ion/water position swapping protocol.\n");
1385 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1387 /* When reading, init_swapcoords has not been called yet,
1388 * so we have to allocate memory first. */
1389 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1392 snew(swapstate->ionType, swapstate->nIonTypes);
1395 for (int ic = 0; ic < eCompNR; ic++)
1397 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1399 swapstateIons_t* gs = &swapstate->ionType[ii];
1403 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1407 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1412 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1416 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1419 if (bRead && (nullptr == gs->nMolPast[ic]))
1421 snew(gs->nMolPast[ic], swapstate->nAverage);
1424 for (int j = 0; j < swapstate->nAverage; j++)
1428 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1432 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1438 /* Ion flux per channel */
1439 for (int ic = 0; ic < eChanNR; ic++)
1441 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1443 swapstateIons_t* gs = &swapstate->ionType[ii];
1447 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1451 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1456 /* Ion flux leakage */
1459 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1463 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1467 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1469 swapstateIons_t* gs = &swapstate->ionType[ii];
1471 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1475 snew(gs->channel_label, gs->nMol);
1476 snew(gs->comp_from, gs->nMol);
1479 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1480 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1483 /* Save the last known whole positions to checkpoint
1484 * file to be able to also make multimeric channels whole in PBC */
1485 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1486 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1489 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1490 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1491 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1492 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1496 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0],
1497 *swapstate->xc_old_whole_p[eChan0], list);
1498 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1],
1499 *swapstate->xc_old_whole_p[eChan1], list);
1506 static int do_cpt_enerhist(XDR* xd, gmx_bool bRead, int fflags, energyhistory_t* enerhist, FILE* list)
1515 GMX_RELEASE_ASSERT(enerhist != nullptr,
1516 "With energy history, we need a valid enerhist pointer");
1518 /* This is stored/read for backward compatibility */
1519 int energyHistoryNumEnergies = 0;
1522 enerhist->nsteps = 0;
1524 enerhist->nsteps_sim = 0;
1525 enerhist->nsum_sim = 0;
1527 else if (enerhist != nullptr)
1529 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1532 delta_h_history_t* deltaH = enerhist->deltaHForeignLambdas.get();
1533 const StatePart part = StatePart::energyHistory;
1534 for (int i = 0; (i < eenhNR && ret == 0); i++)
1536 if (fflags & (1 << i))
1541 ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list);
1543 case eenhENERGY_AVER:
1544 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list);
1546 case eenhENERGY_SUM:
1547 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list);
1549 case eenhENERGY_NSUM:
1550 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list);
1552 case eenhENERGY_SUM_SIM:
1553 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list);
1555 case eenhENERGY_NSUM_SIM:
1556 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list);
1558 case eenhENERGY_NSTEPS:
1559 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list);
1561 case eenhENERGY_NSTEPS_SIM:
1562 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list);
1564 case eenhENERGY_DELTA_H_NN:
1567 if (!bRead && deltaH != nullptr)
1569 numDeltaH = deltaH->dh.size();
1571 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1574 if (deltaH == nullptr)
1576 enerhist->deltaHForeignLambdas = std::make_unique<delta_h_history_t>();
1577 deltaH = enerhist->deltaHForeignLambdas.get();
1579 deltaH->dh.resize(numDeltaH);
1580 deltaH->start_lambda_set = FALSE;
1584 case eenhENERGY_DELTA_H_LIST:
1585 for (auto dh : deltaH->dh)
1587 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1590 case eenhENERGY_DELTA_H_STARTTIME:
1591 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list);
1593 case eenhENERGY_DELTA_H_STARTLAMBDA:
1594 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list);
1598 "Unknown energy history entry %d\n"
1599 "You are probably reading a new checkpoint file with old code",
1605 if ((fflags & (1 << eenhENERGY_SUM)) && !(fflags & (1 << eenhENERGY_SUM_SIM)))
1607 /* Assume we have an old file format and copy sum to sum_sim */
1608 enerhist->ener_sum_sim = enerhist->ener_sum;
1611 if ((fflags & (1 << eenhENERGY_NSUM)) && !(fflags & (1 << eenhENERGY_NSTEPS)))
1613 /* Assume we have an old file format and copy nsum to nsteps */
1614 enerhist->nsteps = enerhist->nsum;
1616 if ((fflags & (1 << eenhENERGY_NSUM_SIM)) && !(fflags & (1 << eenhENERGY_NSTEPS_SIM)))
1618 /* Assume we have an old file format and copy nsum to nsteps */
1619 enerhist->nsteps_sim = enerhist->nsum_sim;
1625 static int doCptPullCoordHist(XDR* xd, PullCoordinateHistory* pullCoordHist, const StatePart part, FILE* list)
1630 flags |= ((1 << epullcoordh_VALUE_REF_SUM) | (1 << epullcoordh_VALUE_SUM)
1631 | (1 << epullcoordh_DR01_SUM) | (1 << epullcoordh_DR23_SUM)
1632 | (1 << epullcoordh_DR45_SUM) | (1 << epullcoordh_FSCAL_SUM));
1634 for (int i = 0; i < epullcoordh_NR && ret == 0; i++)
1638 case epullcoordh_VALUE_REF_SUM:
1639 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->valueRef), list);
1641 case epullcoordh_VALUE_SUM:
1642 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->value), list);
1644 case epullcoordh_DR01_SUM:
1645 for (int j = 0; j < DIM && ret == 0; j++)
1647 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr01[j]), list);
1650 case epullcoordh_DR23_SUM:
1651 for (int j = 0; j < DIM && ret == 0; j++)
1653 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr23[j]), list);
1656 case epullcoordh_DR45_SUM:
1657 for (int j = 0; j < DIM && ret == 0; j++)
1659 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr45[j]), list);
1662 case epullcoordh_FSCAL_SUM:
1663 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->scalarForce), list);
1665 case epullcoordh_DYNAX_SUM:
1666 for (int j = 0; j < DIM && ret == 0; j++)
1668 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dynaX[j]), list);
1677 static int doCptPullGroupHist(XDR* xd, PullGroupHistory* pullGroupHist, const StatePart part, FILE* list)
1682 flags |= ((1 << epullgrouph_X_SUM));
1684 for (int i = 0; i < epullgrouph_NR; i++)
1688 case epullgrouph_X_SUM:
1689 for (int j = 0; j < DIM && ret == 0; j++)
1691 ret = do_cpte_double(xd, part, i, flags, &(pullGroupHist->x[j]), list);
1701 static int doCptPullHist(XDR* xd, gmx_bool bRead, int fflags, PullHistory* pullHist, const StatePart part, FILE* list)
1704 int pullHistoryNumCoordinates = 0;
1705 int pullHistoryNumGroups = 0;
1707 /* Retain the number of terms in the sum and the number of coordinates (used for writing
1708 * average pull forces and coordinates) in the pull history, in temporary variables,
1709 * in case they cannot be read from the checkpoint, in order to have backward compatibility */
1712 pullHist->numValuesInXSum = 0;
1713 pullHist->numValuesInFSum = 0;
1715 else if (pullHist != nullptr)
1717 pullHistoryNumCoordinates = pullHist->pullCoordinateSums.size();
1718 pullHistoryNumGroups = pullHist->pullGroupSums.size();
1722 GMX_RELEASE_ASSERT(fflags == 0, "Without pull history, all flags should be off");
1725 for (int i = 0; (i < epullhNR && ret == 0); i++)
1727 if (fflags & (1 << i))
1731 case epullhPULL_NUMCOORDINATES:
1732 ret = do_cpte_int(xd, part, i, fflags, &pullHistoryNumCoordinates, list);
1734 case epullhPULL_NUMGROUPS:
1735 do_cpt_int_err(xd, eenh_names[i], &pullHistoryNumGroups, list);
1737 case epullhPULL_NUMVALUESINXSUM:
1738 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInXSum, list);
1740 case epullhPULL_NUMVALUESINFSUM:
1741 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInFSum, list);
1745 "Unknown pull history entry %d\n"
1746 "You are probably reading a new checkpoint file with old code",
1753 pullHist->pullCoordinateSums.resize(pullHistoryNumCoordinates);
1754 pullHist->pullGroupSums.resize(pullHistoryNumGroups);
1756 if (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0)
1758 for (size_t i = 0; i < pullHist->pullCoordinateSums.size() && ret == 0; i++)
1760 ret = doCptPullCoordHist(xd, &(pullHist->pullCoordinateSums[i]), part, list);
1762 for (size_t i = 0; i < pullHist->pullGroupSums.size() && ret == 0; i++)
1764 ret = doCptPullGroupHist(xd, &(pullHist->pullGroupSums[i]), part, list);
1771 static int do_cpt_df_hist(XDR* xd, int fflags, int nlambda, df_history_t** dfhistPtr, FILE* list)
1780 if (*dfhistPtr == nullptr)
1782 snew(*dfhistPtr, 1);
1783 (*dfhistPtr)->nlambda = nlambda;
1784 init_df_history(*dfhistPtr, nlambda);
1786 df_history_t* dfhist = *dfhistPtr;
1788 const StatePart part = StatePart::freeEnergyHistory;
1789 for (int i = 0; (i < edfhNR && ret == 0); i++)
1791 if (fflags & (1 << i))
1796 ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list);
1799 ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list);
1802 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list);
1805 ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list);
1807 case edfhSUMWEIGHTS:
1808 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list);
1811 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list);
1814 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list);
1817 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list);
1820 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list);
1823 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list);
1826 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list);
1829 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list);
1832 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list);
1835 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list);
1840 "Unknown df history entry %d\n"
1841 "You are probably reading a new checkpoint file with old code",
1851 /* This function stores the last whole configuration of the reference and
1852 * average structure in the .cpt file
1854 static int do_cpt_EDstate(XDR* xd, gmx_bool bRead, int nED, edsamhistory_t* EDstate, FILE* list)
1861 EDstate->bFromCpt = bRead;
1864 /* When reading, init_edsam has not been called yet,
1865 * so we have to allocate memory first. */
1868 snew(EDstate->nref, EDstate->nED);
1869 snew(EDstate->old_sref, EDstate->nED);
1870 snew(EDstate->nav, EDstate->nED);
1871 snew(EDstate->old_sav, EDstate->nED);
1874 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1875 for (int i = 0; i < EDstate->nED; i++)
1879 /* Reference structure SREF */
1880 sprintf(buf, "ED%d # of atoms in reference structure", i + 1);
1881 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1882 sprintf(buf, "ED%d x_ref", i + 1);
1885 snew(EDstate->old_sref[i], EDstate->nref[i]);
1886 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1890 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1893 /* Average structure SAV */
1894 sprintf(buf, "ED%d # of atoms in average structure", i + 1);
1895 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1896 sprintf(buf, "ED%d x_av", i + 1);
1899 snew(EDstate->old_sav[i], EDstate->nav[i]);
1900 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1904 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1911 static int do_cpt_correlation_grid(XDR* xd,
1913 gmx_unused int fflags,
1914 gmx::CorrelationGridHistory* corrGrid,
1920 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1921 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1922 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1926 initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize,
1927 corrGrid->blockDataListSize);
1930 for (gmx::CorrelationBlockDataHistory& blockData : corrGrid->blockDataBuffer)
1932 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1933 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1934 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1935 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1936 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1937 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1938 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1939 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1940 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1941 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1942 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1948 static int do_cpt_awh_bias(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhBiasHistory* biasHistory, FILE* list)
1952 gmx::AwhBiasStateHistory* state = &biasHistory->state;
1953 for (int i = 0; (i < eawhhNR && ret == 0); i++)
1955 if (fflags & (1 << i))
1959 case eawhhIN_INITIAL:
1960 do_cpt_bool_err(xd, eawhh_names[i], &state->in_initial, list);
1962 case eawhhEQUILIBRATEHISTOGRAM:
1963 do_cpt_bool_err(xd, eawhh_names[i], &state->equilibrateHistogram, list);
1966 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list);
1973 numPoints = biasHistory->pointState.size();
1975 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1978 biasHistory->pointState.resize(numPoints);
1982 case eawhhCOORDPOINT:
1983 for (auto& psh : biasHistory->pointState)
1985 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1986 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1987 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1988 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1989 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1990 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1991 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1992 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1993 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1994 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
1995 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
1998 case eawhhUMBRELLAGRIDPOINT:
1999 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list);
2001 case eawhhUPDATELIST:
2002 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
2003 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
2005 case eawhhLOGSCALEDSAMPLEWEIGHT:
2006 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
2007 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
2009 case eawhhNUMUPDATES:
2010 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
2012 case eawhhFORCECORRELATIONGRID:
2013 ret = do_cpt_correlation_grid(xd, bRead, fflags,
2014 &biasHistory->forceCorrelationGrid, list, i);
2016 default: gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
2024 static int do_cpt_awh(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhHistory* awhHistory, FILE* list)
2030 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
2032 if (awhHistory == nullptr)
2034 GMX_RELEASE_ASSERT(bRead,
2035 "do_cpt_awh should not be called for writing without an AwhHistory");
2037 awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
2038 awhHistory = awhHistoryLocal.get();
2041 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
2042 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
2043 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
2044 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
2045 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
2046 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
2048 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
2049 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
2054 numBias = awhHistory->bias.size();
2056 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
2060 awhHistory->bias.resize(numBias);
2062 for (auto& bias : awhHistory->bias)
2064 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
2070 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
2075 static void do_cpt_mdmodules(int fileVersion,
2076 t_fileio* checkpointFileHandle,
2077 const gmx::MdModulesNotifier& mdModulesNotifier)
2079 if (fileVersion >= cptv_MdModules)
2081 gmx::FileIOXdrSerializer serializer(checkpointFileHandle);
2082 gmx::KeyValueTreeObject mdModuleCheckpointParameterTree =
2083 gmx::deserializeKeyValueTree(&serializer);
2084 gmx::MdModulesCheckpointReadingDataOnMaster mdModuleCheckpointReadingDataOnMaster = {
2085 mdModuleCheckpointParameterTree, fileVersion
2087 mdModulesNotifier.checkpointingNotifications_.notify(mdModuleCheckpointReadingDataOnMaster);
2091 static int do_cpt_files(XDR* xd, gmx_bool bRead, std::vector<gmx_file_position_t>* outputfiles, FILE* list, int file_version)
2094 gmx_off_t mask = 0xFFFFFFFFL;
2095 int offset_high, offset_low;
2096 std::array<char, CPTSTRLEN> buf;
2097 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
2099 // Ensure that reading pre-allocates outputfiles, while writing
2100 // writes what is already there.
2101 int nfiles = outputfiles->size();
2102 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
2108 outputfiles->resize(nfiles);
2111 for (auto& outputfile : *outputfiles)
2113 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
2116 do_cpt_string_err(xd, "output filename", buf, list);
2117 std::copy(std::begin(buf), std::end(buf), std::begin(outputfile.filename));
2119 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2123 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2127 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32)
2128 | (static_cast<gmx_off_t>(offset_low) & mask);
2132 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
2134 offset = outputfile.offset;
2142 offset_low = static_cast<int>(offset & mask);
2143 offset_high = static_cast<int>((offset >> 32) & mask);
2145 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2149 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2154 if (file_version >= 8)
2156 if (do_cpt_int(xd, "file_checksum_size", &outputfile.checksumSize, list) != 0)
2160 if (do_cpt_u_chars(xd, "file_checksum", outputfile.checksum.size(),
2161 outputfile.checksum.data(), list)
2169 outputfile.checksumSize = -1;
2175 static void mpiBarrierBeforeRename(const bool applyMpiBarrierBeforeRename, MPI_Comm mpiBarrierCommunicator)
2177 if (applyMpiBarrierBeforeRename)
2180 MPI_Barrier(mpiBarrierCommunicator);
2182 GMX_RELEASE_ASSERT(false, "Should not request a barrier without MPI");
2183 GMX_UNUSED_VALUE(mpiBarrierCommunicator);
2188 void write_checkpoint(const char* fn,
2189 gmx_bool bNumberAndKeep,
2191 const t_commrec* cr,
2195 int simulation_part,
2201 ObservablesHistory* observablesHistory,
2202 const gmx::MdModulesNotifier& mdModulesNotifier,
2203 bool applyMpiBarrierBeforeRename,
2204 MPI_Comm mpiBarrierCommunicator)
2207 char* fntemp; /* the temporary checkpoint file name */
2209 char buf[1024], suffix[5 + STEPSTRSIZE], sbuf[STEPSTRSIZE];
2212 if (DOMAINDECOMP(cr))
2214 npmenodes = cr->npmenodes;
2222 /* make the new temporary filename */
2223 snew(fntemp, std::strlen(fn) + 5 + STEPSTRSIZE);
2224 std::strcpy(fntemp, fn);
2225 fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2226 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
2227 std::strcat(fntemp, suffix);
2228 std::strcat(fntemp, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2230 /* if we can't rename, we just overwrite the cpt file.
2231 * dangerous if interrupted.
2233 snew(fntemp, std::strlen(fn));
2234 std::strcpy(fntemp, fn);
2236 std::string timebuf = gmx_format_current_time();
2240 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n", gmx_step_str(step, buf), timebuf.c_str());
2243 /* Get offsets for open files */
2244 auto outputfiles = gmx_fio_get_output_file_positions();
2246 fp = gmx_fio_open(fntemp, "w");
2249 if (state->ekinstate.bUpToDate)
2251 flags_eks = ((1 << eeksEKIN_N) | (1 << eeksEKINH) | (1 << eeksEKINF) | (1 << eeksEKINO)
2252 | (1 << eeksEKINSCALEF) | (1 << eeksEKINSCALEH) | (1 << eeksVSCALE)
2253 | (1 << eeksDEKINDL) | (1 << eeksMVCOS));
2260 energyhistory_t* enerhist = observablesHistory->energyHistory.get();
2262 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
2264 flags_enh |= (1 << eenhENERGY_N) | (1 << eenhENERGY_NSTEPS) | (1 << eenhENERGY_NSTEPS_SIM);
2265 if (enerhist->nsum > 0)
2267 flags_enh |= ((1 << eenhENERGY_AVER) | (1 << eenhENERGY_SUM) | (1 << eenhENERGY_NSUM));
2269 if (enerhist->nsum_sim > 0)
2271 flags_enh |= ((1 << eenhENERGY_SUM_SIM) | (1 << eenhENERGY_NSUM_SIM));
2273 if (enerhist->deltaHForeignLambdas != nullptr)
2275 flags_enh |= ((1 << eenhENERGY_DELTA_H_NN) | (1 << eenhENERGY_DELTA_H_LIST)
2276 | (1 << eenhENERGY_DELTA_H_STARTTIME) | (1 << eenhENERGY_DELTA_H_STARTLAMBDA));
2280 PullHistory* pullHist = observablesHistory->pullHistory.get();
2281 int flagsPullHistory = 0;
2282 if (pullHist != nullptr && (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0))
2284 flagsPullHistory |= (1 << epullhPULL_NUMCOORDINATES);
2285 flagsPullHistory |= ((1 << epullhPULL_NUMGROUPS) | (1 << epullhPULL_NUMVALUESINXSUM)
2286 | (1 << epullhPULL_NUMVALUESINFSUM));
2292 flags_dfh = ((1 << edfhBEQUIL) | (1 << edfhNATLAMBDA) | (1 << edfhSUMWEIGHTS)
2293 | (1 << edfhSUMDG) | (1 << edfhTIJ) | (1 << edfhTIJEMP));
2296 flags_dfh |= ((1 << edfhWLDELTA) | (1 << edfhWLHISTO));
2298 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER)
2299 || (elamstats == elamstatsMETROPOLIS))
2301 flags_dfh |= ((1 << edfhACCUMP) | (1 << edfhACCUMM) | (1 << edfhACCUMP2)
2302 | (1 << edfhACCUMM2) | (1 << edfhSUMMINVAR) | (1 << edfhSUMVAR));
2311 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
2313 flags_awhh |= ((1 << eawhhIN_INITIAL) | (1 << eawhhEQUILIBRATEHISTOGRAM) | (1 << eawhhHISTSIZE)
2314 | (1 << eawhhNPOINTS) | (1 << eawhhCOORDPOINT) | (1 << eawhhUMBRELLAGRIDPOINT)
2315 | (1 << eawhhUPDATELIST) | (1 << eawhhLOGSCALEDSAMPLEWEIGHT)
2316 | (1 << eawhhNUMUPDATES) | (1 << eawhhFORCECORRELATIONGRID));
2319 /* We can check many more things now (CPU, acceleration, etc), but
2320 * it is highly unlikely to have two separate builds with exactly
2321 * the same version, user, time, and build host!
2324 int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
2326 edsamhistory_t* edsamhist = observablesHistory->edsamHistory.get();
2327 int nED = (edsamhist ? edsamhist->nED : 0);
2329 swaphistory_t* swaphist = observablesHistory->swapHistory.get();
2330 int eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
2332 CheckpointHeaderContents headerContents = { 0,
2350 state->nhchainlength,
2360 std::strcpy(headerContents.version, gmx_version());
2361 std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
2362 std::strcpy(headerContents.ftime, timebuf.c_str());
2363 if (DOMAINDECOMP(cr))
2365 copy_ivec(domdecCells, headerContents.dd_nc);
2368 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
2370 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0)
2371 || (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0)
2372 || (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0)
2373 || (doCptPullHist(gmx_fio_getxdr(fp), FALSE, flagsPullHistory, pullHist, StatePart::pullHistory, nullptr)
2375 || (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0)
2376 || (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0)
2377 || (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0)
2378 || (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0)
2379 || (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr, headerContents.file_version) < 0))
2381 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2384 // Checkpointing MdModules
2386 gmx::KeyValueTreeBuilder builder;
2387 gmx::MdModulesWriteCheckpointData mdModulesWriteCheckpoint = { builder.rootObject(),
2388 headerContents.file_version };
2389 mdModulesNotifier.checkpointingNotifications_.notify(mdModulesWriteCheckpoint);
2390 auto tree = builder.build();
2391 gmx::FileIOXdrSerializer serializer(fp);
2392 gmx::serializeKeyValueTree(tree, &serializer);
2395 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2397 /* we really, REALLY, want to make sure to physically write the checkpoint,
2398 and all the files it depends on, out to disk. Because we've
2399 opened the checkpoint with gmx_fio_open(), it's in our list
2401 ret = gmx_fio_all_output_fsync();
2406 sprintf(buf, "Cannot fsync '%s'; maybe you are out of disk space?", gmx_fio_getname(ret));
2408 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
2414 gmx_warning("%s", buf);
2418 if (gmx_fio_close(fp) != 0)
2420 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2423 /* we don't move the checkpoint if the user specified they didn't want it,
2424 or if the fsyncs failed */
2426 if (!bNumberAndKeep && !ret)
2430 /* Rename the previous checkpoint file */
2431 mpiBarrierBeforeRename(applyMpiBarrierBeforeRename, mpiBarrierCommunicator);
2433 std::strcpy(buf, fn);
2434 buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2435 std::strcat(buf, "_prev");
2436 std::strcat(buf, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2439 /* we copy here so that if something goes wrong between now and
2440 * the rename below, there's always a state.cpt.
2441 * If renames are atomic (such as in POSIX systems),
2442 * this copying should be unneccesary.
2444 gmx_file_copy(fn, buf, FALSE);
2445 /* We don't really care if this fails:
2446 * there's already a new checkpoint.
2451 gmx_file_rename(fn, buf);
2455 /* Rename the checkpoint file from the temporary to the final name */
2456 mpiBarrierBeforeRename(applyMpiBarrierBeforeRename, mpiBarrierCommunicator);
2458 if (gmx_file_rename(fntemp, fn) != 0)
2460 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2463 #endif /* GMX_NO_RENAME */
2468 /*code for alternate checkpointing scheme. moved from top of loop over
2470 fcRequestCheckPoint();
2471 if (fcCheckPointParallel(cr->nodeid, NULL, 0) == 0)
2473 gmx_fatal(3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step);
2475 #endif /* end GMX_FAHCORE block */
2478 static void check_int(FILE* fplog, const char* type, int p, int f, gmx_bool* mm)
2480 bool foundMismatch = (p != f);
2488 fprintf(fplog, " %s mismatch,\n", type);
2489 fprintf(fplog, " current program: %d\n", p);
2490 fprintf(fplog, " checkpoint file: %d\n", f);
2491 fprintf(fplog, "\n");
2495 static void check_string(FILE* fplog, const char* type, const char* p, const char* f, gmx_bool* mm)
2497 bool foundMismatch = (std::strcmp(p, f) != 0);
2505 fprintf(fplog, " %s mismatch,\n", type);
2506 fprintf(fplog, " current program: %s\n", p);
2507 fprintf(fplog, " checkpoint file: %s\n", f);
2508 fprintf(fplog, "\n");
2512 static void check_match(FILE* fplog,
2513 const t_commrec* cr,
2515 const CheckpointHeaderContents& headerContents,
2516 gmx_bool reproducibilityRequested)
2518 /* Note that this check_string on the version will also print a message
2519 * when only the minor version differs. But we only print a warning
2520 * message further down with reproducibilityRequested=TRUE.
2522 gmx_bool versionDiffers = FALSE;
2523 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2525 gmx_bool precisionDiffers = FALSE;
2526 check_int(fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2527 if (precisionDiffers)
2529 const char msg_precision_difference[] =
2530 "You are continuing a simulation with a different precision. Not matching\n"
2531 "single/double precision will lead to precision or performance loss.\n";
2534 fprintf(fplog, "%s\n", msg_precision_difference);
2538 gmx_bool mm = (versionDiffers || precisionDiffers);
2540 if (reproducibilityRequested)
2542 check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(),
2543 headerContents.fprog, &mm);
2545 check_int(fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2548 if (cr->nnodes > 1 && reproducibilityRequested)
2550 check_int(fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2552 int npp = cr->nnodes;
2553 if (cr->npmenodes >= 0)
2555 npp -= cr->npmenodes;
2557 int npp_f = headerContents.nnodes;
2558 if (headerContents.npme >= 0)
2560 npp_f -= headerContents.npme;
2564 check_int(fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2565 check_int(fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2566 check_int(fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2572 /* Gromacs should be able to continue from checkpoints between
2573 * different patch level versions, but we do not guarantee
2574 * compatibility between different major/minor versions - check this.
2578 sscanf(gmx_version(), "%5d", &gmx_major);
2579 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2580 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2582 const char msg_major_version_difference[] =
2583 "The current GROMACS major version is not identical to the one that\n"
2584 "generated the checkpoint file. In principle GROMACS does not support\n"
2585 "continuation from checkpoints between different versions, so we advise\n"
2586 "against this. If you still want to try your luck we recommend that you use\n"
2587 "the -noappend flag to keep your output files from the two versions separate.\n"
2588 "This might also work around errors where the output fields in the energy\n"
2589 "file have changed between the different versions.\n";
2591 const char msg_mismatch_notice[] =
2592 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2593 "Continuation is exact, but not guaranteed to be binary identical.\n";
2595 if (majorVersionDiffers)
2599 fprintf(fplog, "%s\n", msg_major_version_difference);
2602 else if (reproducibilityRequested)
2604 /* Major & minor versions match at least, but something is different. */
2607 fprintf(fplog, "%s\n", msg_mismatch_notice);
2613 static void read_checkpoint(const char* fn,
2615 const t_commrec* cr,
2618 int* init_fep_state,
2619 CheckpointHeaderContents* headerContents,
2621 ObservablesHistory* observablesHistory,
2622 gmx_bool reproducibilityRequested,
2623 const gmx::MdModulesNotifier& mdModulesNotifier)
2626 char buf[STEPSTRSIZE];
2629 fp = gmx_fio_open(fn, "r");
2630 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2632 // If we are appending, then we don't want write to the open log
2633 // file because we still need to compute a checksum for it. In
2634 // that case, the filehandle will be nullptr. Otherwise, we report
2635 // to the new log file about the checkpoint file that we are
2637 FILE* fplog = gmx_fio_getfp(logfio);
2640 fprintf(fplog, "\n");
2641 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2642 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2643 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2644 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2645 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2646 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2647 fprintf(fplog, " time: %f\n", headerContents->t);
2648 fprintf(fplog, "\n");
2651 if (headerContents->natoms != state->natoms)
2654 "Checkpoint file is for a system of %d atoms, while the current system consists "
2656 headerContents->natoms, state->natoms);
2658 if (headerContents->ngtc != state->ngtc)
2661 "Checkpoint file is for a system of %d T-coupling groups, while the current "
2662 "system consists of %d T-coupling groups",
2663 headerContents->ngtc, state->ngtc);
2665 if (headerContents->nnhpres != state->nnhpres)
2668 "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the "
2669 "current system consists of %d NH-pressure-coupling variables",
2670 headerContents->nnhpres, state->nnhpres);
2673 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2674 if (headerContents->nlambda != nlambdaHistory)
2677 "Checkpoint file is for a system with %d lambda states, while the current system "
2678 "consists of %d lambda states",
2679 headerContents->nlambda, nlambdaHistory);
2682 init_gtc_state(state, state->ngtc, state->nnhpres,
2683 headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2684 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2686 if (headerContents->eIntegrator != eIntegrator)
2689 "Cannot change integrator during a checkpoint restart. Perhaps you should make a "
2690 "new .tpr with grompp -f new.mdp -t %s",
2694 if (headerContents->flags_state != state->flags)
2697 "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you "
2698 "should make a new .tpr with grompp -f new.mdp -t %s",
2704 check_match(fplog, cr, dd_nc, *headerContents, reproducibilityRequested);
2707 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2708 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it
2709 here. Investigate for 5.0. */
2714 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2719 state->ekinstate.hasReadEkinState = (((headerContents->flags_eks & (1 << eeksEKINH)) != 0)
2720 || ((headerContents->flags_eks & (1 << eeksEKINF)) != 0)
2721 || ((headerContents->flags_eks & (1 << eeksEKINO)) != 0)
2722 || (((headerContents->flags_eks & (1 << eeksEKINSCALEF))
2723 | (headerContents->flags_eks & (1 << eeksEKINSCALEH))
2724 | (headerContents->flags_eks & (1 << eeksVSCALE)))
2727 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2729 observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
2731 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents->flags_enh,
2732 observablesHistory->energyHistory.get(), nullptr);
2738 if (headerContents->flagsPullHistory)
2740 if (observablesHistory->pullHistory == nullptr)
2742 observablesHistory->pullHistory = std::make_unique<PullHistory>();
2744 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents->flagsPullHistory,
2745 observablesHistory->pullHistory.get(), StatePart::pullHistory, nullptr);
2752 if (headerContents->file_version < 6)
2755 "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2758 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda,
2759 &state->dfhist, nullptr);
2765 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2767 observablesHistory->edsamHistory = std::make_unique<edsamhistory_t>(edsamhistory_t{});
2769 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED,
2770 observablesHistory->edsamHistory.get(), nullptr);
2776 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2778 state->awhHistory = std::make_shared<gmx::AwhHistory>();
2780 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2786 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2788 observablesHistory->swapHistory = std::make_unique<swaphistory_t>(swaphistory_t{});
2790 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords,
2791 observablesHistory->swapHistory.get(), nullptr);
2797 std::vector<gmx_file_position_t> outputfiles;
2798 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2803 do_cpt_mdmodules(headerContents->file_version, fp, mdModulesNotifier);
2804 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2809 if (gmx_fio_close(fp) != 0)
2811 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2816 void load_checkpoint(const char* fn,
2818 const t_commrec* cr,
2822 ObservablesHistory* observablesHistory,
2823 gmx_bool reproducibilityRequested,
2824 const gmx::MdModulesNotifier& mdModulesNotifier)
2826 CheckpointHeaderContents headerContents;
2829 /* Read the state from the checkpoint file */
2830 read_checkpoint(fn, logfio, cr, dd_nc, ir->eI, &(ir->fepvals->init_fep_state), &headerContents,
2831 state, observablesHistory, reproducibilityRequested, mdModulesNotifier);
2835 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr->mpi_comm_mygroup);
2836 gmx::MdModulesCheckpointReadingBroadcast broadcastCheckPointData = {
2837 cr->mpi_comm_mygroup, PAR(cr), headerContents.file_version
2839 mdModulesNotifier.checkpointingNotifications_.notify(broadcastCheckPointData);
2841 ir->bContinuation = TRUE;
2842 if (ir->nsteps >= 0)
2844 // TODO Should the following condition be <=? Currently if you
2845 // pass a checkpoint written by an normal completion to a restart,
2846 // mdrun will read all input, does some work but no steps, and
2847 // write successful output. But perhaps that is not desirable.
2848 // Note that we do not intend to support the use of mdrun
2849 // -nsteps to circumvent this condition.
2850 if (ir->nsteps + ir->init_step < headerContents.step)
2852 std::string message = gmx::formatString("The input requested %ld steps, ", ir->nsteps);
2853 if (ir->init_step > 0)
2855 message += gmx::formatString("starting from step %ld, ", ir->init_step);
2857 message += gmx::formatString(
2858 "however the checkpoint "
2859 "file has already reached step %ld. The simulation will not "
2860 "proceed, because either your simulation is already complete, "
2861 "or your combination of input files don't match.",
2862 headerContents.step);
2863 gmx_fatal(FARGS, "%s", message.c_str());
2865 ir->nsteps += ir->init_step - headerContents.step;
2867 ir->init_step = headerContents.step;
2868 ir->simulation_part = headerContents.simulation_part + 1;
2871 void read_checkpoint_part_and_step(const char* filename, int* simulation_part, int64_t* step)
2875 if (filename == nullptr || !gmx_fexist(filename) || ((fp = gmx_fio_open(filename, "r")) == nullptr))
2877 *simulation_part = 0;
2882 CheckpointHeaderContents headerContents;
2883 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2885 *simulation_part = headerContents.simulation_part;
2886 *step = headerContents.step;
2889 static CheckpointHeaderContents read_checkpoint_data(t_fileio* fp,
2891 std::vector<gmx_file_position_t>* outputfiles)
2893 CheckpointHeaderContents headerContents;
2894 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2895 state->natoms = headerContents.natoms;
2896 state->ngtc = headerContents.ngtc;
2897 state->nnhpres = headerContents.nnhpres;
2898 state->nhchainlength = headerContents.nhchainlength;
2899 state->flags = headerContents.flags_state;
2900 int ret = do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2905 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2911 energyhistory_t enerhist;
2912 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, nullptr);
2917 PullHistory pullHist = {};
2918 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist,
2919 StatePart::pullHistory, nullptr);
2925 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda,
2926 &state->dfhist, nullptr);
2932 edsamhistory_t edsamhist = {};
2933 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2939 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2945 swaphistory_t swaphist = {};
2946 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2952 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, outputfiles, nullptr, headerContents.file_version);
2958 gmx::MdModulesNotifier mdModuleNotifier;
2959 do_cpt_mdmodules(headerContents.file_version, fp, mdModuleNotifier);
2960 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2965 return headerContents;
2968 void read_checkpoint_trxframe(t_fileio* fp, t_trxframe* fr)
2971 std::vector<gmx_file_position_t> outputfiles;
2972 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, &outputfiles);
2974 fr->natoms = state.natoms;
2976 fr->step = int64_to_int(headerContents.step, "conversion of checkpoint to trajectory");
2978 fr->time = headerContents.t;
2980 fr->lambda = state.lambda[efptFEP];
2981 fr->fep_state = state.fep_state;
2983 fr->bX = ((state.flags & (1 << estX)) != 0);
2986 fr->x = makeRvecArray(state.x, state.natoms);
2988 fr->bV = ((state.flags & (1 << estV)) != 0);
2991 fr->v = makeRvecArray(state.v, state.natoms);
2994 fr->bBox = ((state.flags & (1 << estBOX)) != 0);
2997 copy_mat(state.box, fr->box);
3001 void list_checkpoint(const char* fn, FILE* out)
3008 fp = gmx_fio_open(fn, "r");
3009 CheckpointHeaderContents headerContents;
3010 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
3011 state.natoms = headerContents.natoms;
3012 state.ngtc = headerContents.ngtc;
3013 state.nnhpres = headerContents.nnhpres;
3014 state.nhchainlength = headerContents.nhchainlength;
3015 state.flags = headerContents.flags_state;
3016 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
3021 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
3027 energyhistory_t enerhist;
3028 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, out);
3032 PullHistory pullHist = {};
3033 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist,
3034 StatePart::pullHistory, out);
3039 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda,
3040 &state.dfhist, out);
3045 edsamhistory_t edsamhist = {};
3046 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
3051 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state.awhHistory.get(), out);
3056 swaphistory_t swaphist = {};
3057 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
3062 std::vector<gmx_file_position_t> outputfiles;
3063 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
3068 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
3075 if (gmx_fio_close(fp) != 0)
3077 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3081 /* This routine cannot print tons of data, since it is called before the log file is opened. */
3082 CheckpointHeaderContents read_checkpoint_simulation_part_and_filenames(t_fileio* fp,
3083 std::vector<gmx_file_position_t>* outputfiles)
3086 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, outputfiles);
3087 if (gmx_fio_close(fp) != 0)
3089 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3091 return headerContents;