2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* The source code in this file should be thread-safe.
37 Please keep it that way. */
40 #include "checkpoint.h"
51 #include "buildinfo.h"
52 #include "gromacs/fileio/filetypes.h"
53 #include "gromacs/fileio/gmxfio.h"
54 #include "gromacs/fileio/gmxfio_xdr.h"
55 #include "gromacs/fileio/xdr_datatype.h"
56 #include "gromacs/fileio/xdrf.h"
57 #include "gromacs/gmxlib/network.h"
58 #include "gromacs/math/vec.h"
59 #include "gromacs/math/vecdump.h"
60 #include "gromacs/math/vectypes.h"
61 #include "gromacs/mdtypes/awh_correlation_history.h"
62 #include "gromacs/mdtypes/awh_history.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/mdtypes/df_history.h"
65 #include "gromacs/mdtypes/edsamhistory.h"
66 #include "gromacs/mdtypes/energyhistory.h"
67 #include "gromacs/mdtypes/inputrec.h"
68 #include "gromacs/mdtypes/md_enums.h"
69 #include "gromacs/mdtypes/mdrunoptions.h"
70 #include "gromacs/mdtypes/observableshistory.h"
71 #include "gromacs/mdtypes/pullhistory.h"
72 #include "gromacs/mdtypes/state.h"
73 #include "gromacs/mdtypes/swaphistory.h"
74 #include "gromacs/trajectory/trajectoryframe.h"
75 #include "gromacs/utility/arrayref.h"
76 #include "gromacs/utility/baseversion.h"
77 #include "gromacs/utility/cstringutil.h"
78 #include "gromacs/utility/fatalerror.h"
79 #include "gromacs/utility/futil.h"
80 #include "gromacs/utility/gmxassert.h"
81 #include "gromacs/utility/int64_to_int.h"
82 #include "gromacs/utility/programcontext.h"
83 #include "gromacs/utility/smalloc.h"
84 #include "gromacs/utility/sysinfo.h"
85 #include "gromacs/utility/txtdump.h"
91 #define CPT_MAGIC1 171817
92 #define CPT_MAGIC2 171819
94 /*! \brief Enum of values that describe the contents of a cpt file
95 * whose format matches a version number
97 * The enum helps the code be more self-documenting and ensure merges
98 * do not silently resolve when two patches make the same bump. When
99 * adding new functionality, add a new element just above cptv_Count
100 * in this enumeration, and write code below that does the right thing
101 * according to the value of file_version.
104 cptv_Unknown = 17, /**< Version before numbering scheme */
105 cptv_RemoveBuildMachineInformation, /**< remove functionality that makes mdrun builds non-reproducible */
106 cptv_ComPrevStepAsPullGroupReference, /**< Allow using COM of previous step as pull group PBC reference */
107 cptv_PullAverage, /**< Added possibility to output average pull force and position */
108 cptv_Count /**< the total number of cptv versions */
111 /*! \brief Version number of the file format written to checkpoint
112 * files by this version of the code.
114 * cpt_version should normally only be changed, via adding a new field
115 * to cptv enumeration, when the header or footer format changes.
117 * The state data format itself is backward and forward compatible.
118 * But old code can not read a new entry that is present in the file
119 * (but can read a new format when new entries are not present).
121 * The cpt_version increases whenever the file format in the main
122 * development branch changes, due to an extension of the cptv enum above.
123 * Backward compatibility for reading old run input files is maintained
124 * by checking this version number against that of the file and then using
125 * the correct code path. */
126 static const int cpt_version = cptv_Count - 1;
129 const char *est_names[estNR] =
132 "box", "box-rel", "box-v", "pres_prev",
133 "nosehoover-xi", "thermostat-integral",
134 "x", "v", "sdx-unsupported", "CGp", "LD-rng-unsupported", "LD-rng-i-unsupported",
135 "disre_initf", "disre_rm3tav",
136 "orire_initf", "orire_Dtav",
137 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng-unsupported", "MC-rng-i-unsupported",
142 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
145 static const char *eeks_names[eeksNR] =
147 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
148 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
152 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
153 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
154 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
155 eenhENERGY_DELTA_H_NN,
156 eenhENERGY_DELTA_H_LIST,
157 eenhENERGY_DELTA_H_STARTTIME,
158 eenhENERGY_DELTA_H_STARTLAMBDA,
163 epullhPULL_NUMCOORDINATES, epullhPULL_NUMGROUPS, epullhPULL_NUMVALUESINXSUM,
164 epullhPULL_NUMVALUESINFSUM, epullhNR
168 epullcoordh_VALUE_REF_SUM, epullcoordh_VALUE_SUM, epullcoordh_DR01_SUM,
169 epullcoordh_DR23_SUM, epullcoordh_DR45_SUM, epullcoordh_FSCAL_SUM,
170 epullcoordh_DYNAX_SUM, epullcoordh_NR
174 epullgrouph_X_SUM, epullgrouph_NR
177 static const char *eenh_names[eenhNR] =
179 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
180 "energy_sum_sim", "energy_nsum_sim",
181 "energy_nsteps", "energy_nsteps_sim",
183 "energy_delta_h_list",
184 "energy_delta_h_start_time",
185 "energy_delta_h_start_lambda"
188 static const char *ePullhNames[epullhNR] =
190 "pullhistory_numcoordinates", "pullhistory_numgroups", "pullhistory_numvaluesinxsum",
191 "pullhistory_numvaluesinfsum"
194 /* free energy history variables -- need to be preserved over checkpoint */
196 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
197 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
199 /* free energy history variable names */
200 static const char *edfh_names[edfhNR] =
202 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
203 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
206 /* AWH biasing history variables */
209 eawhhEQUILIBRATEHISTOGRAM,
212 eawhhCOORDPOINT, eawhhUMBRELLAGRIDPOINT,
214 eawhhLOGSCALEDSAMPLEWEIGHT,
216 eawhhFORCECORRELATIONGRID,
220 static const char *eawhh_names[eawhhNR] =
223 "awh_equilibrateHistogram",
226 "awh_coordpoint", "awh_umbrellaGridpoint",
228 "awh_logScaledSampleWeight",
230 "awh_forceCorrelationGrid"
238 static const char *epull_prev_step_com_names[epullsNR] =
240 "Pull groups prev step COM"
244 //! Higher level vector element type, only used for formatting checkpoint dumps
245 enum class CptElementType
247 integer, //!< integer
248 real, //!< float or double, not linked to precision of type real
249 real3, //!< float[3] or double[3], not linked to precision of type real
250 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
253 //! \brief Parts of the checkpoint state, only used for reporting
256 microState, //!< The microstate of the simulated system
257 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
258 energyHistory, //!< Energy observable statistics
259 freeEnergyHistory, //!< Free-energy state and observable statistics
260 accWeightHistogram, //!< Accelerated weight histogram method state
261 pullState, //!< COM of previous step.
262 pullHistory //!< Pull history statistics (sums since last written output)
265 //! \brief Return the name of a checkpoint entry based on part and part entry
266 static const char *entryName(StatePart part, int ecpt)
270 case StatePart::microState: return est_names [ecpt];
271 case StatePart::kineticEnergy: return eeks_names[ecpt];
272 case StatePart::energyHistory: return eenh_names[ecpt];
273 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
274 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
275 case StatePart::pullState: return epull_prev_step_com_names[ecpt];
276 case StatePart::pullHistory: return ePullhNames[ecpt];
282 static void cp_warning(FILE *fp)
284 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
287 [[ noreturn ]] static void cp_error()
289 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
292 static void do_cpt_string_err(XDR *xd, const char *desc, gmx::ArrayRef<char> s, FILE *list)
294 char *data = s.data();
295 if (xdr_string(xd, &data, s.size()) == 0)
301 fprintf(list, "%s = %s\n", desc, data);
305 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
307 if (xdr_int(xd, i) == 0)
313 fprintf(list, "%s = %d\n", desc, *i);
318 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
322 fprintf(list, "%s = ", desc);
325 for (int j = 0; j < n && res; j++)
327 res &= xdr_u_char(xd, &i[j]);
330 fprintf(list, "%02x", i[j]);
345 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
347 if (do_cpt_int(xd, desc, i, list) < 0)
353 static void do_cpt_bool_err(XDR *xd, const char *desc, bool *b, FILE *list)
355 int i = static_cast<int>(*b);
357 if (do_cpt_int(xd, desc, &i, list) < 0)
365 static void do_cpt_step_err(XDR *xd, const char *desc, int64_t *i, FILE *list)
367 char buf[STEPSTRSIZE];
369 if (xdr_int64(xd, i) == 0)
375 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
379 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
381 if (xdr_double(xd, f) == 0)
387 fprintf(list, "%s = %f\n", desc, *f);
391 static void do_cpt_real_err(XDR *xd, real *f)
394 bool_t res = xdr_double(xd, f);
396 bool_t res = xdr_float(xd, f);
404 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
406 for (int i = 0; i < n; i++)
408 for (int j = 0; j < DIM; j++)
410 do_cpt_real_err(xd, &f[i][j]);
416 pr_rvecs(list, 0, desc, f, n);
420 template <typename T>
428 static const int value = xdr_datatype_int;
432 struct xdr_type<float>
434 static const int value = xdr_datatype_float;
438 struct xdr_type<double>
440 static const int value = xdr_datatype_double;
443 //! \brief Returns size in byte of an xdr_datatype
444 static inline unsigned int sizeOfXdrType(int xdrType)
448 case xdr_datatype_int:
450 case xdr_datatype_float:
451 return sizeof(float);
452 case xdr_datatype_double:
453 return sizeof(double);
454 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
460 //! \brief Returns the XDR process function for i/o of an XDR type
461 static inline xdrproc_t xdrProc(int xdrType)
465 case xdr_datatype_int:
466 return reinterpret_cast<xdrproc_t>(xdr_int);
467 case xdr_datatype_float:
468 return reinterpret_cast<xdrproc_t>(xdr_float);
469 case xdr_datatype_double:
470 return reinterpret_cast<xdrproc_t>(xdr_double);
471 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
477 /*! \brief Lists or only reads an xdr vector from checkpoint file
479 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
480 * The header for the print is set by \p part and \p ecpt.
481 * The formatting of the printing is set by \p cptElementType.
482 * When list==NULL only reads the elements.
484 static bool_t listXdrVector(XDR *xd, StatePart part, int ecpt, int nf, int xdrType,
485 FILE *list, CptElementType cptElementType)
489 const unsigned int elemSize = sizeOfXdrType(xdrType);
490 std::vector<char> data(nf*elemSize);
491 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
497 case xdr_datatype_int:
498 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int *>(data.data()), nf, TRUE);
500 case xdr_datatype_float:
502 if (cptElementType == CptElementType::real3)
504 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
509 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
510 pr_fvec(list, 0, entryName(part, ecpt), reinterpret_cast<const float *>(data.data()), nf, TRUE);
513 case xdr_datatype_double:
515 if (cptElementType == CptElementType::real3)
517 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
522 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
523 pr_dvec(list, 0, entryName(part, ecpt), reinterpret_cast<const double *>(data.data()), nf, TRUE);
526 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
533 //! \brief Convert a double array, typed char*, to float
534 gmx_unused static void convertArrayRealPrecision(const char *c, float *v, int n)
536 const double *d = reinterpret_cast<const double *>(c);
537 for (int i = 0; i < n; i++)
539 v[i] = static_cast<float>(d[i]);
543 //! \brief Convert a float array, typed char*, to double
544 static void convertArrayRealPrecision(const char *c, double *v, int n)
546 const float *f = reinterpret_cast<const float *>(c);
547 for (int i = 0; i < n; i++)
549 v[i] = static_cast<double>(f[i]);
553 //! \brief Generate an error for trying to convert to integer
554 static void convertArrayRealPrecision(const char gmx_unused *c, int gmx_unused *v, int gmx_unused n)
556 GMX_RELEASE_ASSERT(false, "We only expect type mismatches between float and double, not integer");
559 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
561 * This is the only routine that does the actually i/o of real vector,
562 * all other routines are intermediate level routines for specific real
563 * data types, calling this routine.
564 * Currently this routine is (too) complex, since it handles both real *
565 * and std::vector<real>. Using real * is deprecated and this routine
566 * will simplify a lot when only std::vector needs to be supported.
568 * The routine is generic to vectors with different allocators,
569 * because as part of reading a checkpoint there are vectors whose
570 * size is not known until reading has progressed far enough, so a
571 * resize method must be called.
573 * When not listing, we use either v or vector, depending on which is !=NULL.
574 * If nval >= 0, nval is used; on read this should match the passed value.
575 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
576 * the value is stored in nptr
578 template<typename T, typename AllocatorType>
579 static int doVectorLow(XDR *xd, StatePart part, int ecpt, int sflags,
581 T **v, std::vector<T, AllocatorType> *vector,
582 FILE *list, CptElementType cptElementType)
584 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr) || (v == nullptr && vector != nullptr), "Without list, we should have exactly one of v and vector != NULL");
588 int numElemInTheFile;
593 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
594 numElemInTheFile = nval;
600 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
601 numElemInTheFile = *nptr;
605 numElemInTheFile = vector->size();
609 /* Read/write the vector element count */
610 res = xdr_int(xd, &numElemInTheFile);
615 /* Read/write the element data type */
616 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
617 int xdrTypeInTheFile = xdrTypeInTheCode;
618 res = xdr_int(xd, &xdrTypeInTheFile);
624 if (list == nullptr && (sflags & (1 << ecpt)))
628 if (numElemInTheFile != nval)
630 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), nval, numElemInTheFile);
633 else if (nptr != nullptr)
635 *nptr = numElemInTheFile;
638 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
642 sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
643 entryName(part, ecpt),
644 xdr_datatype_names[xdrTypeInTheCode],
645 xdr_datatype_names[xdrTypeInTheFile]);
647 /* Matching int and real should never occur, but check anyhow */
648 if (xdrTypeInTheFile == xdr_datatype_int ||
649 xdrTypeInTheCode == xdr_datatype_int)
651 gmx_fatal(FARGS, "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
660 snew(*v, numElemInTheFile);
666 /* This conditional ensures that we don't resize on write.
667 * In particular in the state where this code was written
668 * vector has a size of numElemInThefile and we
669 * don't want to lose that padding here.
671 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
673 vector->resize(numElemInTheFile);
681 vChar = reinterpret_cast<char *>(vp);
685 snew(vChar, numElemInTheFile*sizeOfXdrType(xdrTypeInTheFile));
687 res = xdr_vector(xd, vChar,
688 numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
689 xdrProc(xdrTypeInTheFile));
697 /* In the old code float-double conversion came for free.
698 * In the new code we still support it, mainly because
699 * the tip4p_continue regression test makes use of this.
700 * It's an open question if we do or don't want to allow this.
702 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
708 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile,
709 list, cptElementType);
715 //! \brief Read/Write a std::vector, on read checks the number of elements matches \p numElements, if specified.
716 template <typename T>
717 static int doVector(XDR *xd, StatePart part, int ecpt, int sflags,
718 std::vector<T> *vector, FILE *list, int numElements = -1)
720 return doVectorLow<T>(xd, part, ecpt, sflags, numElements, nullptr, nullptr, vector, list, CptElementType::real);
723 //! \brief Read/Write an ArrayRef<real>.
724 static int doRealArrayRef(XDR *xd, StatePart part, int ecpt, int sflags,
725 gmx::ArrayRef<real> vector, FILE *list)
727 real *v_real = vector.data();
728 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, vector.size(), nullptr, &v_real, nullptr, list, CptElementType::real);
731 //! Convert from view of RVec to view of real.
732 static gmx::ArrayRef<real>
733 realArrayRefFromRVecArrayRef(gmx::ArrayRef<gmx::RVec> ofRVecs)
735 return gmx::arrayRefFromArray<real>(reinterpret_cast<real *>(ofRVecs.data()), ofRVecs.size() * DIM);
738 //! \brief Read/Write a PaddedVector whose value_type is RVec.
739 template <typename PaddedVectorOfRVecType>
740 static int doRvecVector(XDR *xd, StatePart part, int ecpt, int sflags,
741 PaddedVectorOfRVecType *v, int numAtoms, FILE *list)
743 const int numReals = numAtoms*DIM;
747 GMX_RELEASE_ASSERT(sflags & (1 << ecpt), "When not listing, the flag for the entry should be set when requesting i/o");
748 GMX_RELEASE_ASSERT(v->size() == numAtoms, "v should have sufficient size for numAtoms");
750 return doRealArrayRef(xd, part, ecpt, sflags, realArrayRefFromRVecArrayRef(makeArrayRef(*v)), list);
754 // Use the rebind facility to change the value_type of the
755 // allocator from RVec to real.
756 using realAllocator = typename std::allocator_traits<typename PaddedVectorOfRVecType::allocator_type>::template rebind_alloc<real>;
757 return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr, nullptr, list, CptElementType::real);
761 /* This function stores n along with the reals for reading,
762 * but on reading it assumes that n matches the value in the checkpoint file,
763 * a fatal error is generated when this is not the case.
765 static int do_cpte_reals(XDR *xd, StatePart part, int ecpt, int sflags,
766 int n, real **v, FILE *list)
768 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
771 /* This function does the same as do_cpte_reals,
772 * except that on reading it ignores the passed value of *n
773 * and stores the value read from the checkpoint file in *n.
775 static int do_cpte_n_reals(XDR *xd, StatePart part, int ecpt, int sflags,
776 int *n, real **v, FILE *list)
778 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list, CptElementType::real);
781 static int do_cpte_real(XDR *xd, StatePart part, int ecpt, int sflags,
784 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr, list, CptElementType::real);
787 static int do_cpte_ints(XDR *xd, StatePart part, int ecpt, int sflags,
788 int n, int **v, FILE *list)
790 return doVectorLow < int, std::allocator < int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::integer);
793 static int do_cpte_int(XDR *xd, StatePart part, int ecpt, int sflags,
796 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
799 static int do_cpte_bool(XDR *xd, StatePart part, int ecpt, int sflags,
802 int i = static_cast<int>(*b);
803 int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
808 static int do_cpte_doubles(XDR *xd, StatePart part, int ecpt, int sflags,
809 int n, double **v, FILE *list)
811 return doVectorLow < double, std::allocator < double>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
814 static int do_cpte_double(XDR *xd, StatePart part, int ecpt, int sflags,
815 double *r, FILE *list)
817 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
820 static int do_cpte_matrix(XDR *xd, StatePart part, int ecpt, int sflags,
821 matrix v, FILE *list)
827 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
828 DIM*DIM, nullptr, &vr, nullptr, nullptr, CptElementType::matrix3x3);
830 if (list && ret == 0)
832 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
839 static int do_cpte_nmatrix(XDR *xd, StatePart part, int ecpt, int sflags,
840 int n, real **v, FILE *list)
844 char name[CPTSTRLEN];
851 for (i = 0; i < n; i++)
853 reti = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]), nullptr, nullptr, CptElementType::matrix3x3);
854 if (list && reti == 0)
856 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
857 pr_reals(list, 0, name, v[i], n);
867 static int do_cpte_matrices(XDR *xd, StatePart part, int ecpt, int sflags,
868 int n, matrix **v, FILE *list)
871 matrix *vp, *va = nullptr;
877 res = xdr_int(xd, &nf);
882 if (list == nullptr && nf != n)
884 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), n, nf);
886 if (list || !(sflags & (1<<ecpt)))
899 snew(vr, nf*DIM*DIM);
900 for (i = 0; i < nf; i++)
902 for (j = 0; j < DIM; j++)
904 for (k = 0; k < DIM; k++)
906 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
910 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
911 nf*DIM*DIM, nullptr, &vr, nullptr, nullptr,
912 CptElementType::matrix3x3);
913 for (i = 0; i < nf; i++)
915 for (j = 0; j < DIM; j++)
917 for (k = 0; k < DIM; k++)
919 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
925 if (list && ret == 0)
927 for (i = 0; i < nf; i++)
929 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
940 static void do_cpt_header(XDR *xd, gmx_bool bRead, FILE *list,
941 CheckpointHeaderContents *contents)
954 res = xdr_int(xd, &magic);
957 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
959 if (magic != CPT_MAGIC1)
961 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
962 "The checkpoint file is corrupted or not a checkpoint file",
968 gmx_gethostname(fhost, 255);
970 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
971 // The following fields are no longer ever written with meaningful
972 // content, but because they precede the file version, there is no
973 // good way for new code to read the old and new formats, nor a
974 // good way for old code to avoid giving an error while reading a
975 // new format. So we read and write a field that no longer has a
977 do_cpt_string_err(xd, "GROMACS build time UNUSED", contents->btime_UNUSED, list);
978 do_cpt_string_err(xd, "GROMACS build user UNUSED", contents->buser_UNUSED, list);
979 do_cpt_string_err(xd, "GROMACS build host UNUSED", contents->bhost_UNUSED, list);
980 do_cpt_string_err(xd, "generating program", contents->fprog, list);
981 do_cpt_string_err(xd, "generation time", contents->ftime, list);
982 contents->file_version = cpt_version;
983 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
984 if (contents->file_version > cpt_version)
986 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", contents->file_version, cpt_version);
988 if (contents->file_version >= 13)
990 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
994 contents->double_prec = -1;
996 if (contents->file_version >= 12)
998 do_cpt_string_err(xd, "generating host", fhost, list);
1000 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
1001 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
1002 if (contents->file_version >= 10)
1004 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
1008 contents->nhchainlength = 1;
1010 if (contents->file_version >= 11)
1012 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
1016 contents->nnhpres = 0;
1018 if (contents->file_version >= 14)
1020 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
1024 contents->nlambda = 0;
1026 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
1027 if (contents->file_version >= 3)
1029 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1033 contents->simulation_part = 1;
1035 if (contents->file_version >= 5)
1037 do_cpt_step_err(xd, "step", &contents->step, list);
1042 do_cpt_int_err(xd, "step", &idum, list);
1043 contents->step = static_cast<int64_t>(idum);
1045 do_cpt_double_err(xd, "t", &contents->t, list);
1046 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1047 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1048 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1049 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1050 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1051 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1052 if (contents->file_version >= 4)
1054 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1055 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1059 contents->flags_eks = 0;
1060 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV+1));
1061 contents->flags_state = (contents->flags_state & ~((1<<(estORIRE_DTAV+1)) |
1062 (1<<(estORIRE_DTAV+2)) |
1063 (1<<(estORIRE_DTAV+3))));
1065 if (contents->file_version >= 14)
1067 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1071 contents->flags_dfh = 0;
1074 if (contents->file_version >= 15)
1076 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1083 if (contents->file_version >= 16)
1085 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1089 contents->eSwapCoords = eswapNO;
1092 if (contents->file_version >= 17)
1094 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1098 contents->flags_awhh = 0;
1101 if (contents->file_version >= 18)
1103 do_cpt_int_err(xd, "pull history flags", &contents->flagsPullHistory, list);
1107 contents->flagsPullHistory = 0;
1111 static int do_cpt_footer(XDR *xd, int file_version)
1116 if (file_version >= 2)
1119 res = xdr_int(xd, &magic);
1124 if (magic != CPT_MAGIC2)
1133 static int do_cpt_state(XDR *xd,
1134 int fflags, t_state *state,
1138 const StatePart part = StatePart::microState;
1139 const int sflags = state->flags;
1140 // If reading, state->natoms was probably just read, so
1141 // allocations need to be managed. If writing, this won't change
1142 // anything that matters.
1143 state_change_natoms(state, state->natoms);
1144 for (int i = 0; (i < estNR && ret == 0); i++)
1146 if (fflags & (1<<i))
1150 case estLAMBDA: ret = doRealArrayRef(xd, part, i, sflags, gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()), list); break;
1151 case estFEPSTATE: ret = do_cpte_int (xd, part, i, sflags, &state->fep_state, list); break;
1152 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1153 case estBOX_REL: ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list); break;
1154 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1155 case estPRES_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list); break;
1156 case estSVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list); break;
1157 case estFVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list); break;
1158 case estNH_XI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list); break;
1159 case estNH_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list); break;
1160 case estNHPRES_XI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list); break;
1161 case estNHPRES_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list); break;
1162 case estTHERM_INT: ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list); break;
1163 case estBAROS_INT: ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list); break;
1164 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1165 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1166 case estX: ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list); break;
1167 case estV: ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list); break;
1168 /* The RNG entries are no longer written,
1169 * the next 4 lines are only for reading old files.
1171 case estLD_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1172 case estLD_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1173 case estMC_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1174 case estMC_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1175 case estDISRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.disre_initf, list); break;
1176 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1177 case estORIRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.orire_initf, list); break;
1178 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1179 case estPULLCOMPREVSTEP: ret = doVector<double>(xd, part, i, sflags, &state->pull_com_prev_step, list); break;
1181 gmx_fatal(FARGS, "Unknown state entry %d\n"
1182 "You are reading a checkpoint file written by different code, which is not supported", i);
1189 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1194 const StatePart part = StatePart::kineticEnergy;
1195 for (int i = 0; (i < eeksNR && ret == 0); i++)
1197 if (fflags & (1<<i))
1202 case eeksEKIN_N: ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list); break;
1203 case eeksEKINH: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1204 case eeksEKINF: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1205 case eeksEKINO: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1206 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list); break;
1207 case eeksEKINSCALEF: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list); break;
1208 case eeksVSCALE: ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list); break;
1209 case eeksEKINSCALEH: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list); break;
1210 case eeksDEKINDL: ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list); break;
1211 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1213 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1214 "You are probably reading a new checkpoint file with old code", i);
1223 static int do_cpt_swapstate(XDR *xd, gmx_bool bRead,
1224 int eSwapCoords, swaphistory_t *swapstate, FILE *list)
1226 int swap_cpt_version = 2;
1228 if (eSwapCoords == eswapNO)
1233 swapstate->bFromCpt = bRead;
1234 swapstate->eSwapCoords = eSwapCoords;
1236 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1237 if (bRead && swap_cpt_version < 2)
1239 gmx_fatal(FARGS, "Cannot read checkpoint files that were written with old versions"
1240 "of the ion/water position swapping protocol.\n");
1243 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1245 /* When reading, init_swapcoords has not been called yet,
1246 * so we have to allocate memory first. */
1247 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1250 snew(swapstate->ionType, swapstate->nIonTypes);
1253 for (int ic = 0; ic < eCompNR; ic++)
1255 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1257 swapstateIons_t *gs = &swapstate->ionType[ii];
1261 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1265 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1270 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1274 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1277 if (bRead && (nullptr == gs->nMolPast[ic]) )
1279 snew(gs->nMolPast[ic], swapstate->nAverage);
1282 for (int j = 0; j < swapstate->nAverage; j++)
1286 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1290 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1296 /* Ion flux per channel */
1297 for (int ic = 0; ic < eChanNR; ic++)
1299 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1301 swapstateIons_t *gs = &swapstate->ionType[ii];
1305 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1309 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1314 /* Ion flux leakage */
1317 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1321 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1325 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1327 swapstateIons_t *gs = &swapstate->ionType[ii];
1329 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1333 snew(gs->channel_label, gs->nMol);
1334 snew(gs->comp_from, gs->nMol);
1337 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1338 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1341 /* Save the last known whole positions to checkpoint
1342 * file to be able to also make multimeric channels whole in PBC */
1343 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1344 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1347 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1348 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1349 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1350 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1354 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], *swapstate->xc_old_whole_p[eChan0], list);
1355 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], *swapstate->xc_old_whole_p[eChan1], list);
1362 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1363 int fflags, energyhistory_t *enerhist,
1373 GMX_RELEASE_ASSERT(enerhist != nullptr, "With energy history, we need a valid enerhist pointer");
1375 /* This is stored/read for backward compatibility */
1376 int energyHistoryNumEnergies = 0;
1379 enerhist->nsteps = 0;
1381 enerhist->nsteps_sim = 0;
1382 enerhist->nsum_sim = 0;
1384 else if (enerhist != nullptr)
1386 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1389 delta_h_history_t *deltaH = enerhist->deltaHForeignLambdas.get();
1390 const StatePart part = StatePart::energyHistory;
1391 for (int i = 0; (i < eenhNR && ret == 0); i++)
1393 if (fflags & (1<<i))
1397 case eenhENERGY_N: ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list); break;
1398 case eenhENERGY_AVER: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list); break;
1399 case eenhENERGY_SUM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list); break;
1400 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1401 case eenhENERGY_SUM_SIM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list); break;
1402 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1403 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1404 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1405 case eenhENERGY_DELTA_H_NN:
1408 if (!bRead && deltaH != nullptr)
1410 numDeltaH = deltaH->dh.size();
1412 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1415 if (deltaH == nullptr)
1417 enerhist->deltaHForeignLambdas = std::make_unique<delta_h_history_t>();
1418 deltaH = enerhist->deltaHForeignLambdas.get();
1420 deltaH->dh.resize(numDeltaH);
1421 deltaH->start_lambda_set = FALSE;
1425 case eenhENERGY_DELTA_H_LIST:
1426 for (auto dh : deltaH->dh)
1428 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1431 case eenhENERGY_DELTA_H_STARTTIME:
1432 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list); break;
1433 case eenhENERGY_DELTA_H_STARTLAMBDA:
1434 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list); break;
1436 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1437 "You are probably reading a new checkpoint file with old code", i);
1442 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1444 /* Assume we have an old file format and copy sum to sum_sim */
1445 enerhist->ener_sum_sim = enerhist->ener_sum;
1448 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1449 !(fflags & (1<<eenhENERGY_NSTEPS)))
1451 /* Assume we have an old file format and copy nsum to nsteps */
1452 enerhist->nsteps = enerhist->nsum;
1454 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1455 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1457 /* Assume we have an old file format and copy nsum to nsteps */
1458 enerhist->nsteps_sim = enerhist->nsum_sim;
1464 static int doCptPullCoordHist(XDR *xd, PullCoordinateHistory *pullCoordHist,
1465 const StatePart part, FILE *list)
1470 flags |= ((1<<epullcoordh_VALUE_REF_SUM) | (1<<epullcoordh_VALUE_SUM) | (1<<epullcoordh_DR01_SUM) |
1471 (1<<epullcoordh_DR23_SUM) | (1<<epullcoordh_DR45_SUM) | (1<<epullcoordh_FSCAL_SUM));
1473 for (int i = 0; i < epullcoordh_NR && ret == 0; i++)
1477 case epullcoordh_VALUE_REF_SUM: ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->valueRef), list); break;
1478 case epullcoordh_VALUE_SUM: ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->value), list); break;
1479 case epullcoordh_DR01_SUM:
1480 for (int j = 0; j < DIM && ret == 0; j++)
1482 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr01[j]), list);
1485 case epullcoordh_DR23_SUM:
1486 for (int j = 0; j < DIM && ret == 0; j++)
1488 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr23[j]), list);
1491 case epullcoordh_DR45_SUM:
1492 for (int j = 0; j < DIM && ret == 0; j++)
1494 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr45[j]), list);
1497 case epullcoordh_FSCAL_SUM: ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->scalarForce), list); break;
1498 case epullcoordh_DYNAX_SUM:
1499 for (int j = 0; j < DIM && ret == 0; j++)
1501 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dynaX[j]), list);
1510 static int doCptPullGroupHist(XDR *xd, PullGroupHistory *pullGroupHist,
1511 const StatePart part, FILE *list)
1516 flags |= ((1<<epullgrouph_X_SUM));
1518 for (int i = 0; i < epullgrouph_NR; i++)
1522 case epullgrouph_X_SUM:
1523 for (int j = 0; j < DIM && ret == 0; j++)
1525 ret = do_cpte_double(xd, part, i, flags, &(pullGroupHist->x[j]), list);
1535 static int doCptPullHist(XDR *xd, gmx_bool bRead,
1536 int fflags, PullHistory *pullHist,
1537 const StatePart part,
1541 int pullHistoryNumCoordinates = 0;
1542 int pullHistoryNumGroups = 0;
1544 /* Retain the number of terms in the sum and the number of coordinates (used for writing
1545 * average pull forces and coordinates) in the pull history, in temporary variables,
1546 * in case they cannot be read from the checkpoint, in order to have backward compatibility */
1549 pullHist->numValuesInXSum = 0;
1550 pullHist->numValuesInFSum = 0;
1552 else if (pullHist != nullptr)
1554 pullHistoryNumCoordinates = pullHist->pullCoordinateSums.size();
1555 pullHistoryNumGroups = pullHist->pullGroupSums.size();
1559 GMX_RELEASE_ASSERT(fflags == 0, "Without pull history, all flags should be off");
1562 for (int i = 0; (i < epullhNR && ret == 0); i++)
1564 if (fflags & (1<<i))
1568 case epullhPULL_NUMCOORDINATES: ret = do_cpte_int(xd, part, i, fflags, &pullHistoryNumCoordinates, list); break;
1569 case epullhPULL_NUMGROUPS: do_cpt_int_err(xd, eenh_names[i], &pullHistoryNumGroups, list); break;
1570 case epullhPULL_NUMVALUESINXSUM: do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInXSum, list); break;
1571 case epullhPULL_NUMVALUESINFSUM: do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInFSum, list); break;
1573 gmx_fatal(FARGS, "Unknown pull history entry %d\n"
1574 "You are probably reading a new checkpoint file with old code", i);
1580 pullHist->pullCoordinateSums.resize(pullHistoryNumCoordinates);
1581 pullHist->pullGroupSums.resize(pullHistoryNumGroups);
1583 if (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0)
1585 for (size_t i = 0; i < pullHist->pullCoordinateSums.size() && ret == 0; i++)
1587 ret = doCptPullCoordHist(xd, &(pullHist->pullCoordinateSums[i]), part, list);
1589 for (size_t i = 0; i < pullHist->pullGroupSums.size() && ret == 0; i++)
1591 ret = doCptPullGroupHist(xd, &(pullHist->pullGroupSums[i]), part, list);
1598 static int do_cpt_df_hist(XDR *xd, int fflags, int nlambda, df_history_t **dfhistPtr, FILE *list)
1607 if (*dfhistPtr == nullptr)
1609 snew(*dfhistPtr, 1);
1610 (*dfhistPtr)->nlambda = nlambda;
1611 init_df_history(*dfhistPtr, nlambda);
1613 df_history_t *dfhist = *dfhistPtr;
1615 const StatePart part = StatePart::freeEnergyHistory;
1616 for (int i = 0; (i < edfhNR && ret == 0); i++)
1618 if (fflags & (1<<i))
1622 case edfhBEQUIL: ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list); break;
1623 case edfhNATLAMBDA: ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1624 case edfhWLHISTO: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1625 case edfhWLDELTA: ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list); break;
1626 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1627 case edfhSUMDG: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1628 case edfhSUMMINVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1629 case edfhSUMVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1630 case edfhACCUMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list); break;
1631 case edfhACCUMM: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list); break;
1632 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list); break;
1633 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list); break;
1634 case edfhTIJ: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list); break;
1635 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1638 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1639 "You are probably reading a new checkpoint file with old code", i);
1648 /* This function stores the last whole configuration of the reference and
1649 * average structure in the .cpt file
1651 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1652 int nED, edsamhistory_t *EDstate, FILE *list)
1659 EDstate->bFromCpt = bRead;
1662 /* When reading, init_edsam has not been called yet,
1663 * so we have to allocate memory first. */
1666 snew(EDstate->nref, EDstate->nED);
1667 snew(EDstate->old_sref, EDstate->nED);
1668 snew(EDstate->nav, EDstate->nED);
1669 snew(EDstate->old_sav, EDstate->nED);
1672 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1673 for (int i = 0; i < EDstate->nED; i++)
1677 /* Reference structure SREF */
1678 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1679 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1680 sprintf(buf, "ED%d x_ref", i+1);
1683 snew(EDstate->old_sref[i], EDstate->nref[i]);
1684 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1688 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1691 /* Average structure SAV */
1692 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1693 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1694 sprintf(buf, "ED%d x_av", i+1);
1697 snew(EDstate->old_sav[i], EDstate->nav[i]);
1698 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1702 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1709 static int do_cpt_correlation_grid(XDR *xd, gmx_bool bRead, gmx_unused int fflags,
1710 gmx::CorrelationGridHistory *corrGrid,
1711 FILE *list, int eawhh)
1715 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1716 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1717 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1721 initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize, corrGrid->blockDataListSize);
1724 for (gmx::CorrelationBlockDataHistory &blockData : corrGrid->blockDataBuffer)
1726 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1727 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1728 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1729 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1730 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1731 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1732 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1733 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1734 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1735 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1736 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1742 static int do_cpt_awh_bias(XDR *xd, gmx_bool bRead,
1743 int fflags, gmx::AwhBiasHistory *biasHistory,
1748 gmx::AwhBiasStateHistory *state = &biasHistory->state;
1749 for (int i = 0; (i < eawhhNR && ret == 0); i++)
1751 if (fflags & (1<<i))
1755 case eawhhIN_INITIAL:
1756 do_cpt_bool_err(xd, eawhh_names[i], &state->in_initial, list); break;
1757 case eawhhEQUILIBRATEHISTOGRAM:
1758 do_cpt_bool_err(xd, eawhh_names[i], &state->equilibrateHistogram, list); break;
1760 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list); break;
1766 numPoints = biasHistory->pointState.size();
1768 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1771 biasHistory->pointState.resize(numPoints);
1775 case eawhhCOORDPOINT:
1776 for (auto &psh : biasHistory->pointState)
1778 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1779 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1780 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1781 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1782 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1783 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1784 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1785 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1786 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1787 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
1788 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
1791 case eawhhUMBRELLAGRIDPOINT:
1792 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list); break;
1793 case eawhhUPDATELIST:
1794 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
1795 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
1797 case eawhhLOGSCALEDSAMPLEWEIGHT:
1798 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
1799 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
1801 case eawhhNUMUPDATES:
1802 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
1804 case eawhhFORCECORRELATIONGRID:
1805 ret = do_cpt_correlation_grid(xd, bRead, fflags, &biasHistory->forceCorrelationGrid, list, i);
1808 gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
1816 static int do_cpt_awh(XDR *xd, gmx_bool bRead,
1817 int fflags, gmx::AwhHistory *awhHistory,
1824 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
1826 if (awhHistory == nullptr)
1828 GMX_RELEASE_ASSERT(bRead, "do_cpt_awh should not be called for writing without an AwhHistory");
1830 awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
1831 awhHistory = awhHistoryLocal.get();
1834 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
1835 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
1836 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
1837 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
1838 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
1839 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
1841 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
1842 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
1847 numBias = awhHistory->bias.size();
1849 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
1853 awhHistory->bias.resize(numBias);
1855 for (auto &bias : awhHistory->bias)
1857 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
1863 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
1868 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1869 std::vector<gmx_file_position_t> *outputfiles,
1870 FILE *list, int file_version)
1873 gmx_off_t mask = 0xFFFFFFFFL;
1874 int offset_high, offset_low;
1875 std::array<char, CPTSTRLEN> buf;
1876 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
1878 // Ensure that reading pre-allocates outputfiles, while writing
1879 // writes what is already there.
1880 int nfiles = outputfiles->size();
1881 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
1887 outputfiles->resize(nfiles);
1890 for (auto &outputfile : *outputfiles)
1892 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1895 do_cpt_string_err(xd, "output filename", buf, list);
1896 std::copy(std::begin(buf), std::end(buf), std::begin(outputfile.filename));
1898 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1902 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1906 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32 ) | ( static_cast<gmx_off_t>(offset_low) & mask );
1910 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
1912 offset = outputfile.offset;
1920 offset_low = static_cast<int>(offset & mask);
1921 offset_high = static_cast<int>((offset >> 32) & mask);
1923 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1927 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1932 if (file_version >= 8)
1934 if (do_cpt_int(xd, "file_checksum_size", &outputfile.checksumSize,
1939 if (do_cpt_u_chars(xd, "file_checksum", outputfile.checksum.size(), outputfile.checksum.data(), list) != 0)
1946 outputfile.checksumSize = -1;
1953 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1954 FILE *fplog, const t_commrec *cr,
1955 ivec domdecCells, int nppnodes,
1956 int eIntegrator, int simulation_part,
1957 gmx_bool bExpanded, int elamstats,
1958 int64_t step, double t,
1959 t_state *state, ObservablesHistory *observablesHistory)
1962 char *fntemp; /* the temporary checkpoint file name */
1964 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1967 if (DOMAINDECOMP(cr))
1969 npmenodes = cr->npmenodes;
1977 /* make the new temporary filename */
1978 snew(fntemp, std::strlen(fn)+5+STEPSTRSIZE);
1979 std::strcpy(fntemp, fn);
1980 fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1981 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1982 std::strcat(fntemp, suffix);
1983 std::strcat(fntemp, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
1985 /* if we can't rename, we just overwrite the cpt file.
1986 * dangerous if interrupted.
1988 snew(fntemp, std::strlen(fn));
1989 std::strcpy(fntemp, fn);
1991 std::string timebuf = gmx_format_current_time();
1995 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1996 gmx_step_str(step, buf), timebuf.c_str());
1999 /* Get offsets for open files */
2000 auto outputfiles = gmx_fio_get_output_file_positions();
2002 fp = gmx_fio_open(fntemp, "w");
2005 if (state->ekinstate.bUpToDate)
2008 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
2009 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
2010 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
2017 energyhistory_t *enerhist = observablesHistory->energyHistory.get();
2019 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
2021 flags_enh |= (1<<eenhENERGY_N) | (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSTEPS_SIM);
2022 if (enerhist->nsum > 0)
2024 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
2025 (1<<eenhENERGY_NSUM));
2027 if (enerhist->nsum_sim > 0)
2029 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSUM_SIM));
2031 if (enerhist->deltaHForeignLambdas != nullptr)
2033 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
2034 (1<< eenhENERGY_DELTA_H_LIST) |
2035 (1<< eenhENERGY_DELTA_H_STARTTIME) |
2036 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
2040 PullHistory *pullHist = observablesHistory->pullHistory.get();
2041 int flagsPullHistory = 0;
2042 if (pullHist != nullptr && (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0))
2044 flagsPullHistory |= (1<<epullhPULL_NUMCOORDINATES);
2045 flagsPullHistory |= ((1<<epullhPULL_NUMGROUPS) | (1<<epullhPULL_NUMVALUESINXSUM) | (1<<epullhPULL_NUMVALUESINFSUM));
2051 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
2052 (1<<edfhTIJ) | (1<<edfhTIJEMP));
2055 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
2057 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
2059 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
2060 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
2069 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
2071 flags_awhh |= ( (1 << eawhhIN_INITIAL) |
2072 (1 << eawhhEQUILIBRATEHISTOGRAM) |
2073 (1 << eawhhHISTSIZE) |
2074 (1 << eawhhNPOINTS) |
2075 (1 << eawhhCOORDPOINT) |
2076 (1 << eawhhUMBRELLAGRIDPOINT) |
2077 (1 << eawhhUPDATELIST) |
2078 (1 << eawhhLOGSCALEDSAMPLEWEIGHT) |
2079 (1 << eawhhNUMUPDATES) |
2080 (1 << eawhhFORCECORRELATIONGRID));
2083 /* We can check many more things now (CPU, acceleration, etc), but
2084 * it is highly unlikely to have two separate builds with exactly
2085 * the same version, user, time, and build host!
2088 int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
2090 edsamhistory_t *edsamhist = observablesHistory->edsamHistory.get();
2091 int nED = (edsamhist ? edsamhist->nED : 0);
2093 swaphistory_t *swaphist = observablesHistory->swapHistory.get();
2094 int eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
2096 CheckpointHeaderContents headerContents =
2098 0, {0}, {0}, {0}, {0}, GMX_DOUBLE, {0}, {0},
2099 eIntegrator, simulation_part, step, t, nppnodes,
2101 state->natoms, state->ngtc, state->nnhpres,
2102 state->nhchainlength, nlambda, state->flags, flags_eks, flags_enh,
2103 flagsPullHistory, flags_dfh, flags_awhh,
2106 std::strcpy(headerContents.version, gmx_version());
2107 std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
2108 std::strcpy(headerContents.ftime, timebuf.c_str());
2109 if (DOMAINDECOMP(cr))
2111 copy_ivec(domdecCells, headerContents.dd_nc);
2114 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
2116 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0) ||
2117 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0) ||
2118 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0) ||
2119 (doCptPullHist(gmx_fio_getxdr(fp), FALSE, flagsPullHistory, pullHist, StatePart::pullHistory, nullptr) < 0) ||
2120 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0) ||
2121 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0) ||
2122 (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0) ||
2123 (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0) ||
2124 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr,
2125 headerContents.file_version) < 0))
2127 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2130 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2132 /* we really, REALLY, want to make sure to physically write the checkpoint,
2133 and all the files it depends on, out to disk. Because we've
2134 opened the checkpoint with gmx_fio_open(), it's in our list
2136 ret = gmx_fio_all_output_fsync();
2142 "Cannot fsync '%s'; maybe you are out of disk space?",
2143 gmx_fio_getname(ret));
2145 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
2151 gmx_warning("%s", buf);
2155 if (gmx_fio_close(fp) != 0)
2157 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2160 /* we don't move the checkpoint if the user specified they didn't want it,
2161 or if the fsyncs failed */
2163 if (!bNumberAndKeep && !ret)
2167 /* Rename the previous checkpoint file */
2168 std::strcpy(buf, fn);
2169 buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2170 std::strcat(buf, "_prev");
2171 std::strcat(buf, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2174 /* we copy here so that if something goes wrong between now and
2175 * the rename below, there's always a state.cpt.
2176 * If renames are atomic (such as in POSIX systems),
2177 * this copying should be unneccesary.
2179 gmx_file_copy(fn, buf, FALSE);
2180 /* We don't really care if this fails:
2181 * there's already a new checkpoint.
2186 gmx_file_rename(fn, buf);
2189 if (gmx_file_rename(fntemp, fn) != 0)
2191 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2194 #endif /* GMX_NO_RENAME */
2199 /*code for alternate checkpointing scheme. moved from top of loop over
2201 fcRequestCheckPoint();
2202 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
2204 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
2206 #endif /* end GMX_FAHCORE block */
2209 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
2211 bool foundMismatch = (p != f);
2219 fprintf(fplog, " %s mismatch,\n", type);
2220 fprintf(fplog, " current program: %d\n", p);
2221 fprintf(fplog, " checkpoint file: %d\n", f);
2222 fprintf(fplog, "\n");
2226 static void check_string(FILE *fplog, const char *type, const char *p,
2227 const char *f, gmx_bool *mm)
2229 bool foundMismatch = (std::strcmp(p, f) != 0);
2237 fprintf(fplog, " %s mismatch,\n", type);
2238 fprintf(fplog, " current program: %s\n", p);
2239 fprintf(fplog, " checkpoint file: %s\n", f);
2240 fprintf(fplog, "\n");
2244 static void check_match(FILE *fplog, const t_commrec *cr, const ivec dd_nc,
2245 const CheckpointHeaderContents &headerContents,
2246 gmx_bool reproducibilityRequested)
2248 /* Note that this check_string on the version will also print a message
2249 * when only the minor version differs. But we only print a warning
2250 * message further down with reproducibilityRequested=TRUE.
2252 gmx_bool versionDiffers = FALSE;
2253 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2255 gmx_bool precisionDiffers = FALSE;
2256 check_int (fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2257 if (precisionDiffers)
2259 const char msg_precision_difference[] =
2260 "You are continuing a simulation with a different precision. Not matching\n"
2261 "single/double precision will lead to precision or performance loss.\n";
2264 fprintf(fplog, "%s\n", msg_precision_difference);
2268 gmx_bool mm = (versionDiffers || precisionDiffers);
2270 if (reproducibilityRequested)
2272 check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(), headerContents.fprog, &mm);
2274 check_int (fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2277 if (cr->nnodes > 1 && reproducibilityRequested)
2279 check_int (fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2281 int npp = cr->nnodes;
2282 if (cr->npmenodes >= 0)
2284 npp -= cr->npmenodes;
2286 int npp_f = headerContents.nnodes;
2287 if (headerContents.npme >= 0)
2289 npp_f -= headerContents.npme;
2293 check_int (fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2294 check_int (fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2295 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2301 /* Gromacs should be able to continue from checkpoints between
2302 * different patch level versions, but we do not guarantee
2303 * compatibility between different major/minor versions - check this.
2307 sscanf(gmx_version(), "%5d", &gmx_major);
2308 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2309 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2311 const char msg_major_version_difference[] =
2312 "The current GROMACS major version is not identical to the one that\n"
2313 "generated the checkpoint file. In principle GROMACS does not support\n"
2314 "continuation from checkpoints between different versions, so we advise\n"
2315 "against this. If you still want to try your luck we recommend that you use\n"
2316 "the -noappend flag to keep your output files from the two versions separate.\n"
2317 "This might also work around errors where the output fields in the energy\n"
2318 "file have changed between the different versions.\n";
2320 const char msg_mismatch_notice[] =
2321 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2322 "Continuation is exact, but not guaranteed to be binary identical.\n";
2324 if (majorVersionDiffers)
2328 fprintf(fplog, "%s\n", msg_major_version_difference);
2331 else if (reproducibilityRequested)
2333 /* Major & minor versions match at least, but something is different. */
2336 fprintf(fplog, "%s\n", msg_mismatch_notice);
2342 static void read_checkpoint(const char *fn, t_fileio *logfio,
2343 const t_commrec *cr,
2346 int *init_fep_state,
2347 CheckpointHeaderContents *headerContents,
2349 ObservablesHistory *observablesHistory,
2350 gmx_bool reproducibilityRequested)
2353 char buf[STEPSTRSIZE];
2356 fp = gmx_fio_open(fn, "r");
2357 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2359 // If we are appending, then we don't want write to the open log
2360 // file because we still need to compute a checksum for it. In
2361 // that case, the filehandle will be nullptr. Otherwise, we report
2362 // to the new log file about the checkpoint file that we are
2364 FILE *fplog = gmx_fio_getfp(logfio);
2367 fprintf(fplog, "\n");
2368 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2369 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2370 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2371 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2372 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2373 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2374 fprintf(fplog, " time: %f\n", headerContents->t);
2375 fprintf(fplog, "\n");
2378 if (headerContents->natoms != state->natoms)
2380 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", headerContents->natoms, state->natoms);
2382 if (headerContents->ngtc != state->ngtc)
2384 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", headerContents->ngtc, state->ngtc);
2386 if (headerContents->nnhpres != state->nnhpres)
2388 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", headerContents->nnhpres, state->nnhpres);
2391 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2392 if (headerContents->nlambda != nlambdaHistory)
2394 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", headerContents->nlambda, nlambdaHistory);
2397 init_gtc_state(state, state->ngtc, state->nnhpres, headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2398 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2400 if (headerContents->eIntegrator != eIntegrator)
2402 gmx_fatal(FARGS, "Cannot change integrator during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2405 if (headerContents->flags_state != state->flags)
2407 gmx_fatal(FARGS, "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2412 check_match(fplog, cr, dd_nc, *headerContents,
2413 reproducibilityRequested);
2416 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2417 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
2418 Investigate for 5.0. */
2423 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2428 state->ekinstate.hasReadEkinState = (((headerContents->flags_eks & (1<<eeksEKINH)) != 0) ||
2429 ((headerContents->flags_eks & (1<<eeksEKINF)) != 0) ||
2430 ((headerContents->flags_eks & (1<<eeksEKINO)) != 0) ||
2431 (((headerContents->flags_eks & (1<<eeksEKINSCALEF)) |
2432 (headerContents->flags_eks & (1<<eeksEKINSCALEH)) |
2433 (headerContents->flags_eks & (1<<eeksVSCALE))) != 0));
2435 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2437 observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
2439 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2440 headerContents->flags_enh, observablesHistory->energyHistory.get(), nullptr);
2446 if (headerContents->flagsPullHistory)
2448 if (observablesHistory->pullHistory == nullptr)
2450 observablesHistory->pullHistory = std::make_unique<PullHistory>();
2452 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE,
2453 headerContents->flagsPullHistory, observablesHistory->pullHistory.get(), StatePart::pullHistory, nullptr);
2460 if (headerContents->file_version < 6)
2462 gmx_fatal(FARGS, "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2465 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda, &state->dfhist, nullptr);
2471 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2473 observablesHistory->edsamHistory = std::make_unique<edsamhistory_t>(edsamhistory_t {});
2475 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED, observablesHistory->edsamHistory.get(), nullptr);
2481 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2483 state->awhHistory = std::make_shared<gmx::AwhHistory>();
2485 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2486 headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2492 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2494 observablesHistory->swapHistory = std::make_unique<swaphistory_t>(swaphistory_t {});
2496 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords, observablesHistory->swapHistory.get(), nullptr);
2502 std::vector<gmx_file_position_t> outputfiles;
2503 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2509 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2514 if (gmx_fio_close(fp) != 0)
2516 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2521 void load_checkpoint(const char *fn, t_fileio *logfio,
2522 const t_commrec *cr, const ivec dd_nc,
2523 t_inputrec *ir, t_state *state,
2524 ObservablesHistory *observablesHistory,
2525 gmx_bool reproducibilityRequested)
2527 CheckpointHeaderContents headerContents;
2530 /* Read the state from the checkpoint file */
2531 read_checkpoint(fn, logfio,
2533 ir->eI, &(ir->fepvals->init_fep_state),
2535 state, observablesHistory,
2536 reproducibilityRequested);
2540 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
2542 ir->bContinuation = TRUE;
2543 // TODO Should the following condition be <=? Currently if you
2544 // pass a checkpoint written by an normal completion to a restart,
2545 // mdrun will read all input, does some work but no steps, and
2546 // write successful output. But perhaps that is not desirable.
2547 if ((ir->nsteps >= 0) && (ir->nsteps < headerContents.step))
2549 // Note that we do not intend to support the use of mdrun
2550 // -nsteps to circumvent this condition.
2551 char nstepsString[STEPSTRSIZE], stepString[STEPSTRSIZE];
2552 gmx_step_str(ir->nsteps, nstepsString);
2553 gmx_step_str(headerContents.step, stepString);
2554 gmx_fatal(FARGS, "The input requested %s steps, however the checkpoint "
2555 "file has already reached step %s. The simulation will not "
2556 "proceed, because either your simulation is already complete, "
2557 "or your combination of input files don't match.",
2558 nstepsString, stepString);
2560 if (ir->nsteps >= 0)
2562 ir->nsteps += ir->init_step - headerContents.step;
2564 ir->init_step = headerContents.step;
2565 ir->simulation_part = headerContents.simulation_part + 1;
2568 void read_checkpoint_part_and_step(const char *filename,
2569 int *simulation_part,
2574 if (filename == nullptr ||
2575 !gmx_fexist(filename) ||
2576 ((fp = gmx_fio_open(filename, "r")) == nullptr))
2578 *simulation_part = 0;
2583 CheckpointHeaderContents headerContents;
2584 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2586 *simulation_part = headerContents.simulation_part;
2587 *step = headerContents.step;
2590 static CheckpointHeaderContents
2591 read_checkpoint_data(t_fileio *fp,
2593 std::vector<gmx_file_position_t> *outputfiles)
2595 CheckpointHeaderContents headerContents;
2596 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2597 state->natoms = headerContents.natoms;
2598 state->ngtc = headerContents.ngtc;
2599 state->nnhpres = headerContents.nnhpres;
2600 state->nhchainlength = headerContents.nhchainlength;
2601 state->flags = headerContents.flags_state;
2603 do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2608 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2614 energyhistory_t enerhist;
2615 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2616 headerContents.flags_enh, &enerhist, nullptr);
2621 PullHistory pullHist = {};
2622 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE,
2623 headerContents.flagsPullHistory, &pullHist, StatePart::pullHistory, nullptr);
2629 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda, &state->dfhist, nullptr);
2635 edsamhistory_t edsamhist = {};
2636 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2642 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2643 headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2649 swaphistory_t swaphist = {};
2650 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2656 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2658 nullptr, headerContents.file_version);
2665 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2670 return headerContents;
2673 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2676 std::vector<gmx_file_position_t> outputfiles;
2677 CheckpointHeaderContents headerContents =
2678 read_checkpoint_data(fp, &state, &outputfiles);
2680 fr->natoms = state.natoms;
2682 fr->step = int64_to_int(headerContents.step,
2683 "conversion of checkpoint to trajectory");
2685 fr->time = headerContents.t;
2687 fr->lambda = state.lambda[efptFEP];
2688 fr->fep_state = state.fep_state;
2690 fr->bX = ((state.flags & (1<<estX)) != 0);
2693 fr->x = makeRvecArray(state.x, state.natoms);
2695 fr->bV = ((state.flags & (1<<estV)) != 0);
2698 fr->v = makeRvecArray(state.v, state.natoms);
2701 fr->bBox = ((state.flags & (1<<estBOX)) != 0);
2704 copy_mat(state.box, fr->box);
2708 void list_checkpoint(const char *fn, FILE *out)
2715 fp = gmx_fio_open(fn, "r");
2716 CheckpointHeaderContents headerContents;
2717 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
2718 state.natoms = headerContents.natoms;
2719 state.ngtc = headerContents.ngtc;
2720 state.nnhpres = headerContents.nnhpres;
2721 state.nhchainlength = headerContents.nhchainlength;
2722 state.flags = headerContents.flags_state;
2723 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
2728 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
2734 energyhistory_t enerhist;
2735 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2736 headerContents.flags_enh, &enerhist, out);
2740 PullHistory pullHist = {};
2741 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE,
2742 headerContents.flagsPullHistory, &pullHist, StatePart::pullHistory, out);
2747 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2748 headerContents.flags_dfh, headerContents.nlambda, &state.dfhist, out);
2753 edsamhistory_t edsamhist = {};
2754 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
2759 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2760 headerContents.flags_awhh, state.awhHistory.get(), out);
2765 swaphistory_t swaphist = {};
2766 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
2771 std::vector<gmx_file_position_t> outputfiles;
2772 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
2777 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2784 if (gmx_fio_close(fp) != 0)
2786 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2790 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2791 CheckpointHeaderContents
2792 read_checkpoint_simulation_part_and_filenames(t_fileio *fp,
2793 std::vector<gmx_file_position_t> *outputfiles)
2796 CheckpointHeaderContents headerContents =
2797 read_checkpoint_data(fp, &state, outputfiles);
2798 if (gmx_fio_close(fp) != 0)
2800 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2802 return headerContents;