2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* The source code in this file should be thread-safe.
37 Please keep it that way. */
40 #include "checkpoint.h"
49 #if GMX_NATIVE_WINDOWS
51 #include <sys/locking.h>
57 #include "buildinfo.h"
58 #include "gromacs/compat/make_unique.h"
59 #include "gromacs/fileio/filetypes.h"
60 #include "gromacs/fileio/gmxfio.h"
61 #include "gromacs/fileio/gmxfio-xdr.h"
62 #include "gromacs/fileio/xdr_datatype.h"
63 #include "gromacs/fileio/xdrf.h"
64 #include "gromacs/gmxlib/network.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/math/vecdump.h"
67 #include "gromacs/math/vectypes.h"
68 #include "gromacs/mdtypes/awh-correlation-history.h"
69 #include "gromacs/mdtypes/awh-history.h"
70 #include "gromacs/mdtypes/commrec.h"
71 #include "gromacs/mdtypes/df_history.h"
72 #include "gromacs/mdtypes/edsamhistory.h"
73 #include "gromacs/mdtypes/energyhistory.h"
74 #include "gromacs/mdtypes/inputrec.h"
75 #include "gromacs/mdtypes/md_enums.h"
76 #include "gromacs/mdtypes/observableshistory.h"
77 #include "gromacs/mdtypes/state.h"
78 #include "gromacs/mdtypes/swaphistory.h"
79 #include "gromacs/trajectory/trajectoryframe.h"
80 #include "gromacs/utility/arrayref.h"
81 #include "gromacs/utility/baseversion.h"
82 #include "gromacs/utility/cstringutil.h"
83 #include "gromacs/utility/fatalerror.h"
84 #include "gromacs/utility/futil.h"
85 #include "gromacs/utility/gmxassert.h"
86 #include "gromacs/utility/int64_to_int.h"
87 #include "gromacs/utility/programcontext.h"
88 #include "gromacs/utility/smalloc.h"
89 #include "gromacs/utility/sysinfo.h"
90 #include "gromacs/utility/txtdump.h"
96 #define CPT_MAGIC1 171817
97 #define CPT_MAGIC2 171819
98 #define CPTSTRLEN 1024
100 /* cpt_version should normally only be changed
101 * when the header or footer format changes.
102 * The state data format itself is backward and forward compatible.
103 * But old code can not read a new entry that is present in the file
104 * (but can read a new format when new entries are not present).
106 static const int cpt_version = 17;
109 const char *est_names[estNR] =
112 "box", "box-rel", "box-v", "pres_prev",
113 "nosehoover-xi", "thermostat-integral",
114 "x", "v", "sdx-unsupported", "CGp", "LD-rng-unsupported", "LD-rng-i-unsupported",
115 "disre_initf", "disre_rm3tav",
116 "orire_initf", "orire_Dtav",
117 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng-unsupported", "MC-rng-i-unsupported",
122 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
125 static const char *eeks_names[eeksNR] =
127 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
128 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
132 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
133 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
134 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
135 eenhENERGY_DELTA_H_NN,
136 eenhENERGY_DELTA_H_LIST,
137 eenhENERGY_DELTA_H_STARTTIME,
138 eenhENERGY_DELTA_H_STARTLAMBDA,
142 static const char *eenh_names[eenhNR] =
144 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
145 "energy_sum_sim", "energy_nsum_sim",
146 "energy_nsteps", "energy_nsteps_sim",
148 "energy_delta_h_list",
149 "energy_delta_h_start_time",
150 "energy_delta_h_start_lambda"
153 /* free energy history variables -- need to be preserved over checkpoint */
155 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
156 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
158 /* free energy history variable names */
159 static const char *edfh_names[edfhNR] =
161 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
162 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
165 /* AWH biasing history variables */
168 eawhhEQUILIBRATEHISTOGRAM,
171 eawhhCOORDPOINT, eawhhUMBRELLAGRIDPOINT,
173 eawhhLOGSCALEDSAMPLEWEIGHT,
175 eawhhFORCECORRELATIONGRID,
179 static const char *eawhh_names[eawhhNR] =
182 "awh_equilibrateHistogram",
185 "awh_coordpoint", "awh_umbrellaGridpoint",
187 "awh_logScaledSampleWeight",
189 "awh_forceCorrelationGrid"
192 //! Higher level vector element type, only used for formatting checkpoint dumps
193 enum class CptElementType
195 integer, //!< integer
196 real, //!< float or double, not linked to precision of type real
197 real3, //!< float[3] or double[3], not linked to precision of type real
198 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
201 //! \brief Parts of the checkpoint state, only used for reporting
204 microState, //!< The microstate of the simulated system
205 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
206 energyHistory, //!< Energy observable statistics
207 freeEnergyHistory, //!< Free-energy state and observable statistics
208 accWeightHistogram //!< Accelerated weight histogram method state
211 //! \brief Return the name of a checkpoint entry based on part and part entry
212 static const char *entryName(StatePart part, int ecpt)
216 case StatePart::microState: return est_names [ecpt];
217 case StatePart::kineticEnergy: return eeks_names[ecpt];
218 case StatePart::energyHistory: return eenh_names[ecpt];
219 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
220 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
226 static void cp_warning(FILE *fp)
228 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
231 [[ noreturn ]] static void cp_error()
233 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
236 static void do_cpt_string_err(XDR *xd, const char *desc, gmx::ArrayRef<char> s, FILE *list)
238 char *data = s.data();
239 if (xdr_string(xd, &data, s.size()) == 0)
245 fprintf(list, "%s = %s\n", desc, data);
249 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
251 if (xdr_int(xd, i) == 0)
257 fprintf(list, "%s = %d\n", desc, *i);
262 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
266 fprintf(list, "%s = ", desc);
269 for (int j = 0; j < n && res; j++)
271 res &= xdr_u_char(xd, &i[j]);
274 fprintf(list, "%02x", i[j]);
289 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
291 if (do_cpt_int(xd, desc, i, list) < 0)
297 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_int64_t *i, FILE *list)
299 char buf[STEPSTRSIZE];
301 if (xdr_int64(xd, i) == 0)
307 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
311 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
313 if (xdr_double(xd, f) == 0)
319 fprintf(list, "%s = %f\n", desc, *f);
323 static void do_cpt_real_err(XDR *xd, real *f)
326 bool_t res = xdr_double(xd, f);
328 bool_t res = xdr_float(xd, f);
336 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
338 for (int i = 0; i < n; i++)
340 for (int j = 0; j < DIM; j++)
342 do_cpt_real_err(xd, &f[i][j]);
348 pr_rvecs(list, 0, desc, f, n);
352 template <typename T>
360 // cppcheck-suppress unusedStructMember
361 static const int value = xdr_datatype_int;
365 struct xdr_type<float>
367 // cppcheck-suppress unusedStructMember
368 static const int value = xdr_datatype_float;
372 struct xdr_type<double>
374 // cppcheck-suppress unusedStructMember
375 static const int value = xdr_datatype_double;
378 //! \brief Returns size in byte of an xdr_datatype
379 static inline unsigned int sizeOfXdrType(int xdrType)
383 case xdr_datatype_int:
385 case xdr_datatype_float:
386 return sizeof(float);
387 case xdr_datatype_double:
388 return sizeof(double);
389 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
395 //! \brief Returns the XDR process function for i/o of an XDR type
396 static inline xdrproc_t xdrProc(int xdrType)
400 case xdr_datatype_int:
401 return reinterpret_cast<xdrproc_t>(xdr_int);
402 case xdr_datatype_float:
403 return reinterpret_cast<xdrproc_t>(xdr_float);
404 case xdr_datatype_double:
405 return reinterpret_cast<xdrproc_t>(xdr_double);
406 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
412 /*! \brief Lists or only reads an xdr vector from checkpoint file
414 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
415 * The header for the print is set by \p part and \p ecpt.
416 * The formatting of the printing is set by \p cptElementType.
417 * When list==NULL only reads the elements.
419 static bool_t listXdrVector(XDR *xd, StatePart part, int ecpt, int nf, int xdrType,
420 FILE *list, CptElementType cptElementType)
424 const unsigned int elemSize = sizeOfXdrType(xdrType);
425 std::vector<char> data(nf*elemSize);
426 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
432 case xdr_datatype_int:
433 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int *>(data.data()), nf, TRUE);
435 case xdr_datatype_float:
437 if (cptElementType == CptElementType::real3)
439 // cppcheck-suppress invalidPointerCast
440 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
445 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
446 // cppcheck-suppress invalidPointerCast
447 pr_fvec(list, 0, entryName(part, ecpt), reinterpret_cast<const float *>(data.data()), nf, TRUE);
450 case xdr_datatype_double:
452 if (cptElementType == CptElementType::real3)
454 // cppcheck-suppress invalidPointerCast
455 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
460 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
461 // cppcheck-suppress invalidPointerCast
462 pr_dvec(list, 0, entryName(part, ecpt), reinterpret_cast<const double *>(data.data()), nf, TRUE);
465 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
472 //! \brief Convert a double array, typed char*, to float
473 gmx_unused static void convertArrayRealPrecision(const char *c, float *v, int n)
475 // cppcheck-suppress invalidPointerCast
476 const double *d = reinterpret_cast<const double *>(c);
477 for (int i = 0; i < n; i++)
479 v[i] = static_cast<float>(d[i]);
483 //! \brief Convert a float array, typed char*, to double
484 static void convertArrayRealPrecision(const char *c, double *v, int n)
486 // cppcheck-suppress invalidPointerCast
487 const float *f = reinterpret_cast<const float *>(c);
488 for (int i = 0; i < n; i++)
490 v[i] = static_cast<double>(f[i]);
494 //! \brief Generate an error for trying to convert to integer
495 static void convertArrayRealPrecision(const char gmx_unused *c, int gmx_unused *v, int gmx_unused n)
497 GMX_RELEASE_ASSERT(false, "We only expect type mismatches between float and double, not integer");
500 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
502 * This is the only routine that does the actually i/o of real vector,
503 * all other routines are intermediate level routines for specific real
504 * data types, calling this routine.
505 * Currently this routine is (too) complex, since it handles both real *
506 * and std::vector<real>. Using real * is deprecated and this routine
507 * will simplify a lot when only std::vector needs to be supported.
509 * The routine is generic to vectors with different allocators,
510 * because as part of reading a checkpoint there are vectors whose
511 * size is not known until reading has progressed far enough, so a
512 * resize method must be called.
514 * When not listing, we use either v or vector, depending on which is !=NULL.
515 * If nval >= 0, nval is used; on read this should match the passed value.
516 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
517 * the value is stored in nptr
519 template<typename T, typename AllocatorType>
520 static int doVectorLow(XDR *xd, StatePart part, int ecpt, int sflags,
522 T **v, std::vector<T, AllocatorType> *vector,
523 FILE *list, CptElementType cptElementType)
525 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr) || (v == nullptr && vector != nullptr), "Without list, we should have exactly one of v and vector != NULL");
529 int numElemInTheFile;
534 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
535 numElemInTheFile = nval;
541 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
542 // cppcheck-suppress nullPointer
543 numElemInTheFile = *nptr;
547 numElemInTheFile = vector->size();
551 /* Read/write the vector element count */
552 res = xdr_int(xd, &numElemInTheFile);
557 /* Read/write the element data type */
558 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
559 int xdrTypeInTheFile = xdrTypeInTheCode;
560 res = xdr_int(xd, &xdrTypeInTheFile);
566 if (list == nullptr && (sflags & (1 << ecpt)))
570 if (numElemInTheFile != nval)
572 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), nval, numElemInTheFile);
575 else if (nptr != nullptr)
577 *nptr = numElemInTheFile;
580 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
584 sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
585 entryName(part, ecpt),
586 xdr_datatype_names[xdrTypeInTheCode],
587 xdr_datatype_names[xdrTypeInTheFile]);
589 /* Matching int and real should never occur, but check anyhow */
590 if (xdrTypeInTheFile == xdr_datatype_int ||
591 xdrTypeInTheCode == xdr_datatype_int)
593 gmx_fatal(FARGS, "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
602 snew(*v, numElemInTheFile);
608 /* This conditional ensures that we don't resize on write.
609 * In particular in the state where this code was written
610 * PaddedRVecVector has a size of numElemInThefile and we
611 * don't want to lose that padding here.
613 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
615 vector->resize(numElemInTheFile);
623 vChar = reinterpret_cast<char *>(vp);
627 snew(vChar, numElemInTheFile*sizeOfXdrType(xdrTypeInTheFile));
629 res = xdr_vector(xd, vChar,
630 numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
631 xdrProc(xdrTypeInTheFile));
639 /* In the old code float-double conversion came for free.
640 * In the new code we still support it, mainly because
641 * the tip4p_continue regression test makes use of this.
642 * It's an open question if we do or don't want to allow this.
644 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
650 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile,
651 list, cptElementType);
657 //! \brief Read/Write a std::vector.
658 template <typename T>
659 static int doVector(XDR *xd, StatePart part, int ecpt, int sflags,
660 std::vector<T> *vector, FILE *list)
662 return doVectorLow<T>(xd, part, ecpt, sflags, -1, nullptr, nullptr, vector, list, CptElementType::real);
665 //! \brief Read/Write an ArrayRef<real>.
666 static int doRealArrayRef(XDR *xd, StatePart part, int ecpt, int sflags,
667 gmx::ArrayRef<real> vector, FILE *list)
669 real *v_real = vector.data();
670 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, vector.size(), nullptr, &v_real, nullptr, list, CptElementType::real);
673 //! \brief Read/Write a vector whose value_type is RVec and whose allocator is \c AllocatorType.
674 template <typename AllocatorType>
675 static int doRvecVector(XDR *xd, StatePart part, int ecpt, int sflags,
676 std::vector<gmx::RVec, AllocatorType> *v, int numAtoms, FILE *list)
678 const int numReals = numAtoms*DIM;
682 GMX_RELEASE_ASSERT(sflags & (1 << ecpt), "When not listing, the flag for the entry should be set when requesting i/o");
683 GMX_RELEASE_ASSERT(v->size() >= static_cast<size_t>(numAtoms), "v should have sufficient size for numAtoms");
685 real *v_real = v->data()->as_vec();
687 // PaddedRVecVector is padded beyond numAtoms, we should only write
689 gmx::ArrayRef<real> ref(v_real, v_real + numReals);
691 return doRealArrayRef(xd, part, ecpt, sflags, ref, list);
695 // Use the rebind facility to change the value_type of the
696 // allocator from RVec to real.
697 using realAllocator = typename AllocatorType::template rebind<real>::other;
698 return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr, nullptr, list, CptElementType::real);
702 /* This function stores n along with the reals for reading,
703 * but on reading it assumes that n matches the value in the checkpoint file,
704 * a fatal error is generated when this is not the case.
706 static int do_cpte_reals(XDR *xd, StatePart part, int ecpt, int sflags,
707 int n, real **v, FILE *list)
709 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
712 /* This function does the same as do_cpte_reals,
713 * except that on reading it ignores the passed value of *n
714 * and stores the value read from the checkpoint file in *n.
716 static int do_cpte_n_reals(XDR *xd, StatePart part, int ecpt, int sflags,
717 int *n, real **v, FILE *list)
719 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list, CptElementType::real);
722 static int do_cpte_real(XDR *xd, StatePart part, int ecpt, int sflags,
725 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr, list, CptElementType::real);
728 static int do_cpte_ints(XDR *xd, StatePart part, int ecpt, int sflags,
729 int n, int **v, FILE *list)
731 return doVectorLow < int, std::allocator < int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::integer);
734 static int do_cpte_int(XDR *xd, StatePart part, int ecpt, int sflags,
737 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
740 static int do_cpte_bool(XDR *xd, StatePart part, int ecpt, int sflags,
744 int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
749 static int do_cpte_doubles(XDR *xd, StatePart part, int ecpt, int sflags,
750 int n, double **v, FILE *list)
752 return doVectorLow < double, std::allocator < double>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
755 static int do_cpte_double(XDR *xd, StatePart part, int ecpt, int sflags,
756 double *r, FILE *list)
758 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
761 static int do_cpte_matrix(XDR *xd, StatePart part, int ecpt, int sflags,
762 matrix v, FILE *list)
768 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
769 DIM*DIM, nullptr, &vr, nullptr, nullptr, CptElementType::matrix3x3);
771 if (list && ret == 0)
773 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
780 static int do_cpte_nmatrix(XDR *xd, StatePart part, int ecpt, int sflags,
781 int n, real **v, FILE *list)
785 char name[CPTSTRLEN];
792 for (i = 0; i < n; i++)
794 reti = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]), nullptr, nullptr, CptElementType::matrix3x3);
795 if (list && reti == 0)
797 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
798 pr_reals(list, 0, name, v[i], n);
808 static int do_cpte_matrices(XDR *xd, StatePart part, int ecpt, int sflags,
809 int n, matrix **v, FILE *list)
812 matrix *vp, *va = nullptr;
818 res = xdr_int(xd, &nf);
823 if (list == nullptr && nf != n)
825 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), n, nf);
827 if (list || !(sflags & (1<<ecpt)))
840 snew(vr, nf*DIM*DIM);
841 for (i = 0; i < nf; i++)
843 for (j = 0; j < DIM; j++)
845 for (k = 0; k < DIM; k++)
847 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
851 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
852 nf*DIM*DIM, nullptr, &vr, nullptr, nullptr,
853 CptElementType::matrix3x3);
854 for (i = 0; i < nf; i++)
856 for (j = 0; j < DIM; j++)
858 for (k = 0; k < DIM; k++)
860 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
866 if (list && ret == 0)
868 for (i = 0; i < nf; i++)
870 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
881 // TODO Expand this into being a container of all data for
882 // serialization of a checkpoint, which can be stored by the caller
883 // (e.g. so that mdrun doesn't have to open the checkpoint twice).
884 // This will separate issues of allocation from those of
885 // serialization, help separate comparison from reading, and have
886 // better defined transformation functions to/from trajectory frame
888 struct CheckpointHeaderContents
891 char version[CPTSTRLEN];
892 char btime[CPTSTRLEN];
893 char buser[CPTSTRLEN];
894 char bhost[CPTSTRLEN];
896 char fprog[CPTSTRLEN];
897 char ftime[CPTSTRLEN];
919 static void do_cpt_header(XDR *xd, gmx_bool bRead, FILE *list,
920 CheckpointHeaderContents *contents)
933 res = xdr_int(xd, &magic);
936 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
938 if (magic != CPT_MAGIC1)
940 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
941 "The checkpoint file is corrupted or not a checkpoint file",
947 gmx_gethostname(fhost, 255);
949 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
950 do_cpt_string_err(xd, "GROMACS build time", contents->btime, list);
951 do_cpt_string_err(xd, "GROMACS build user", contents->buser, list);
952 do_cpt_string_err(xd, "GROMACS build host", contents->bhost, list);
953 do_cpt_string_err(xd, "generating program", contents->fprog, list);
954 do_cpt_string_err(xd, "generation time", contents->ftime, list);
955 contents->file_version = cpt_version;
956 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
957 if (contents->file_version > cpt_version)
959 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", contents->file_version, cpt_version);
961 if (contents->file_version >= 13)
963 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
967 contents->double_prec = -1;
969 if (contents->file_version >= 12)
971 do_cpt_string_err(xd, "generating host", fhost, list);
973 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
974 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
975 if (contents->file_version >= 10)
977 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
981 contents->nhchainlength = 1;
983 if (contents->file_version >= 11)
985 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
989 contents->nnhpres = 0;
991 if (contents->file_version >= 14)
993 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
997 contents->nlambda = 0;
999 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
1000 if (contents->file_version >= 3)
1002 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1006 contents->simulation_part = 1;
1008 if (contents->file_version >= 5)
1010 do_cpt_step_err(xd, "step", &contents->step, list);
1015 do_cpt_int_err(xd, "step", &idum, list);
1016 contents->step = static_cast<gmx_int64_t>(idum);
1018 do_cpt_double_err(xd, "t", &contents->t, list);
1019 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1020 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1021 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1022 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1023 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1024 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1025 if (contents->file_version >= 4)
1027 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1028 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1032 contents->flags_eks = 0;
1033 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV+1));
1034 contents->flags_state = (contents->flags_state & ~((1<<(estORIRE_DTAV+1)) |
1035 (1<<(estORIRE_DTAV+2)) |
1036 (1<<(estORIRE_DTAV+3))));
1038 if (contents->file_version >= 14)
1040 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1044 contents->flags_dfh = 0;
1047 if (contents->file_version >= 15)
1049 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1056 if (contents->file_version >= 16)
1058 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1062 contents->eSwapCoords = eswapNO;
1065 if (contents->file_version >= 17)
1067 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1071 contents->flags_awhh = 0;
1075 static int do_cpt_footer(XDR *xd, int file_version)
1080 if (file_version >= 2)
1083 res = xdr_int(xd, &magic);
1088 if (magic != CPT_MAGIC2)
1097 static int do_cpt_state(XDR *xd,
1098 int fflags, t_state *state,
1102 const StatePart part = StatePart::microState;
1103 const int sflags = state->flags;
1104 // If reading, state->natoms was probably just read, so
1105 // allocations need to be managed. If writing, this won't change
1106 // anything that matters.
1107 state_change_natoms(state, state->natoms);
1108 for (int i = 0; (i < estNR && ret == 0); i++)
1110 if (fflags & (1<<i))
1114 case estLAMBDA: ret = doRealArrayRef(xd, part, i, sflags, gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()), list); break;
1115 case estFEPSTATE: ret = do_cpte_int (xd, part, i, sflags, &state->fep_state, list); break;
1116 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1117 case estBOX_REL: ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list); break;
1118 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1119 case estPRES_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list); break;
1120 case estSVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list); break;
1121 case estFVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list); break;
1122 case estNH_XI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list); break;
1123 case estNH_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list); break;
1124 case estNHPRES_XI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list); break;
1125 case estNHPRES_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list); break;
1126 case estTHERM_INT: ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list); break;
1127 case estBAROS_INT: ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list); break;
1128 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1129 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1130 case estX: ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list); break;
1131 case estV: ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list); break;
1132 /* The RNG entries are no longer written,
1133 * the next 4 lines are only for reading old files.
1135 case estLD_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1136 case estLD_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1137 case estMC_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1138 case estMC_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1139 case estDISRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.disre_initf, list); break;
1140 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1141 case estORIRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.orire_initf, list); break;
1142 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1144 gmx_fatal(FARGS, "Unknown state entry %d\n"
1145 "You are reading a checkpoint file written by different code, which is not supported", i);
1153 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1158 const StatePart part = StatePart::kineticEnergy;
1159 for (int i = 0; (i < eeksNR && ret == 0); i++)
1161 if (fflags & (1<<i))
1166 case eeksEKIN_N: ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list); break;
1167 case eeksEKINH: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1168 case eeksEKINF: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1169 case eeksEKINO: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1170 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list); break;
1171 case eeksEKINSCALEF: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list); break;
1172 case eeksVSCALE: ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list); break;
1173 case eeksEKINSCALEH: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list); break;
1174 case eeksDEKINDL: ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list); break;
1175 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1177 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1178 "You are probably reading a new checkpoint file with old code", i);
1187 static int do_cpt_swapstate(XDR *xd, gmx_bool bRead,
1188 int eSwapCoords, swaphistory_t *swapstate, FILE *list)
1190 int swap_cpt_version = 2;
1192 if (eSwapCoords == eswapNO)
1197 swapstate->bFromCpt = bRead;
1198 swapstate->eSwapCoords = eSwapCoords;
1200 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1201 if (bRead && swap_cpt_version < 2)
1203 gmx_fatal(FARGS, "Cannot read checkpoint files that were written with old versions"
1204 "of the ion/water position swapping protocol.\n");
1207 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1209 /* When reading, init_swapcoords has not been called yet,
1210 * so we have to allocate memory first. */
1211 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1214 snew(swapstate->ionType, swapstate->nIonTypes);
1217 for (int ic = 0; ic < eCompNR; ic++)
1219 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1221 swapstateIons_t *gs = &swapstate->ionType[ii];
1225 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1229 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1234 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1238 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1241 if (bRead && (nullptr == gs->nMolPast[ic]) )
1243 snew(gs->nMolPast[ic], swapstate->nAverage);
1246 for (int j = 0; j < swapstate->nAverage; j++)
1250 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1254 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1260 /* Ion flux per channel */
1261 for (int ic = 0; ic < eChanNR; ic++)
1263 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1265 swapstateIons_t *gs = &swapstate->ionType[ii];
1269 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1273 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1278 /* Ion flux leakage */
1281 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1285 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1289 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1291 swapstateIons_t *gs = &swapstate->ionType[ii];
1293 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1297 snew(gs->channel_label, gs->nMol);
1298 snew(gs->comp_from, gs->nMol);
1301 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1302 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1305 /* Save the last known whole positions to checkpoint
1306 * file to be able to also make multimeric channels whole in PBC */
1307 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1308 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1311 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1312 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1313 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1314 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1318 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], *swapstate->xc_old_whole_p[eChan0], list);
1319 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], *swapstate->xc_old_whole_p[eChan1], list);
1326 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1327 int fflags, energyhistory_t *enerhist,
1337 GMX_RELEASE_ASSERT(enerhist != nullptr, "With energy history, we need a valid enerhist pointer");
1339 /* This is stored/read for backward compatibility */
1340 int energyHistoryNumEnergies = 0;
1343 enerhist->nsteps = 0;
1345 enerhist->nsteps_sim = 0;
1346 enerhist->nsum_sim = 0;
1348 else if (enerhist != nullptr)
1350 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1353 delta_h_history_t *deltaH = enerhist->deltaHForeignLambdas.get();
1354 const StatePart part = StatePart::energyHistory;
1355 for (int i = 0; (i < eenhNR && ret == 0); i++)
1357 if (fflags & (1<<i))
1361 case eenhENERGY_N: ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list); break;
1362 case eenhENERGY_AVER: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list); break;
1363 case eenhENERGY_SUM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list); break;
1364 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1365 case eenhENERGY_SUM_SIM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list); break;
1366 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1367 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1368 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1369 case eenhENERGY_DELTA_H_NN:
1372 if (!bRead && deltaH != nullptr)
1374 numDeltaH = deltaH->dh.size();
1376 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1379 if (deltaH == nullptr)
1381 enerhist->deltaHForeignLambdas = gmx::compat::make_unique<delta_h_history_t>();
1382 deltaH = enerhist->deltaHForeignLambdas.get();
1384 deltaH->dh.resize(numDeltaH);
1385 deltaH->start_lambda_set = FALSE;
1389 case eenhENERGY_DELTA_H_LIST:
1390 for (auto dh : deltaH->dh)
1392 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1395 case eenhENERGY_DELTA_H_STARTTIME:
1396 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list); break;
1397 case eenhENERGY_DELTA_H_STARTLAMBDA:
1398 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list); break;
1400 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1401 "You are probably reading a new checkpoint file with old code", i);
1406 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1408 /* Assume we have an old file format and copy sum to sum_sim */
1409 enerhist->ener_sum_sim = enerhist->ener_sum;
1412 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1413 !(fflags & (1<<eenhENERGY_NSTEPS)))
1415 /* Assume we have an old file format and copy nsum to nsteps */
1416 enerhist->nsteps = enerhist->nsum;
1418 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1419 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1421 /* Assume we have an old file format and copy nsum to nsteps */
1422 enerhist->nsteps_sim = enerhist->nsum_sim;
1428 static int do_cpt_df_hist(XDR *xd, int fflags, int nlambda, df_history_t **dfhistPtr, FILE *list)
1437 if (*dfhistPtr == nullptr)
1439 snew(*dfhistPtr, 1);
1440 (*dfhistPtr)->nlambda = nlambda;
1441 init_df_history(*dfhistPtr, nlambda);
1443 df_history_t *dfhist = *dfhistPtr;
1445 const StatePart part = StatePart::freeEnergyHistory;
1446 for (int i = 0; (i < edfhNR && ret == 0); i++)
1448 if (fflags & (1<<i))
1452 case edfhBEQUIL: ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list); break;
1453 case edfhNATLAMBDA: ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1454 case edfhWLHISTO: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1455 case edfhWLDELTA: ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list); break;
1456 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1457 case edfhSUMDG: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1458 case edfhSUMMINVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1459 case edfhSUMVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1460 case edfhACCUMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list); break;
1461 case edfhACCUMM: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list); break;
1462 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list); break;
1463 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list); break;
1464 case edfhTIJ: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list); break;
1465 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1468 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1469 "You are probably reading a new checkpoint file with old code", i);
1478 /* This function stores the last whole configuration of the reference and
1479 * average structure in the .cpt file
1481 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1482 int nED, edsamhistory_t *EDstate, FILE *list)
1489 EDstate->bFromCpt = bRead;
1492 /* When reading, init_edsam has not been called yet,
1493 * so we have to allocate memory first. */
1496 snew(EDstate->nref, EDstate->nED);
1497 snew(EDstate->old_sref, EDstate->nED);
1498 snew(EDstate->nav, EDstate->nED);
1499 snew(EDstate->old_sav, EDstate->nED);
1502 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1503 for (int i = 0; i < EDstate->nED; i++)
1507 /* Reference structure SREF */
1508 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1509 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1510 sprintf(buf, "ED%d x_ref", i+1);
1513 snew(EDstate->old_sref[i], EDstate->nref[i]);
1514 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1518 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1521 /* Average structure SAV */
1522 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1523 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1524 sprintf(buf, "ED%d x_av", i+1);
1527 snew(EDstate->old_sav[i], EDstate->nav[i]);
1528 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1532 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1539 static int do_cpt_correlation_grid(XDR *xd, gmx_bool bRead, gmx_unused int fflags,
1540 gmx::CorrelationGridHistory *corrGrid,
1541 FILE *list, int eawhh)
1545 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1546 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1547 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1551 initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize, corrGrid->blockDataListSize);
1554 for (gmx::CorrelationBlockDataHistory &blockData : corrGrid->blockDataBuffer)
1556 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1557 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1558 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1559 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1560 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1561 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1562 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1563 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1564 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1565 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1566 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1572 static int do_cpt_awh_bias(XDR *xd, gmx_bool bRead,
1573 int fflags, gmx::AwhBiasHistory *biasHistory,
1578 gmx::AwhBiasStateHistory *state = &biasHistory->state;
1579 for (int i = 0; (i < eawhhNR && ret == 0); i++)
1581 if (fflags & (1<<i))
1585 case eawhhIN_INITIAL:
1586 do_cpt_int_err(xd, eawhh_names[i], &state->in_initial, list); break;
1587 case eawhhEQUILIBRATEHISTOGRAM:
1588 do_cpt_int_err(xd, eawhh_names[i], &state->equilibrateHistogram, list); break;
1590 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list); break;
1596 numPoints = biasHistory->pointState.size();
1598 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1601 biasHistory->pointState.resize(numPoints);
1605 case eawhhCOORDPOINT:
1606 for (auto &psh : biasHistory->pointState)
1608 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1609 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1610 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1611 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1612 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1613 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1614 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1615 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1616 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1617 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
1618 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
1621 case eawhhUMBRELLAGRIDPOINT:
1622 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list); break;
1623 case eawhhUPDATELIST:
1624 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
1625 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
1627 case eawhhLOGSCALEDSAMPLEWEIGHT:
1628 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
1629 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
1631 case eawhhNUMUPDATES:
1632 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
1634 case eawhhFORCECORRELATIONGRID:
1635 ret = do_cpt_correlation_grid(xd, bRead, fflags, &biasHistory->forceCorrelationGrid, list, i);
1638 gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
1646 static int do_cpt_awh(XDR *xd, gmx_bool bRead,
1647 int fflags, gmx::AwhHistory *awhHistory,
1654 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
1656 if (awhHistory == nullptr)
1658 GMX_RELEASE_ASSERT(bRead, "do_cpt_awh should not be called for writing without an AwhHistory");
1660 awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
1661 awhHistory = awhHistoryLocal.get();
1664 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
1665 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
1666 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
1667 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
1668 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
1669 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
1671 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
1672 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
1677 numBias = awhHistory->bias.size();
1679 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
1683 awhHistory->bias.resize(numBias);
1685 for (auto &bias : awhHistory->bias)
1687 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
1693 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
1698 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1699 std::vector<gmx_file_position_t> *outputfiles,
1700 FILE *list, int file_version)
1703 gmx_off_t mask = 0xFFFFFFFFL;
1704 int offset_high, offset_low;
1705 std::array<char, CPTSTRLEN> buf;
1706 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
1708 // Ensure that reading pre-allocates outputfiles, while writing
1709 // writes what is already there.
1710 int nfiles = outputfiles->size();
1711 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
1717 outputfiles->resize(nfiles);
1720 for (auto &outputfile : *outputfiles)
1722 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1725 do_cpt_string_err(xd, "output filename", buf, list);
1726 std::strncpy(outputfile.filename, buf.data(), buf.size()-1);
1728 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1732 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1736 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32 ) | ( static_cast<gmx_off_t>(offset_low) & mask );
1740 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
1742 offset = outputfile.offset;
1750 offset_low = static_cast<int>(offset & mask);
1751 offset_high = static_cast<int>((offset >> 32) & mask);
1753 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1757 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1762 if (file_version >= 8)
1764 if (do_cpt_int(xd, "file_checksum_size", &outputfile.chksum_size,
1769 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfile.chksum, list) != 0)
1776 outputfile.chksum_size = -1;
1783 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1784 FILE *fplog, const t_commrec *cr,
1785 ivec domdecCells, int nppnodes,
1786 int eIntegrator, int simulation_part,
1787 gmx_bool bExpanded, int elamstats,
1788 gmx_int64_t step, double t,
1789 t_state *state, ObservablesHistory *observablesHistory)
1792 char *fntemp; /* the temporary checkpoint file name */
1794 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1797 if (DOMAINDECOMP(cr))
1799 npmenodes = cr->npmenodes;
1807 /* make the new temporary filename */
1808 snew(fntemp, std::strlen(fn)+5+STEPSTRSIZE);
1809 std::strcpy(fntemp, fn);
1810 fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1811 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1812 std::strcat(fntemp, suffix);
1813 std::strcat(fntemp, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
1815 /* if we can't rename, we just overwrite the cpt file.
1816 * dangerous if interrupted.
1818 snew(fntemp, std::strlen(fn));
1819 std::strcpy(fntemp, fn);
1821 char timebuf[STRLEN];
1822 gmx_format_current_time(timebuf, STRLEN);
1826 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1827 gmx_step_str(step, buf), timebuf);
1830 /* Get offsets for open files */
1831 auto outputfiles = gmx_fio_get_output_file_positions();
1833 fp = gmx_fio_open(fntemp, "w");
1836 if (state->ekinstate.bUpToDate)
1839 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1840 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1841 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1848 energyhistory_t *enerhist = observablesHistory->energyHistory.get();
1850 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
1852 flags_enh |= (1<<eenhENERGY_N) | (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSTEPS_SIM);
1853 if (enerhist->nsum > 0)
1855 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1856 (1<<eenhENERGY_NSUM));
1858 if (enerhist->nsum_sim > 0)
1860 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSUM_SIM));
1862 if (enerhist->deltaHForeignLambdas != nullptr)
1864 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1865 (1<< eenhENERGY_DELTA_H_LIST) |
1866 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1867 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1874 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1875 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1878 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1880 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1882 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1883 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1892 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
1894 flags_awhh |= ( (1 << eawhhIN_INITIAL) |
1895 (1 << eawhhEQUILIBRATEHISTOGRAM) |
1896 (1 << eawhhHISTSIZE) |
1897 (1 << eawhhNPOINTS) |
1898 (1 << eawhhCOORDPOINT) |
1899 (1 << eawhhUMBRELLAGRIDPOINT) |
1900 (1 << eawhhUPDATELIST) |
1901 (1 << eawhhLOGSCALEDSAMPLEWEIGHT) |
1902 (1 << eawhhNUMUPDATES) |
1903 (1 << eawhhFORCECORRELATIONGRID));
1906 /* We can check many more things now (CPU, acceleration, etc), but
1907 * it is highly unlikely to have two separate builds with exactly
1908 * the same version, user, time, and build host!
1911 int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
1913 edsamhistory_t *edsamhist = observablesHistory->edsamHistory.get();
1914 int nED = (edsamhist ? edsamhist->nED : 0);
1916 swaphistory_t *swaphist = observablesHistory->swapHistory.get();
1917 int eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
1919 CheckpointHeaderContents headerContents =
1921 0, {0}, {0}, {0}, {0}, GMX_DOUBLE, {0}, {0},
1922 eIntegrator, simulation_part, step, t, nppnodes,
1924 state->natoms, state->ngtc, state->nnhpres,
1925 state->nhchainlength, nlambda, state->flags, flags_eks, flags_enh, flags_dfh, flags_awhh,
1928 std::strcpy(headerContents.version, gmx_version());
1929 std::strcpy(headerContents.btime, BUILD_TIME);
1930 std::strcpy(headerContents.buser, BUILD_USER);
1931 std::strcpy(headerContents.bhost, BUILD_HOST);
1932 std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
1933 std::strcpy(headerContents.ftime, timebuf);
1934 if (DOMAINDECOMP(cr))
1936 copy_ivec(domdecCells, headerContents.dd_nc);
1939 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
1941 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0) ||
1942 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0) ||
1943 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0) ||
1944 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0) ||
1945 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0) ||
1946 (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0) ||
1947 (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0) ||
1948 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr,
1949 headerContents.file_version) < 0))
1951 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1954 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
1956 /* we really, REALLY, want to make sure to physically write the checkpoint,
1957 and all the files it depends on, out to disk. Because we've
1958 opened the checkpoint with gmx_fio_open(), it's in our list
1960 ret = gmx_fio_all_output_fsync();
1966 "Cannot fsync '%s'; maybe you are out of disk space?",
1967 gmx_fio_getname(ret));
1969 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
1975 gmx_warning("%s", buf);
1979 if (gmx_fio_close(fp) != 0)
1981 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1984 /* we don't move the checkpoint if the user specified they didn't want it,
1985 or if the fsyncs failed */
1987 if (!bNumberAndKeep && !ret)
1991 /* Rename the previous checkpoint file */
1992 std::strcpy(buf, fn);
1993 buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1994 std::strcat(buf, "_prev");
1995 std::strcat(buf, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
1997 /* we copy here so that if something goes wrong between now and
1998 * the rename below, there's always a state.cpt.
1999 * If renames are atomic (such as in POSIX systems),
2000 * this copying should be unneccesary.
2002 gmx_file_copy(fn, buf, FALSE);
2003 /* We don't really care if this fails:
2004 * there's already a new checkpoint.
2007 gmx_file_rename(fn, buf);
2010 if (gmx_file_rename(fntemp, fn) != 0)
2012 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2015 #endif /* GMX_NO_RENAME */
2020 /*code for alternate checkpointing scheme. moved from top of loop over
2022 fcRequestCheckPoint();
2023 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
2025 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
2027 #endif /* end GMX_FAHCORE block */
2030 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
2032 bool foundMismatch = (p != f);
2040 fprintf(fplog, " %s mismatch,\n", type);
2041 fprintf(fplog, " current program: %d\n", p);
2042 fprintf(fplog, " checkpoint file: %d\n", f);
2043 fprintf(fplog, "\n");
2047 static void check_string(FILE *fplog, const char *type, const char *p,
2048 const char *f, gmx_bool *mm)
2050 bool foundMismatch = (std::strcmp(p, f) != 0);
2058 fprintf(fplog, " %s mismatch,\n", type);
2059 fprintf(fplog, " current program: %s\n", p);
2060 fprintf(fplog, " checkpoint file: %s\n", f);
2061 fprintf(fplog, "\n");
2065 static void check_match(FILE *fplog, const t_commrec *cr, const ivec dd_nc,
2066 const CheckpointHeaderContents &headerContents,
2067 gmx_bool reproducibilityRequested)
2069 /* Note that this check_string on the version will also print a message
2070 * when only the minor version differs. But we only print a warning
2071 * message further down with reproducibilityRequested=TRUE.
2073 gmx_bool versionDiffers = FALSE;
2074 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2076 gmx_bool precisionDiffers = FALSE;
2077 check_int (fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2078 if (precisionDiffers)
2080 const char msg_precision_difference[] =
2081 "You are continuing a simulation with a different precision. Not matching\n"
2082 "single/double precision will lead to precision or performance loss.\n";
2085 fprintf(fplog, "%s\n", msg_precision_difference);
2089 gmx_bool mm = (versionDiffers || precisionDiffers);
2091 if (reproducibilityRequested)
2093 check_string(fplog, "Build time", BUILD_TIME, headerContents.btime, &mm);
2094 check_string(fplog, "Build user", BUILD_USER, headerContents.buser, &mm);
2095 check_string(fplog, "Build host", BUILD_HOST, headerContents.bhost, &mm);
2096 check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(), headerContents.fprog, &mm);
2098 check_int (fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2101 if (cr->nnodes > 1 && reproducibilityRequested)
2103 check_int (fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2105 int npp = cr->nnodes;
2106 if (cr->npmenodes >= 0)
2108 npp -= cr->npmenodes;
2110 int npp_f = headerContents.nnodes;
2111 if (headerContents.npme >= 0)
2113 npp_f -= headerContents.npme;
2117 check_int (fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2118 check_int (fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2119 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2125 /* Gromacs should be able to continue from checkpoints between
2126 * different patch level versions, but we do not guarantee
2127 * compatibility between different major/minor versions - check this.
2131 sscanf(gmx_version(), "%5d", &gmx_major);
2132 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2133 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2135 const char msg_major_version_difference[] =
2136 "The current GROMACS major version is not identical to the one that\n"
2137 "generated the checkpoint file. In principle GROMACS does not support\n"
2138 "continuation from checkpoints between different versions, so we advise\n"
2139 "against this. If you still want to try your luck we recommend that you use\n"
2140 "the -noappend flag to keep your output files from the two versions separate.\n"
2141 "This might also work around errors where the output fields in the energy\n"
2142 "file have changed between the different versions.\n";
2144 const char msg_mismatch_notice[] =
2145 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2146 "Continuation is exact, but not guaranteed to be binary identical.\n";
2148 if (majorVersionDiffers)
2152 fprintf(fplog, "%s\n", msg_major_version_difference);
2155 else if (reproducibilityRequested)
2157 /* Major & minor versions match at least, but something is different. */
2160 fprintf(fplog, "%s\n", msg_mismatch_notice);
2166 static void read_checkpoint(const char *fn, FILE **pfplog,
2167 const t_commrec *cr,
2170 int *init_fep_state,
2171 CheckpointHeaderContents *headerContents,
2172 t_state *state, gmx_bool *bReadEkin,
2173 ObservablesHistory *observablesHistory,
2174 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend,
2175 gmx_bool reproducibilityRequested)
2179 char buf[STEPSTRSIZE];
2181 t_fileio *chksum_file;
2182 FILE * fplog = *pfplog;
2183 unsigned char digest[16];
2184 #if !defined __native_client__ && !GMX_NATIVE_WINDOWS
2185 struct flock fl; /* don't initialize here: the struct order is OS
2189 #if !defined __native_client__ && !GMX_NATIVE_WINDOWS
2190 fl.l_type = F_WRLCK;
2191 fl.l_whence = SEEK_SET;
2197 fp = gmx_fio_open(fn, "r");
2198 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2200 if (bAppendOutputFiles &&
2201 headerContents->file_version >= 13 && headerContents->double_prec != GMX_DOUBLE)
2203 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
2206 /* This will not be written if we do appending, since fplog is still NULL then */
2209 fprintf(fplog, "\n");
2210 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2211 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2212 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2213 fprintf(fplog, " GROMACS build time: %s\n", headerContents->btime);
2214 fprintf(fplog, " GROMACS build user: %s\n", headerContents->buser);
2215 fprintf(fplog, " GROMACS build host: %s\n", headerContents->bhost);
2216 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2217 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2218 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2219 fprintf(fplog, " time: %f\n", headerContents->t);
2220 fprintf(fplog, "\n");
2223 if (headerContents->natoms != state->natoms)
2225 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", headerContents->natoms, state->natoms);
2227 if (headerContents->ngtc != state->ngtc)
2229 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", headerContents->ngtc, state->ngtc);
2231 if (headerContents->nnhpres != state->nnhpres)
2233 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", headerContents->nnhpres, state->nnhpres);
2236 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2237 if (headerContents->nlambda != nlambdaHistory)
2239 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", headerContents->nlambda, nlambdaHistory);
2242 init_gtc_state(state, state->ngtc, state->nnhpres, headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2243 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2245 if (headerContents->eIntegrator != eIntegrator)
2247 gmx_fatal(FARGS, "Cannot change integrator during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2250 if (headerContents->flags_state != state->flags)
2252 gmx_fatal(FARGS, "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2257 check_match(fplog, cr, dd_nc, *headerContents,
2258 reproducibilityRequested);
2261 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2262 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
2263 Investigate for 5.0. */
2268 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2273 *bReadEkin = ((headerContents->flags_eks & (1<<eeksEKINH)) ||
2274 (headerContents->flags_eks & (1<<eeksEKINF)) ||
2275 (headerContents->flags_eks & (1<<eeksEKINO)) ||
2276 ((headerContents->flags_eks & (1<<eeksEKINSCALEF)) |
2277 (headerContents->flags_eks & (1<<eeksEKINSCALEH)) |
2278 (headerContents->flags_eks & (1<<eeksVSCALE))));
2280 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2282 observablesHistory->energyHistory = gmx::compat::make_unique<energyhistory_t>();
2284 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2285 headerContents->flags_enh, observablesHistory->energyHistory.get(), nullptr);
2291 if (headerContents->file_version < 6)
2293 gmx_fatal(FARGS, "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2296 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda, &state->dfhist, nullptr);
2302 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2304 observablesHistory->edsamHistory = gmx::compat::make_unique<edsamhistory_t>(edsamhistory_t {});
2306 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED, observablesHistory->edsamHistory.get(), nullptr);
2312 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2314 state->awhHistory = std::make_shared<gmx::AwhHistory>();
2316 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2317 headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2323 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2325 observablesHistory->swapHistory = gmx::compat::make_unique<swaphistory_t>(swaphistory_t {});
2327 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords, observablesHistory->swapHistory.get(), nullptr);
2333 std::vector<gmx_file_position_t> outputfiles;
2334 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2340 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2345 if (gmx_fio_close(fp) != 0)
2347 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2350 /* If the user wants to append to output files,
2351 * we use the file pointer positions of the output files stored
2352 * in the checkpoint file and truncate the files such that any frames
2353 * written after the checkpoint time are removed.
2354 * All files are md5sum checked such that we can be sure that
2355 * we do not truncate other (maybe imprortant) files.
2357 if (bAppendOutputFiles)
2359 if (outputfiles.empty())
2361 gmx_fatal(FARGS, "No names of output files were recorded in the checkpoint");
2363 if (fn2ftp(outputfiles[0].filename) != efLOG)
2365 /* make sure first file is log file so that it is OK to use it for
2368 gmx_fatal(FARGS, "The first output file should always be the log "
2369 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2371 bool firstFile = true;
2372 for (const auto &outputfile : outputfiles)
2374 if (outputfile.offset < 0)
2376 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2377 "is larger than 2 GB, but mdrun did not support large file"
2378 " offsets. Can not append. Run mdrun with -noappend",
2379 outputfile.filename);
2382 chksum_file = gmx_fio_open(outputfile.filename, "a");
2385 chksum_file = gmx_fio_open(outputfile.filename, "r+");
2390 /* Note that there are systems where the lock operation
2391 * will succeed, but a second process can also lock the file.
2392 * We should probably try to detect this.
2394 #if defined __native_client__
2398 #elif GMX_NATIVE_WINDOWS
2399 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2401 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl) == -1)
2404 if (errno == ENOSYS)
2408 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2414 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfile.filename);
2418 else if (errno == EACCES || errno == EAGAIN)
2420 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2421 "simulation?", outputfile.filename);
2425 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2426 outputfile.filename, std::strerror(errno));
2431 /* compute md5 chksum */
2432 if (outputfile.chksum_size != -1)
2434 if (gmx_fio_get_file_md5(chksum_file, outputfile.offset,
2435 digest) != outputfile.chksum_size) /*at the end of the call the file position is at the end of the file*/
2437 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2438 outputfile.chksum_size,
2439 outputfile.filename);
2442 /* log file needs to be seeked in case we need to truncate
2443 * (other files are truncated below)*/
2446 if (gmx_fio_seek(chksum_file, outputfile.offset))
2448 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", std::strerror(errno));
2453 /* open log file here - so that lock is never lifted
2454 * after chksum is calculated */
2457 *pfplog = gmx_fio_getfp(chksum_file);
2461 gmx_fio_close(chksum_file);
2464 /* compare md5 chksum */
2465 if (outputfile.chksum_size != -1 &&
2466 memcmp(digest, outputfile.chksum, 16) != 0)
2470 fprintf(debug, "chksum for %s: ", outputfile.filename);
2471 for (j = 0; j < 16; j++)
2473 fprintf(debug, "%02x", digest[j]);
2475 fprintf(debug, "\n");
2477 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2478 outputfile.filename);
2483 if (!firstFile) /* log file is already seeked to correct position */
2485 #if !GMX_NATIVE_WINDOWS || !defined(GMX_FAHCORE)
2486 /* For FAHCORE, we do this elsewhere*/
2487 rc = gmx_truncate(outputfile.filename, outputfile.offset);
2490 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfile.filename);
2500 void load_checkpoint(const char *fn, FILE **fplog,
2501 const t_commrec *cr, const ivec dd_nc,
2502 t_inputrec *ir, t_state *state,
2503 gmx_bool *bReadEkin,
2504 ObservablesHistory *observablesHistory,
2505 gmx_bool bAppend, gmx_bool bForceAppend,
2506 gmx_bool reproducibilityRequested)
2508 CheckpointHeaderContents headerContents;
2511 /* Read the state from the checkpoint file */
2512 read_checkpoint(fn, fplog,
2514 ir->eI, &(ir->fepvals->init_fep_state),
2516 state, bReadEkin, observablesHistory,
2517 bAppend, bForceAppend,
2518 reproducibilityRequested);
2522 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
2523 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2525 ir->bContinuation = TRUE;
2526 if (ir->nsteps >= 0)
2528 ir->nsteps += ir->init_step - headerContents.step;
2530 ir->init_step = headerContents.step;
2531 ir->simulation_part = headerContents.simulation_part + 1;
2534 void read_checkpoint_part_and_step(const char *filename,
2535 int *simulation_part,
2540 if (filename == nullptr ||
2541 !gmx_fexist(filename) ||
2542 (!(fp = gmx_fio_open(filename, "r"))))
2544 *simulation_part = 0;
2549 CheckpointHeaderContents headerContents;
2550 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2552 *simulation_part = headerContents.simulation_part;
2553 *step = headerContents.step;
2556 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2557 gmx_int64_t *step, double *t, t_state *state,
2558 std::vector<gmx_file_position_t> *outputfiles)
2562 CheckpointHeaderContents headerContents;
2563 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2564 *simulation_part = headerContents.simulation_part;
2565 *step = headerContents.step;
2566 *t = headerContents.t;
2567 state->natoms = headerContents.natoms;
2568 state->ngtc = headerContents.ngtc;
2569 state->nnhpres = headerContents.nnhpres;
2570 state->nhchainlength = headerContents.nhchainlength;
2571 state->flags = headerContents.flags_state;
2573 do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2578 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2584 energyhistory_t enerhist;
2585 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2586 headerContents.flags_enh, &enerhist, nullptr);
2591 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda, &state->dfhist, nullptr);
2597 edsamhistory_t edsamhist = {};
2598 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2604 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2605 headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2611 swaphistory_t swaphist = {};
2612 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2618 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2620 nullptr, headerContents.file_version);
2627 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2634 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2637 int simulation_part;
2641 std::vector<gmx_file_position_t> outputfiles;
2642 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, &outputfiles);
2644 fr->natoms = state.natoms;
2646 fr->step = gmx_int64_to_int(step,
2647 "conversion of checkpoint to trajectory");
2651 fr->lambda = state.lambda[efptFEP];
2652 fr->fep_state = state.fep_state;
2654 fr->bX = (state.flags & (1<<estX));
2657 fr->x = makeRvecArray(state.x, state.natoms);
2659 fr->bV = (state.flags & (1<<estV));
2662 fr->v = makeRvecArray(state.v, state.natoms);
2665 fr->bBox = (state.flags & (1<<estBOX));
2668 copy_mat(state.box, fr->box);
2672 void list_checkpoint(const char *fn, FILE *out)
2679 fp = gmx_fio_open(fn, "r");
2680 CheckpointHeaderContents headerContents;
2681 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
2682 state.natoms = headerContents.natoms;
2683 state.ngtc = headerContents.ngtc;
2684 state.nnhpres = headerContents.nnhpres;
2685 state.nhchainlength = headerContents.nhchainlength;
2686 state.flags = headerContents.flags_state;
2687 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
2692 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
2698 energyhistory_t enerhist;
2699 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2700 headerContents.flags_enh, &enerhist, out);
2704 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2705 headerContents.flags_dfh, headerContents.nlambda, &state.dfhist, out);
2710 edsamhistory_t edsamhist = {};
2711 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
2716 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2717 headerContents.flags_awhh, state.awhHistory.get(), out);
2722 swaphistory_t swaphist = {};
2723 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
2728 std::vector<gmx_file_position_t> outputfiles;
2729 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
2734 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2741 if (gmx_fio_close(fp) != 0)
2743 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2747 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2749 read_checkpoint_simulation_part_and_filenames(t_fileio *fp,
2750 int *simulation_part,
2751 std::vector<gmx_file_position_t> *outputfiles)
2753 gmx_int64_t step = 0;
2757 read_checkpoint_data(fp, simulation_part, &step, &t, &state, outputfiles);
2758 if (gmx_fio_close(fp) != 0)
2760 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");