2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* The source code in this file should be thread-safe.
37 Please keep it that way. */
40 #include "checkpoint.h"
49 #if GMX_NATIVE_WINDOWS
51 #include <sys/locking.h>
57 #include "buildinfo.h"
58 #include "gromacs/compat/make_unique.h"
59 #include "gromacs/fileio/filetypes.h"
60 #include "gromacs/fileio/gmxfio.h"
61 #include "gromacs/fileio/gmxfio-xdr.h"
62 #include "gromacs/fileio/xdr_datatype.h"
63 #include "gromacs/fileio/xdrf.h"
64 #include "gromacs/gmxlib/network.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/math/vecdump.h"
67 #include "gromacs/math/vectypes.h"
68 #include "gromacs/mdtypes/awh-correlation-history.h"
69 #include "gromacs/mdtypes/awh-history.h"
70 #include "gromacs/mdtypes/commrec.h"
71 #include "gromacs/mdtypes/df_history.h"
72 #include "gromacs/mdtypes/edsamhistory.h"
73 #include "gromacs/mdtypes/energyhistory.h"
74 #include "gromacs/mdtypes/inputrec.h"
75 #include "gromacs/mdtypes/md_enums.h"
76 #include "gromacs/mdtypes/observableshistory.h"
77 #include "gromacs/mdtypes/state.h"
78 #include "gromacs/mdtypes/swaphistory.h"
79 #include "gromacs/trajectory/trajectoryframe.h"
80 #include "gromacs/utility/arrayref.h"
81 #include "gromacs/utility/baseversion.h"
82 #include "gromacs/utility/cstringutil.h"
83 #include "gromacs/utility/fatalerror.h"
84 #include "gromacs/utility/futil.h"
85 #include "gromacs/utility/gmxassert.h"
86 #include "gromacs/utility/int64_to_int.h"
87 #include "gromacs/utility/programcontext.h"
88 #include "gromacs/utility/smalloc.h"
89 #include "gromacs/utility/sysinfo.h"
90 #include "gromacs/utility/txtdump.h"
96 #define CPT_MAGIC1 171817
97 #define CPT_MAGIC2 171819
98 #define CPTSTRLEN 1024
100 /*! \brief Enum of values that describe the contents of a cpt file
101 * whose format matches a version number
103 * The enum helps the code be more self-documenting and ensure merges
104 * do not silently resolve when two patches make the same bump. When
105 * adding new functionality, add a new element just above cptv_Count
106 * in this enumeration, and write code below that does the right thing
107 * according to the value of file_version.
110 cptv_Unknown = 17, /**< Version before numbering scheme */
111 cptv_RemoveBuildMachineInformation, /**< remove functionality that makes mdrun builds non-reproducible */
112 cptv_Count /**< the total number of cptv versions */
115 /*! \brief Version number of the file format written to checkpoint
116 * files by this version of the code.
118 * cpt_version should normally only be changed, via adding a new field
119 * to cptv enumeration, when the header or footer format changes.
121 * The state data format itself is backward and forward compatible.
122 * But old code can not read a new entry that is present in the file
123 * (but can read a new format when new entries are not present).
125 * The cpt_version increases whenever the file format in the main
126 * development branch changes, due to an extension of the cptv enum above.
127 * Backward compatibility for reading old run input files is maintained
128 * by checking this version number against that of the file and then using
129 * the correct code path. */
130 static const int cpt_version = cptv_Count - 1;
133 const char *est_names[estNR] =
136 "box", "box-rel", "box-v", "pres_prev",
137 "nosehoover-xi", "thermostat-integral",
138 "x", "v", "sdx-unsupported", "CGp", "LD-rng-unsupported", "LD-rng-i-unsupported",
139 "disre_initf", "disre_rm3tav",
140 "orire_initf", "orire_Dtav",
141 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng-unsupported", "MC-rng-i-unsupported",
146 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
149 static const char *eeks_names[eeksNR] =
151 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
152 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
156 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
157 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
158 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
159 eenhENERGY_DELTA_H_NN,
160 eenhENERGY_DELTA_H_LIST,
161 eenhENERGY_DELTA_H_STARTTIME,
162 eenhENERGY_DELTA_H_STARTLAMBDA,
166 static const char *eenh_names[eenhNR] =
168 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
169 "energy_sum_sim", "energy_nsum_sim",
170 "energy_nsteps", "energy_nsteps_sim",
172 "energy_delta_h_list",
173 "energy_delta_h_start_time",
174 "energy_delta_h_start_lambda"
177 /* free energy history variables -- need to be preserved over checkpoint */
179 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
180 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
182 /* free energy history variable names */
183 static const char *edfh_names[edfhNR] =
185 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
186 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
189 /* AWH biasing history variables */
192 eawhhEQUILIBRATEHISTOGRAM,
195 eawhhCOORDPOINT, eawhhUMBRELLAGRIDPOINT,
197 eawhhLOGSCALEDSAMPLEWEIGHT,
199 eawhhFORCECORRELATIONGRID,
203 static const char *eawhh_names[eawhhNR] =
206 "awh_equilibrateHistogram",
209 "awh_coordpoint", "awh_umbrellaGridpoint",
211 "awh_logScaledSampleWeight",
213 "awh_forceCorrelationGrid"
216 //! Higher level vector element type, only used for formatting checkpoint dumps
217 enum class CptElementType
219 integer, //!< integer
220 real, //!< float or double, not linked to precision of type real
221 real3, //!< float[3] or double[3], not linked to precision of type real
222 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
225 //! \brief Parts of the checkpoint state, only used for reporting
228 microState, //!< The microstate of the simulated system
229 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
230 energyHistory, //!< Energy observable statistics
231 freeEnergyHistory, //!< Free-energy state and observable statistics
232 accWeightHistogram //!< Accelerated weight histogram method state
235 //! \brief Return the name of a checkpoint entry based on part and part entry
236 static const char *entryName(StatePart part, int ecpt)
240 case StatePart::microState: return est_names [ecpt];
241 case StatePart::kineticEnergy: return eeks_names[ecpt];
242 case StatePart::energyHistory: return eenh_names[ecpt];
243 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
244 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
250 static void cp_warning(FILE *fp)
252 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
255 [[ noreturn ]] static void cp_error()
257 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
260 static void do_cpt_string_err(XDR *xd, const char *desc, gmx::ArrayRef<char> s, FILE *list)
262 char *data = s.data();
263 if (xdr_string(xd, &data, s.size()) == 0)
269 fprintf(list, "%s = %s\n", desc, data);
273 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
275 if (xdr_int(xd, i) == 0)
281 fprintf(list, "%s = %d\n", desc, *i);
286 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
290 fprintf(list, "%s = ", desc);
293 for (int j = 0; j < n && res; j++)
295 res &= xdr_u_char(xd, &i[j]);
298 fprintf(list, "%02x", i[j]);
313 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
315 if (do_cpt_int(xd, desc, i, list) < 0)
321 static void do_cpt_bool_err(XDR *xd, const char *desc, bool *b, FILE *list)
323 int i = static_cast<int>(*b);
325 if (do_cpt_int(xd, desc, &i, list) < 0)
333 static void do_cpt_step_err(XDR *xd, const char *desc, int64_t *i, FILE *list)
335 char buf[STEPSTRSIZE];
337 if (xdr_int64(xd, i) == 0)
343 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
347 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
349 if (xdr_double(xd, f) == 0)
355 fprintf(list, "%s = %f\n", desc, *f);
359 static void do_cpt_real_err(XDR *xd, real *f)
362 bool_t res = xdr_double(xd, f);
364 bool_t res = xdr_float(xd, f);
372 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
374 for (int i = 0; i < n; i++)
376 for (int j = 0; j < DIM; j++)
378 do_cpt_real_err(xd, &f[i][j]);
384 pr_rvecs(list, 0, desc, f, n);
388 template <typename T>
396 static const int value = xdr_datatype_int;
400 struct xdr_type<float>
402 static const int value = xdr_datatype_float;
406 struct xdr_type<double>
408 static const int value = xdr_datatype_double;
411 //! \brief Returns size in byte of an xdr_datatype
412 static inline unsigned int sizeOfXdrType(int xdrType)
416 case xdr_datatype_int:
418 case xdr_datatype_float:
419 return sizeof(float);
420 case xdr_datatype_double:
421 return sizeof(double);
422 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
428 //! \brief Returns the XDR process function for i/o of an XDR type
429 static inline xdrproc_t xdrProc(int xdrType)
433 case xdr_datatype_int:
434 return reinterpret_cast<xdrproc_t>(xdr_int);
435 case xdr_datatype_float:
436 return reinterpret_cast<xdrproc_t>(xdr_float);
437 case xdr_datatype_double:
438 return reinterpret_cast<xdrproc_t>(xdr_double);
439 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
445 /*! \brief Lists or only reads an xdr vector from checkpoint file
447 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
448 * The header for the print is set by \p part and \p ecpt.
449 * The formatting of the printing is set by \p cptElementType.
450 * When list==NULL only reads the elements.
452 static bool_t listXdrVector(XDR *xd, StatePart part, int ecpt, int nf, int xdrType,
453 FILE *list, CptElementType cptElementType)
457 const unsigned int elemSize = sizeOfXdrType(xdrType);
458 std::vector<char> data(nf*elemSize);
459 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
465 case xdr_datatype_int:
466 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int *>(data.data()), nf, TRUE);
468 case xdr_datatype_float:
470 if (cptElementType == CptElementType::real3)
472 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
477 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
478 pr_fvec(list, 0, entryName(part, ecpt), reinterpret_cast<const float *>(data.data()), nf, TRUE);
481 case xdr_datatype_double:
483 if (cptElementType == CptElementType::real3)
485 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
490 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
491 pr_dvec(list, 0, entryName(part, ecpt), reinterpret_cast<const double *>(data.data()), nf, TRUE);
494 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
501 //! \brief Convert a double array, typed char*, to float
502 gmx_unused static void convertArrayRealPrecision(const char *c, float *v, int n)
504 const double *d = reinterpret_cast<const double *>(c);
505 for (int i = 0; i < n; i++)
507 v[i] = static_cast<float>(d[i]);
511 //! \brief Convert a float array, typed char*, to double
512 static void convertArrayRealPrecision(const char *c, double *v, int n)
514 const float *f = reinterpret_cast<const float *>(c);
515 for (int i = 0; i < n; i++)
517 v[i] = static_cast<double>(f[i]);
521 //! \brief Generate an error for trying to convert to integer
522 static void convertArrayRealPrecision(const char gmx_unused *c, int gmx_unused *v, int gmx_unused n)
524 GMX_RELEASE_ASSERT(false, "We only expect type mismatches between float and double, not integer");
527 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
529 * This is the only routine that does the actually i/o of real vector,
530 * all other routines are intermediate level routines for specific real
531 * data types, calling this routine.
532 * Currently this routine is (too) complex, since it handles both real *
533 * and std::vector<real>. Using real * is deprecated and this routine
534 * will simplify a lot when only std::vector needs to be supported.
536 * The routine is generic to vectors with different allocators,
537 * because as part of reading a checkpoint there are vectors whose
538 * size is not known until reading has progressed far enough, so a
539 * resize method must be called.
541 * When not listing, we use either v or vector, depending on which is !=NULL.
542 * If nval >= 0, nval is used; on read this should match the passed value.
543 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
544 * the value is stored in nptr
546 template<typename T, typename AllocatorType>
547 static int doVectorLow(XDR *xd, StatePart part, int ecpt, int sflags,
549 T **v, std::vector<T, AllocatorType> *vector,
550 FILE *list, CptElementType cptElementType)
552 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr) || (v == nullptr && vector != nullptr), "Without list, we should have exactly one of v and vector != NULL");
556 int numElemInTheFile;
561 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
562 numElemInTheFile = nval;
568 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
569 numElemInTheFile = *nptr;
573 numElemInTheFile = vector->size();
577 /* Read/write the vector element count */
578 res = xdr_int(xd, &numElemInTheFile);
583 /* Read/write the element data type */
584 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
585 int xdrTypeInTheFile = xdrTypeInTheCode;
586 res = xdr_int(xd, &xdrTypeInTheFile);
592 if (list == nullptr && (sflags & (1 << ecpt)))
596 if (numElemInTheFile != nval)
598 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), nval, numElemInTheFile);
601 else if (nptr != nullptr)
603 *nptr = numElemInTheFile;
606 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
610 sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
611 entryName(part, ecpt),
612 xdr_datatype_names[xdrTypeInTheCode],
613 xdr_datatype_names[xdrTypeInTheFile]);
615 /* Matching int and real should never occur, but check anyhow */
616 if (xdrTypeInTheFile == xdr_datatype_int ||
617 xdrTypeInTheCode == xdr_datatype_int)
619 gmx_fatal(FARGS, "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
628 snew(*v, numElemInTheFile);
634 /* This conditional ensures that we don't resize on write.
635 * In particular in the state where this code was written
636 * PaddedRVecVector has a size of numElemInThefile and we
637 * don't want to lose that padding here.
639 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
641 vector->resize(numElemInTheFile);
649 vChar = reinterpret_cast<char *>(vp);
653 snew(vChar, numElemInTheFile*sizeOfXdrType(xdrTypeInTheFile));
655 res = xdr_vector(xd, vChar,
656 numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
657 xdrProc(xdrTypeInTheFile));
665 /* In the old code float-double conversion came for free.
666 * In the new code we still support it, mainly because
667 * the tip4p_continue regression test makes use of this.
668 * It's an open question if we do or don't want to allow this.
670 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
676 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile,
677 list, cptElementType);
683 //! \brief Read/Write a std::vector.
684 template <typename T>
685 static int doVector(XDR *xd, StatePart part, int ecpt, int sflags,
686 std::vector<T> *vector, FILE *list)
688 return doVectorLow<T>(xd, part, ecpt, sflags, -1, nullptr, nullptr, vector, list, CptElementType::real);
691 //! \brief Read/Write an ArrayRef<real>.
692 static int doRealArrayRef(XDR *xd, StatePart part, int ecpt, int sflags,
693 gmx::ArrayRef<real> vector, FILE *list)
695 real *v_real = vector.data();
696 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, vector.size(), nullptr, &v_real, nullptr, list, CptElementType::real);
699 //! \brief Read/Write a vector whose value_type is RVec and whose allocator is \c AllocatorType.
700 template <typename AllocatorType>
701 static int doRvecVector(XDR *xd, StatePart part, int ecpt, int sflags,
702 std::vector<gmx::RVec, AllocatorType> *v, int numAtoms, FILE *list)
704 const int numReals = numAtoms*DIM;
708 GMX_RELEASE_ASSERT(sflags & (1 << ecpt), "When not listing, the flag for the entry should be set when requesting i/o");
709 GMX_RELEASE_ASSERT(v->size() >= static_cast<size_t>(numAtoms), "v should have sufficient size for numAtoms");
711 real *v_real = (*v)[0];
713 // PaddedRVecVector is padded beyond numAtoms, we should only write
715 gmx::ArrayRef<real> ref(v_real, v_real + numReals);
717 return doRealArrayRef(xd, part, ecpt, sflags, ref, list);
721 // Use the rebind facility to change the value_type of the
722 // allocator from RVec to real.
723 using realAllocator = typename std::allocator_traits<AllocatorType>::template rebind_alloc<real>;
724 return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr, nullptr, list, CptElementType::real);
728 /* This function stores n along with the reals for reading,
729 * but on reading it assumes that n matches the value in the checkpoint file,
730 * a fatal error is generated when this is not the case.
732 static int do_cpte_reals(XDR *xd, StatePart part, int ecpt, int sflags,
733 int n, real **v, FILE *list)
735 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
738 /* This function does the same as do_cpte_reals,
739 * except that on reading it ignores the passed value of *n
740 * and stores the value read from the checkpoint file in *n.
742 static int do_cpte_n_reals(XDR *xd, StatePart part, int ecpt, int sflags,
743 int *n, real **v, FILE *list)
745 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list, CptElementType::real);
748 static int do_cpte_real(XDR *xd, StatePart part, int ecpt, int sflags,
751 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr, list, CptElementType::real);
754 static int do_cpte_ints(XDR *xd, StatePart part, int ecpt, int sflags,
755 int n, int **v, FILE *list)
757 return doVectorLow < int, std::allocator < int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::integer);
760 static int do_cpte_int(XDR *xd, StatePart part, int ecpt, int sflags,
763 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
766 static int do_cpte_bool(XDR *xd, StatePart part, int ecpt, int sflags,
769 int i = static_cast<int>(*b);
770 int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
775 static int do_cpte_doubles(XDR *xd, StatePart part, int ecpt, int sflags,
776 int n, double **v, FILE *list)
778 return doVectorLow < double, std::allocator < double>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
781 static int do_cpte_double(XDR *xd, StatePart part, int ecpt, int sflags,
782 double *r, FILE *list)
784 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
787 static int do_cpte_matrix(XDR *xd, StatePart part, int ecpt, int sflags,
788 matrix v, FILE *list)
794 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
795 DIM*DIM, nullptr, &vr, nullptr, nullptr, CptElementType::matrix3x3);
797 if (list && ret == 0)
799 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
806 static int do_cpte_nmatrix(XDR *xd, StatePart part, int ecpt, int sflags,
807 int n, real **v, FILE *list)
811 char name[CPTSTRLEN];
818 for (i = 0; i < n; i++)
820 reti = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]), nullptr, nullptr, CptElementType::matrix3x3);
821 if (list && reti == 0)
823 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
824 pr_reals(list, 0, name, v[i], n);
834 static int do_cpte_matrices(XDR *xd, StatePart part, int ecpt, int sflags,
835 int n, matrix **v, FILE *list)
838 matrix *vp, *va = nullptr;
844 res = xdr_int(xd, &nf);
849 if (list == nullptr && nf != n)
851 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), n, nf);
853 if (list || !(sflags & (1<<ecpt)))
866 snew(vr, nf*DIM*DIM);
867 for (i = 0; i < nf; i++)
869 for (j = 0; j < DIM; j++)
871 for (k = 0; k < DIM; k++)
873 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
877 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
878 nf*DIM*DIM, nullptr, &vr, nullptr, nullptr,
879 CptElementType::matrix3x3);
880 for (i = 0; i < nf; i++)
882 for (j = 0; j < DIM; j++)
884 for (k = 0; k < DIM; k++)
886 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
892 if (list && ret == 0)
894 for (i = 0; i < nf; i++)
896 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
907 // TODO Expand this into being a container of all data for
908 // serialization of a checkpoint, which can be stored by the caller
909 // (e.g. so that mdrun doesn't have to open the checkpoint twice).
910 // This will separate issues of allocation from those of
911 // serialization, help separate comparison from reading, and have
912 // better defined transformation functions to/from trajectory frame
915 // Several fields were once written to checkpoint file headers, but
916 // have been removed. So that old files can continue to be read,
917 // the names of such fields contain the string "_UNUSED" so that it
918 // is clear they should not be used.
919 struct CheckpointHeaderContents
922 char version[CPTSTRLEN];
923 char btime_UNUSED[CPTSTRLEN];
924 char buser_UNUSED[CPTSTRLEN];
925 char bhost_UNUSED[CPTSTRLEN];
927 char fprog[CPTSTRLEN];
928 char ftime[CPTSTRLEN];
950 static void do_cpt_header(XDR *xd, gmx_bool bRead, FILE *list,
951 CheckpointHeaderContents *contents)
964 res = xdr_int(xd, &magic);
967 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
969 if (magic != CPT_MAGIC1)
971 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
972 "The checkpoint file is corrupted or not a checkpoint file",
978 gmx_gethostname(fhost, 255);
980 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
981 // The following fields are no longer ever written with meaningful
982 // content, but because they precede the file version, there is no
983 // good way for new code to read the old and new formats, nor a
984 // good way for old code to avoid giving an error while reading a
985 // new format. So we read and write a field that no longer has a
987 do_cpt_string_err(xd, "GROMACS build time UNUSED", contents->btime_UNUSED, list);
988 do_cpt_string_err(xd, "GROMACS build user UNUSED", contents->buser_UNUSED, list);
989 do_cpt_string_err(xd, "GROMACS build host UNUSED", contents->bhost_UNUSED, list);
990 do_cpt_string_err(xd, "generating program", contents->fprog, list);
991 do_cpt_string_err(xd, "generation time", contents->ftime, list);
992 contents->file_version = cpt_version;
993 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
994 if (contents->file_version > cpt_version)
996 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", contents->file_version, cpt_version);
998 if (contents->file_version >= 13)
1000 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
1004 contents->double_prec = -1;
1006 if (contents->file_version >= 12)
1008 do_cpt_string_err(xd, "generating host", fhost, list);
1010 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
1011 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
1012 if (contents->file_version >= 10)
1014 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
1018 contents->nhchainlength = 1;
1020 if (contents->file_version >= 11)
1022 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
1026 contents->nnhpres = 0;
1028 if (contents->file_version >= 14)
1030 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
1034 contents->nlambda = 0;
1036 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
1037 if (contents->file_version >= 3)
1039 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1043 contents->simulation_part = 1;
1045 if (contents->file_version >= 5)
1047 do_cpt_step_err(xd, "step", &contents->step, list);
1052 do_cpt_int_err(xd, "step", &idum, list);
1053 contents->step = static_cast<int64_t>(idum);
1055 do_cpt_double_err(xd, "t", &contents->t, list);
1056 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1057 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1058 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1059 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1060 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1061 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1062 if (contents->file_version >= 4)
1064 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1065 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1069 contents->flags_eks = 0;
1070 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV+1));
1071 contents->flags_state = (contents->flags_state & ~((1<<(estORIRE_DTAV+1)) |
1072 (1<<(estORIRE_DTAV+2)) |
1073 (1<<(estORIRE_DTAV+3))));
1075 if (contents->file_version >= 14)
1077 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1081 contents->flags_dfh = 0;
1084 if (contents->file_version >= 15)
1086 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1093 if (contents->file_version >= 16)
1095 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1099 contents->eSwapCoords = eswapNO;
1102 if (contents->file_version >= 17)
1104 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1108 contents->flags_awhh = 0;
1112 static int do_cpt_footer(XDR *xd, int file_version)
1117 if (file_version >= 2)
1120 res = xdr_int(xd, &magic);
1125 if (magic != CPT_MAGIC2)
1134 static int do_cpt_state(XDR *xd,
1135 int fflags, t_state *state,
1139 const StatePart part = StatePart::microState;
1140 const int sflags = state->flags;
1141 // If reading, state->natoms was probably just read, so
1142 // allocations need to be managed. If writing, this won't change
1143 // anything that matters.
1144 state_change_natoms(state, state->natoms);
1145 for (int i = 0; (i < estNR && ret == 0); i++)
1147 if (fflags & (1<<i))
1151 case estLAMBDA: ret = doRealArrayRef(xd, part, i, sflags, gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()), list); break;
1152 case estFEPSTATE: ret = do_cpte_int (xd, part, i, sflags, &state->fep_state, list); break;
1153 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1154 case estBOX_REL: ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list); break;
1155 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1156 case estPRES_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list); break;
1157 case estSVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list); break;
1158 case estFVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list); break;
1159 case estNH_XI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list); break;
1160 case estNH_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list); break;
1161 case estNHPRES_XI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list); break;
1162 case estNHPRES_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list); break;
1163 case estTHERM_INT: ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list); break;
1164 case estBAROS_INT: ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list); break;
1165 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1166 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1167 case estX: ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list); break;
1168 case estV: ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list); break;
1169 /* The RNG entries are no longer written,
1170 * the next 4 lines are only for reading old files.
1172 case estLD_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1173 case estLD_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1174 case estMC_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1175 case estMC_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1176 case estDISRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.disre_initf, list); break;
1177 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1178 case estORIRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.orire_initf, list); break;
1179 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1181 gmx_fatal(FARGS, "Unknown state entry %d\n"
1182 "You are reading a checkpoint file written by different code, which is not supported", i);
1190 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1195 const StatePart part = StatePart::kineticEnergy;
1196 for (int i = 0; (i < eeksNR && ret == 0); i++)
1198 if (fflags & (1<<i))
1203 case eeksEKIN_N: ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list); break;
1204 case eeksEKINH: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1205 case eeksEKINF: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1206 case eeksEKINO: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1207 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list); break;
1208 case eeksEKINSCALEF: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list); break;
1209 case eeksVSCALE: ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list); break;
1210 case eeksEKINSCALEH: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list); break;
1211 case eeksDEKINDL: ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list); break;
1212 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1214 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1215 "You are probably reading a new checkpoint file with old code", i);
1224 static int do_cpt_swapstate(XDR *xd, gmx_bool bRead,
1225 int eSwapCoords, swaphistory_t *swapstate, FILE *list)
1227 int swap_cpt_version = 2;
1229 if (eSwapCoords == eswapNO)
1234 swapstate->bFromCpt = bRead;
1235 swapstate->eSwapCoords = eSwapCoords;
1237 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1238 if (bRead && swap_cpt_version < 2)
1240 gmx_fatal(FARGS, "Cannot read checkpoint files that were written with old versions"
1241 "of the ion/water position swapping protocol.\n");
1244 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1246 /* When reading, init_swapcoords has not been called yet,
1247 * so we have to allocate memory first. */
1248 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1251 snew(swapstate->ionType, swapstate->nIonTypes);
1254 for (int ic = 0; ic < eCompNR; ic++)
1256 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1258 swapstateIons_t *gs = &swapstate->ionType[ii];
1262 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1266 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1271 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1275 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1278 if (bRead && (nullptr == gs->nMolPast[ic]) )
1280 snew(gs->nMolPast[ic], swapstate->nAverage);
1283 for (int j = 0; j < swapstate->nAverage; j++)
1287 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1291 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1297 /* Ion flux per channel */
1298 for (int ic = 0; ic < eChanNR; ic++)
1300 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1302 swapstateIons_t *gs = &swapstate->ionType[ii];
1306 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1310 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1315 /* Ion flux leakage */
1318 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1322 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1326 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1328 swapstateIons_t *gs = &swapstate->ionType[ii];
1330 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1334 snew(gs->channel_label, gs->nMol);
1335 snew(gs->comp_from, gs->nMol);
1338 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1339 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1342 /* Save the last known whole positions to checkpoint
1343 * file to be able to also make multimeric channels whole in PBC */
1344 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1345 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1348 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1349 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1350 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1351 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1355 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], *swapstate->xc_old_whole_p[eChan0], list);
1356 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], *swapstate->xc_old_whole_p[eChan1], list);
1363 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1364 int fflags, energyhistory_t *enerhist,
1374 GMX_RELEASE_ASSERT(enerhist != nullptr, "With energy history, we need a valid enerhist pointer");
1376 /* This is stored/read for backward compatibility */
1377 int energyHistoryNumEnergies = 0;
1380 enerhist->nsteps = 0;
1382 enerhist->nsteps_sim = 0;
1383 enerhist->nsum_sim = 0;
1385 else if (enerhist != nullptr)
1387 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1390 delta_h_history_t *deltaH = enerhist->deltaHForeignLambdas.get();
1391 const StatePart part = StatePart::energyHistory;
1392 for (int i = 0; (i < eenhNR && ret == 0); i++)
1394 if (fflags & (1<<i))
1398 case eenhENERGY_N: ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list); break;
1399 case eenhENERGY_AVER: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list); break;
1400 case eenhENERGY_SUM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list); break;
1401 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1402 case eenhENERGY_SUM_SIM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list); break;
1403 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1404 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1405 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1406 case eenhENERGY_DELTA_H_NN:
1409 if (!bRead && deltaH != nullptr)
1411 numDeltaH = deltaH->dh.size();
1413 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1416 if (deltaH == nullptr)
1418 enerhist->deltaHForeignLambdas = gmx::compat::make_unique<delta_h_history_t>();
1419 deltaH = enerhist->deltaHForeignLambdas.get();
1421 deltaH->dh.resize(numDeltaH);
1422 deltaH->start_lambda_set = FALSE;
1426 case eenhENERGY_DELTA_H_LIST:
1427 for (auto dh : deltaH->dh)
1429 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1432 case eenhENERGY_DELTA_H_STARTTIME:
1433 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list); break;
1434 case eenhENERGY_DELTA_H_STARTLAMBDA:
1435 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list); break;
1437 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1438 "You are probably reading a new checkpoint file with old code", i);
1443 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1445 /* Assume we have an old file format and copy sum to sum_sim */
1446 enerhist->ener_sum_sim = enerhist->ener_sum;
1449 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1450 !(fflags & (1<<eenhENERGY_NSTEPS)))
1452 /* Assume we have an old file format and copy nsum to nsteps */
1453 enerhist->nsteps = enerhist->nsum;
1455 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1456 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1458 /* Assume we have an old file format and copy nsum to nsteps */
1459 enerhist->nsteps_sim = enerhist->nsum_sim;
1465 static int do_cpt_df_hist(XDR *xd, int fflags, int nlambda, df_history_t **dfhistPtr, FILE *list)
1474 if (*dfhistPtr == nullptr)
1476 snew(*dfhistPtr, 1);
1477 (*dfhistPtr)->nlambda = nlambda;
1478 init_df_history(*dfhistPtr, nlambda);
1480 df_history_t *dfhist = *dfhistPtr;
1482 const StatePart part = StatePart::freeEnergyHistory;
1483 for (int i = 0; (i < edfhNR && ret == 0); i++)
1485 if (fflags & (1<<i))
1489 case edfhBEQUIL: ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list); break;
1490 case edfhNATLAMBDA: ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1491 case edfhWLHISTO: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1492 case edfhWLDELTA: ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list); break;
1493 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1494 case edfhSUMDG: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1495 case edfhSUMMINVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1496 case edfhSUMVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1497 case edfhACCUMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list); break;
1498 case edfhACCUMM: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list); break;
1499 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list); break;
1500 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list); break;
1501 case edfhTIJ: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list); break;
1502 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1505 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1506 "You are probably reading a new checkpoint file with old code", i);
1515 /* This function stores the last whole configuration of the reference and
1516 * average structure in the .cpt file
1518 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1519 int nED, edsamhistory_t *EDstate, FILE *list)
1526 EDstate->bFromCpt = bRead;
1529 /* When reading, init_edsam has not been called yet,
1530 * so we have to allocate memory first. */
1533 snew(EDstate->nref, EDstate->nED);
1534 snew(EDstate->old_sref, EDstate->nED);
1535 snew(EDstate->nav, EDstate->nED);
1536 snew(EDstate->old_sav, EDstate->nED);
1539 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1540 for (int i = 0; i < EDstate->nED; i++)
1544 /* Reference structure SREF */
1545 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1546 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1547 sprintf(buf, "ED%d x_ref", i+1);
1550 snew(EDstate->old_sref[i], EDstate->nref[i]);
1551 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1555 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1558 /* Average structure SAV */
1559 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1560 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1561 sprintf(buf, "ED%d x_av", i+1);
1564 snew(EDstate->old_sav[i], EDstate->nav[i]);
1565 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1569 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1576 static int do_cpt_correlation_grid(XDR *xd, gmx_bool bRead, gmx_unused int fflags,
1577 gmx::CorrelationGridHistory *corrGrid,
1578 FILE *list, int eawhh)
1582 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1583 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1584 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1588 initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize, corrGrid->blockDataListSize);
1591 for (gmx::CorrelationBlockDataHistory &blockData : corrGrid->blockDataBuffer)
1593 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1594 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1595 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1596 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1597 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1598 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1599 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1600 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1601 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1602 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1603 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1609 static int do_cpt_awh_bias(XDR *xd, gmx_bool bRead,
1610 int fflags, gmx::AwhBiasHistory *biasHistory,
1615 gmx::AwhBiasStateHistory *state = &biasHistory->state;
1616 for (int i = 0; (i < eawhhNR && ret == 0); i++)
1618 if (fflags & (1<<i))
1622 case eawhhIN_INITIAL:
1623 do_cpt_bool_err(xd, eawhh_names[i], &state->in_initial, list); break;
1624 case eawhhEQUILIBRATEHISTOGRAM:
1625 do_cpt_bool_err(xd, eawhh_names[i], &state->equilibrateHistogram, list); break;
1627 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list); break;
1633 numPoints = biasHistory->pointState.size();
1635 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1638 biasHistory->pointState.resize(numPoints);
1642 case eawhhCOORDPOINT:
1643 for (auto &psh : biasHistory->pointState)
1645 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1646 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1647 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1648 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1649 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1650 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1651 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1652 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1653 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1654 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
1655 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
1658 case eawhhUMBRELLAGRIDPOINT:
1659 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list); break;
1660 case eawhhUPDATELIST:
1661 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
1662 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
1664 case eawhhLOGSCALEDSAMPLEWEIGHT:
1665 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
1666 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
1668 case eawhhNUMUPDATES:
1669 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
1671 case eawhhFORCECORRELATIONGRID:
1672 ret = do_cpt_correlation_grid(xd, bRead, fflags, &biasHistory->forceCorrelationGrid, list, i);
1675 gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
1683 static int do_cpt_awh(XDR *xd, gmx_bool bRead,
1684 int fflags, gmx::AwhHistory *awhHistory,
1691 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
1693 if (awhHistory == nullptr)
1695 GMX_RELEASE_ASSERT(bRead, "do_cpt_awh should not be called for writing without an AwhHistory");
1697 awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
1698 awhHistory = awhHistoryLocal.get();
1701 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
1702 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
1703 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
1704 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
1705 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
1706 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
1708 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
1709 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
1714 numBias = awhHistory->bias.size();
1716 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
1720 awhHistory->bias.resize(numBias);
1722 for (auto &bias : awhHistory->bias)
1724 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
1730 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
1735 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1736 std::vector<gmx_file_position_t> *outputfiles,
1737 FILE *list, int file_version)
1740 gmx_off_t mask = 0xFFFFFFFFL;
1741 int offset_high, offset_low;
1742 std::array<char, CPTSTRLEN> buf;
1743 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
1745 // Ensure that reading pre-allocates outputfiles, while writing
1746 // writes what is already there.
1747 int nfiles = outputfiles->size();
1748 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
1754 outputfiles->resize(nfiles);
1757 for (auto &outputfile : *outputfiles)
1759 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1762 do_cpt_string_err(xd, "output filename", buf, list);
1763 std::strncpy(outputfile.filename, buf.data(), buf.size()-1);
1765 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1769 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1773 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32 ) | ( static_cast<gmx_off_t>(offset_low) & mask );
1777 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
1779 offset = outputfile.offset;
1787 offset_low = static_cast<int>(offset & mask);
1788 offset_high = static_cast<int>((offset >> 32) & mask);
1790 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1794 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1799 if (file_version >= 8)
1801 if (do_cpt_int(xd, "file_checksum_size", &outputfile.checksumSize,
1806 if (do_cpt_u_chars(xd, "file_checksum", outputfile.checksum.size(), outputfile.checksum.data(), list) != 0)
1813 outputfile.checksumSize = -1;
1820 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1821 FILE *fplog, const t_commrec *cr,
1822 ivec domdecCells, int nppnodes,
1823 int eIntegrator, int simulation_part,
1824 gmx_bool bExpanded, int elamstats,
1825 int64_t step, double t,
1826 t_state *state, ObservablesHistory *observablesHistory)
1829 char *fntemp; /* the temporary checkpoint file name */
1831 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1834 if (DOMAINDECOMP(cr))
1836 npmenodes = cr->npmenodes;
1844 /* make the new temporary filename */
1845 snew(fntemp, std::strlen(fn)+5+STEPSTRSIZE);
1846 std::strcpy(fntemp, fn);
1847 fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1848 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1849 std::strcat(fntemp, suffix);
1850 std::strcat(fntemp, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
1852 /* if we can't rename, we just overwrite the cpt file.
1853 * dangerous if interrupted.
1855 snew(fntemp, std::strlen(fn));
1856 std::strcpy(fntemp, fn);
1858 std::string timebuf = gmx_format_current_time();
1862 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1863 gmx_step_str(step, buf), timebuf.c_str());
1866 /* Get offsets for open files */
1867 auto outputfiles = gmx_fio_get_output_file_positions();
1869 fp = gmx_fio_open(fntemp, "w");
1872 if (state->ekinstate.bUpToDate)
1875 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1876 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1877 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1884 energyhistory_t *enerhist = observablesHistory->energyHistory.get();
1886 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
1888 flags_enh |= (1<<eenhENERGY_N) | (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSTEPS_SIM);
1889 if (enerhist->nsum > 0)
1891 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1892 (1<<eenhENERGY_NSUM));
1894 if (enerhist->nsum_sim > 0)
1896 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSUM_SIM));
1898 if (enerhist->deltaHForeignLambdas != nullptr)
1900 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1901 (1<< eenhENERGY_DELTA_H_LIST) |
1902 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1903 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1910 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1911 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1914 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1916 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1918 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1919 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1928 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
1930 flags_awhh |= ( (1 << eawhhIN_INITIAL) |
1931 (1 << eawhhEQUILIBRATEHISTOGRAM) |
1932 (1 << eawhhHISTSIZE) |
1933 (1 << eawhhNPOINTS) |
1934 (1 << eawhhCOORDPOINT) |
1935 (1 << eawhhUMBRELLAGRIDPOINT) |
1936 (1 << eawhhUPDATELIST) |
1937 (1 << eawhhLOGSCALEDSAMPLEWEIGHT) |
1938 (1 << eawhhNUMUPDATES) |
1939 (1 << eawhhFORCECORRELATIONGRID));
1942 /* We can check many more things now (CPU, acceleration, etc), but
1943 * it is highly unlikely to have two separate builds with exactly
1944 * the same version, user, time, and build host!
1947 int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
1949 edsamhistory_t *edsamhist = observablesHistory->edsamHistory.get();
1950 int nED = (edsamhist ? edsamhist->nED : 0);
1952 swaphistory_t *swaphist = observablesHistory->swapHistory.get();
1953 int eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
1955 CheckpointHeaderContents headerContents =
1957 0, {0}, {0}, {0}, {0}, GMX_DOUBLE, {0}, {0},
1958 eIntegrator, simulation_part, step, t, nppnodes,
1960 state->natoms, state->ngtc, state->nnhpres,
1961 state->nhchainlength, nlambda, state->flags, flags_eks, flags_enh, flags_dfh, flags_awhh,
1964 std::strcpy(headerContents.version, gmx_version());
1965 std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
1966 std::strcpy(headerContents.ftime, timebuf.c_str());
1967 if (DOMAINDECOMP(cr))
1969 copy_ivec(domdecCells, headerContents.dd_nc);
1972 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
1974 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0) ||
1975 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0) ||
1976 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0) ||
1977 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0) ||
1978 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0) ||
1979 (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0) ||
1980 (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0) ||
1981 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr,
1982 headerContents.file_version) < 0))
1984 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1987 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
1989 /* we really, REALLY, want to make sure to physically write the checkpoint,
1990 and all the files it depends on, out to disk. Because we've
1991 opened the checkpoint with gmx_fio_open(), it's in our list
1993 ret = gmx_fio_all_output_fsync();
1999 "Cannot fsync '%s'; maybe you are out of disk space?",
2000 gmx_fio_getname(ret));
2002 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
2008 gmx_warning("%s", buf);
2012 if (gmx_fio_close(fp) != 0)
2014 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2017 /* we don't move the checkpoint if the user specified they didn't want it,
2018 or if the fsyncs failed */
2020 if (!bNumberAndKeep && !ret)
2024 /* Rename the previous checkpoint file */
2025 std::strcpy(buf, fn);
2026 buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2027 std::strcat(buf, "_prev");
2028 std::strcat(buf, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2031 /* we copy here so that if something goes wrong between now and
2032 * the rename below, there's always a state.cpt.
2033 * If renames are atomic (such as in POSIX systems),
2034 * this copying should be unneccesary.
2036 gmx_file_copy(fn, buf, FALSE);
2037 /* We don't really care if this fails:
2038 * there's already a new checkpoint.
2043 gmx_file_rename(fn, buf);
2046 if (gmx_file_rename(fntemp, fn) != 0)
2048 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2051 #endif /* GMX_NO_RENAME */
2056 /*code for alternate checkpointing scheme. moved from top of loop over
2058 fcRequestCheckPoint();
2059 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
2061 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
2063 #endif /* end GMX_FAHCORE block */
2066 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
2068 bool foundMismatch = (p != f);
2076 fprintf(fplog, " %s mismatch,\n", type);
2077 fprintf(fplog, " current program: %d\n", p);
2078 fprintf(fplog, " checkpoint file: %d\n", f);
2079 fprintf(fplog, "\n");
2083 static void check_string(FILE *fplog, const char *type, const char *p,
2084 const char *f, gmx_bool *mm)
2086 bool foundMismatch = (std::strcmp(p, f) != 0);
2094 fprintf(fplog, " %s mismatch,\n", type);
2095 fprintf(fplog, " current program: %s\n", p);
2096 fprintf(fplog, " checkpoint file: %s\n", f);
2097 fprintf(fplog, "\n");
2101 static void check_match(FILE *fplog, const t_commrec *cr, const ivec dd_nc,
2102 const CheckpointHeaderContents &headerContents,
2103 gmx_bool reproducibilityRequested)
2105 /* Note that this check_string on the version will also print a message
2106 * when only the minor version differs. But we only print a warning
2107 * message further down with reproducibilityRequested=TRUE.
2109 gmx_bool versionDiffers = FALSE;
2110 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2112 gmx_bool precisionDiffers = FALSE;
2113 check_int (fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2114 if (precisionDiffers)
2116 const char msg_precision_difference[] =
2117 "You are continuing a simulation with a different precision. Not matching\n"
2118 "single/double precision will lead to precision or performance loss.\n";
2121 fprintf(fplog, "%s\n", msg_precision_difference);
2125 gmx_bool mm = (versionDiffers || precisionDiffers);
2127 if (reproducibilityRequested)
2129 check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(), headerContents.fprog, &mm);
2131 check_int (fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2134 if (cr->nnodes > 1 && reproducibilityRequested)
2136 check_int (fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2138 int npp = cr->nnodes;
2139 if (cr->npmenodes >= 0)
2141 npp -= cr->npmenodes;
2143 int npp_f = headerContents.nnodes;
2144 if (headerContents.npme >= 0)
2146 npp_f -= headerContents.npme;
2150 check_int (fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2151 check_int (fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2152 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2158 /* Gromacs should be able to continue from checkpoints between
2159 * different patch level versions, but we do not guarantee
2160 * compatibility between different major/minor versions - check this.
2164 sscanf(gmx_version(), "%5d", &gmx_major);
2165 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2166 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2168 const char msg_major_version_difference[] =
2169 "The current GROMACS major version is not identical to the one that\n"
2170 "generated the checkpoint file. In principle GROMACS does not support\n"
2171 "continuation from checkpoints between different versions, so we advise\n"
2172 "against this. If you still want to try your luck we recommend that you use\n"
2173 "the -noappend flag to keep your output files from the two versions separate.\n"
2174 "This might also work around errors where the output fields in the energy\n"
2175 "file have changed between the different versions.\n";
2177 const char msg_mismatch_notice[] =
2178 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2179 "Continuation is exact, but not guaranteed to be binary identical.\n";
2181 if (majorVersionDiffers)
2185 fprintf(fplog, "%s\n", msg_major_version_difference);
2188 else if (reproducibilityRequested)
2190 /* Major & minor versions match at least, but something is different. */
2193 fprintf(fplog, "%s\n", msg_mismatch_notice);
2199 static void read_checkpoint(const char *fn, FILE **pfplog,
2200 const t_commrec *cr,
2203 int *init_fep_state,
2204 CheckpointHeaderContents *headerContents,
2205 t_state *state, gmx_bool *bReadEkin,
2206 ObservablesHistory *observablesHistory,
2207 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend,
2208 gmx_bool reproducibilityRequested)
2212 char buf[STEPSTRSIZE];
2214 t_fileio *chksum_file;
2215 FILE * fplog = *pfplog;
2216 #if !defined __native_client__ && !GMX_NATIVE_WINDOWS
2217 struct flock fl; /* don't initialize here: the struct order is OS
2221 #if !defined __native_client__ && !GMX_NATIVE_WINDOWS
2222 fl.l_type = F_WRLCK;
2223 fl.l_whence = SEEK_SET;
2229 fp = gmx_fio_open(fn, "r");
2230 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2232 if (bAppendOutputFiles &&
2233 headerContents->file_version >= 13 && headerContents->double_prec != GMX_DOUBLE)
2235 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
2238 /* This will not be written if we do appending, since fplog is still NULL then */
2241 fprintf(fplog, "\n");
2242 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2243 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2244 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2245 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2246 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2247 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2248 fprintf(fplog, " time: %f\n", headerContents->t);
2249 fprintf(fplog, "\n");
2252 if (headerContents->natoms != state->natoms)
2254 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", headerContents->natoms, state->natoms);
2256 if (headerContents->ngtc != state->ngtc)
2258 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", headerContents->ngtc, state->ngtc);
2260 if (headerContents->nnhpres != state->nnhpres)
2262 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", headerContents->nnhpres, state->nnhpres);
2265 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2266 if (headerContents->nlambda != nlambdaHistory)
2268 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", headerContents->nlambda, nlambdaHistory);
2271 init_gtc_state(state, state->ngtc, state->nnhpres, headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2272 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2274 if (headerContents->eIntegrator != eIntegrator)
2276 gmx_fatal(FARGS, "Cannot change integrator during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2279 if (headerContents->flags_state != state->flags)
2281 gmx_fatal(FARGS, "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2286 check_match(fplog, cr, dd_nc, *headerContents,
2287 reproducibilityRequested);
2290 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2291 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
2292 Investigate for 5.0. */
2297 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2302 *bReadEkin = (((headerContents->flags_eks & (1<<eeksEKINH)) != 0) ||
2303 ((headerContents->flags_eks & (1<<eeksEKINF)) != 0) ||
2304 ((headerContents->flags_eks & (1<<eeksEKINO)) != 0) ||
2305 (((headerContents->flags_eks & (1<<eeksEKINSCALEF)) |
2306 (headerContents->flags_eks & (1<<eeksEKINSCALEH)) |
2307 (headerContents->flags_eks & (1<<eeksVSCALE))) != 0));
2309 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2311 observablesHistory->energyHistory = gmx::compat::make_unique<energyhistory_t>();
2313 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2314 headerContents->flags_enh, observablesHistory->energyHistory.get(), nullptr);
2320 if (headerContents->file_version < 6)
2322 gmx_fatal(FARGS, "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2325 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda, &state->dfhist, nullptr);
2331 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2333 observablesHistory->edsamHistory = gmx::compat::make_unique<edsamhistory_t>(edsamhistory_t {});
2335 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED, observablesHistory->edsamHistory.get(), nullptr);
2341 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2343 state->awhHistory = std::make_shared<gmx::AwhHistory>();
2345 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2346 headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2352 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2354 observablesHistory->swapHistory = gmx::compat::make_unique<swaphistory_t>(swaphistory_t {});
2356 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords, observablesHistory->swapHistory.get(), nullptr);
2362 std::vector<gmx_file_position_t> outputfiles;
2363 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2369 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2374 if (gmx_fio_close(fp) != 0)
2376 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2379 /* If the user wants to append to output files,
2380 * we use the file pointer positions of the output files stored
2381 * in the checkpoint file and truncate the files such that any frames
2382 * written after the checkpoint time are removed.
2383 * All files are md5sum checked such that we can be sure that
2384 * we do not truncate other (maybe imprortant) files.
2386 if (bAppendOutputFiles)
2388 if (outputfiles.empty())
2390 gmx_fatal(FARGS, "No names of output files were recorded in the checkpoint");
2392 if (fn2ftp(outputfiles[0].filename) != efLOG)
2394 /* make sure first file is log file so that it is OK to use it for
2397 gmx_fatal(FARGS, "The first output file should always be the log "
2398 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2400 bool firstFile = true;
2401 for (const auto &outputfile : outputfiles)
2403 if (outputfile.offset < 0)
2405 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2406 "is larger than 2 GB, but mdrun did not support large file"
2407 " offsets. Can not append. Run mdrun with -noappend",
2408 outputfile.filename);
2410 std::array<unsigned char, 16> digest;
2413 chksum_file = gmx_fio_open(outputfile.filename, "a");
2417 chksum_file = gmx_fio_open(outputfile.filename, "r+");
2422 /* Note that there are systems where the lock operation
2423 * will succeed, but a second process can also lock the file.
2424 * We should probably try to detect this.
2426 #if defined __native_client__
2430 #elif GMX_NATIVE_WINDOWS
2431 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2433 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl) == -1)
2436 if (errno == ENOSYS)
2440 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2446 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfile.filename);
2450 else if (errno == EACCES || errno == EAGAIN)
2452 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2453 "simulation?", outputfile.filename);
2457 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2458 outputfile.filename, std::strerror(errno));
2463 /* compute md5 chksum */
2464 if (outputfile.checksumSize != -1)
2466 if (gmx_fio_get_file_md5(chksum_file, outputfile.offset,
2467 &digest) != outputfile.checksumSize) /*at the end of the call the file position is at the end of the file*/
2469 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2470 outputfile.checksumSize,
2471 outputfile.filename);
2474 /* log file needs to be seeked in case we need to truncate
2475 * (other files are truncated below)*/
2478 if (gmx_fio_seek(chksum_file, outputfile.offset))
2480 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", std::strerror(errno));
2485 /* open log file here - so that lock is never lifted
2486 * after chksum is calculated */
2489 *pfplog = gmx_fio_getfp(chksum_file);
2493 gmx_fio_close(chksum_file);
2495 /* compare md5 chksum */
2497 outputfile.checksumSize != -1 &&
2498 digest != outputfile.checksum)
2502 fprintf(debug, "chksum for %s: ", outputfile.filename);
2503 for (j = 0; j < 16; j++)
2505 fprintf(debug, "%02x", digest[j]);
2507 fprintf(debug, "\n");
2509 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2510 outputfile.filename);
2513 // We could preprocess less, but then checkers complain
2514 // about possible bugs when using logic on constant
2516 #if !GMX_NATIVE_WINDOWS || !GMX_FAHCORE
2517 // The log file is already seeked to correct position, but
2518 // others must be truncated.
2521 /* For FAHCORE, we do this elsewhere*/
2522 rc = gmx_truncate(outputfile.filename, outputfile.offset);
2525 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfile.filename);
2535 void load_checkpoint(const char *fn, FILE **fplog,
2536 const t_commrec *cr, const ivec dd_nc,
2537 t_inputrec *ir, t_state *state,
2538 gmx_bool *bReadEkin,
2539 ObservablesHistory *observablesHistory,
2540 gmx_bool bAppend, gmx_bool bForceAppend,
2541 gmx_bool reproducibilityRequested)
2543 CheckpointHeaderContents headerContents;
2546 /* Read the state from the checkpoint file */
2547 read_checkpoint(fn, fplog,
2549 ir->eI, &(ir->fepvals->init_fep_state),
2551 state, bReadEkin, observablesHistory,
2552 bAppend, bForceAppend,
2553 reproducibilityRequested);
2557 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
2558 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2560 ir->bContinuation = TRUE;
2561 if (ir->nsteps >= 0)
2563 ir->nsteps += ir->init_step - headerContents.step;
2565 ir->init_step = headerContents.step;
2566 ir->simulation_part = headerContents.simulation_part + 1;
2569 void read_checkpoint_part_and_step(const char *filename,
2570 int *simulation_part,
2575 if (filename == nullptr ||
2576 !gmx_fexist(filename) ||
2577 ((fp = gmx_fio_open(filename, "r")) == nullptr))
2579 *simulation_part = 0;
2584 CheckpointHeaderContents headerContents;
2585 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2587 *simulation_part = headerContents.simulation_part;
2588 *step = headerContents.step;
2591 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2592 int64_t *step, double *t, t_state *state,
2593 std::vector<gmx_file_position_t> *outputfiles)
2597 CheckpointHeaderContents headerContents;
2598 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2599 *simulation_part = headerContents.simulation_part;
2600 *step = headerContents.step;
2601 *t = headerContents.t;
2602 state->natoms = headerContents.natoms;
2603 state->ngtc = headerContents.ngtc;
2604 state->nnhpres = headerContents.nnhpres;
2605 state->nhchainlength = headerContents.nhchainlength;
2606 state->flags = headerContents.flags_state;
2608 do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2613 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2619 energyhistory_t enerhist;
2620 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2621 headerContents.flags_enh, &enerhist, nullptr);
2626 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda, &state->dfhist, nullptr);
2632 edsamhistory_t edsamhist = {};
2633 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2639 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2640 headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2646 swaphistory_t swaphist = {};
2647 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2653 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2655 nullptr, headerContents.file_version);
2662 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2669 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2672 int simulation_part;
2676 std::vector<gmx_file_position_t> outputfiles;
2677 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, &outputfiles);
2679 fr->natoms = state.natoms;
2681 fr->step = int64_to_int(step,
2682 "conversion of checkpoint to trajectory");
2686 fr->lambda = state.lambda[efptFEP];
2687 fr->fep_state = state.fep_state;
2689 fr->bX = ((state.flags & (1<<estX)) != 0);
2692 fr->x = makeRvecArray(state.x, state.natoms);
2694 fr->bV = ((state.flags & (1<<estV)) != 0);
2697 fr->v = makeRvecArray(state.v, state.natoms);
2700 fr->bBox = ((state.flags & (1<<estBOX)) != 0);
2703 copy_mat(state.box, fr->box);
2707 void list_checkpoint(const char *fn, FILE *out)
2714 fp = gmx_fio_open(fn, "r");
2715 CheckpointHeaderContents headerContents;
2716 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
2717 state.natoms = headerContents.natoms;
2718 state.ngtc = headerContents.ngtc;
2719 state.nnhpres = headerContents.nnhpres;
2720 state.nhchainlength = headerContents.nhchainlength;
2721 state.flags = headerContents.flags_state;
2722 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
2727 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
2733 energyhistory_t enerhist;
2734 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2735 headerContents.flags_enh, &enerhist, out);
2739 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2740 headerContents.flags_dfh, headerContents.nlambda, &state.dfhist, out);
2745 edsamhistory_t edsamhist = {};
2746 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
2751 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2752 headerContents.flags_awhh, state.awhHistory.get(), out);
2757 swaphistory_t swaphist = {};
2758 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
2763 std::vector<gmx_file_position_t> outputfiles;
2764 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
2769 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2776 if (gmx_fio_close(fp) != 0)
2778 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2782 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2784 read_checkpoint_simulation_part_and_filenames(t_fileio *fp,
2785 int *simulation_part,
2786 std::vector<gmx_file_position_t> *outputfiles)
2792 read_checkpoint_data(fp, simulation_part, &step, &t, &state, outputfiles);
2793 if (gmx_fio_close(fp) != 0)
2795 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");