2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* The source code in this file should be thread-safe.
37 Please keep it that way. */
40 #include "checkpoint.h"
49 #if GMX_NATIVE_WINDOWS
51 #include <sys/locking.h>
56 #include "buildinfo.h"
57 #include "gromacs/fileio/filetypes.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/gmxfio-xdr.h"
60 #include "gromacs/fileio/xdr_datatype.h"
61 #include "gromacs/fileio/xdrf.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/math/vecdump.h"
65 #include "gromacs/math/vectypes.h"
66 #include "gromacs/mdtypes/awh-correlation-history.h"
67 #include "gromacs/mdtypes/awh-history.h"
68 #include "gromacs/mdtypes/commrec.h"
69 #include "gromacs/mdtypes/df_history.h"
70 #include "gromacs/mdtypes/edsamhistory.h"
71 #include "gromacs/mdtypes/energyhistory.h"
72 #include "gromacs/mdtypes/inputrec.h"
73 #include "gromacs/mdtypes/md_enums.h"
74 #include "gromacs/mdtypes/observableshistory.h"
75 #include "gromacs/mdtypes/state.h"
76 #include "gromacs/mdtypes/swaphistory.h"
77 #include "gromacs/trajectory/trajectoryframe.h"
78 #include "gromacs/utility/arrayref.h"
79 #include "gromacs/utility/baseversion.h"
80 #include "gromacs/utility/cstringutil.h"
81 #include "gromacs/utility/fatalerror.h"
82 #include "gromacs/utility/futil.h"
83 #include "gromacs/utility/gmxassert.h"
84 #include "gromacs/utility/int64_to_int.h"
85 #include "gromacs/utility/programcontext.h"
86 #include "gromacs/utility/smalloc.h"
87 #include "gromacs/utility/sysinfo.h"
88 #include "gromacs/utility/txtdump.h"
94 #define CPT_MAGIC1 171817
95 #define CPT_MAGIC2 171819
96 #define CPTSTRLEN 1024
98 /* cpt_version should normally only be changed
99 * when the header or footer format changes.
100 * The state data format itself is backward and forward compatible.
101 * But old code can not read a new entry that is present in the file
102 * (but can read a new format when new entries are not present).
104 static const int cpt_version = 17;
107 const char *est_names[estNR] =
110 "box", "box-rel", "box-v", "pres_prev",
111 "nosehoover-xi", "thermostat-integral",
112 "x", "v", "sdx-unsupported", "CGp", "LD-rng-unsupported", "LD-rng-i-unsupported",
113 "disre_initf", "disre_rm3tav",
114 "orire_initf", "orire_Dtav",
115 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng-unsupported", "MC-rng-i-unsupported",
120 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
123 const char *eeks_names[eeksNR] =
125 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
126 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
130 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
131 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
132 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
133 eenhENERGY_DELTA_H_NN,
134 eenhENERGY_DELTA_H_LIST,
135 eenhENERGY_DELTA_H_STARTTIME,
136 eenhENERGY_DELTA_H_STARTLAMBDA,
140 const char *eenh_names[eenhNR] =
142 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
143 "energy_sum_sim", "energy_nsum_sim",
144 "energy_nsteps", "energy_nsteps_sim",
146 "energy_delta_h_list",
147 "energy_delta_h_start_time",
148 "energy_delta_h_start_lambda"
151 /* free energy history variables -- need to be preserved over checkpoint */
153 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
154 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
156 /* free energy history variable names */
157 const char *edfh_names[edfhNR] =
159 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
160 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
163 /* AWH biasing history variables */
166 eawhhEQUILIBRATEHISTOGRAM,
169 eawhhCOORDPOINT, eawhhUMBRELLAGRIDPOINT,
171 eawhhLOGSCALEDSAMPLEWEIGHT,
173 eawhhFORCECORRELATIONGRID,
177 const char *eawhh_names[eawhhNR] =
180 "awh_equilibrateHistogram",
183 "awh_coordpoint", "awh_umbrellaGridpoint",
185 "awh_logScaledSampleWeight",
187 "awh_forceCorrelationGrid"
190 //! Higher level vector element type, only used for formatting checkpoint dumps
191 enum class CptElementType
193 integer, //!< integer
194 real, //!< float or double, not linked to precision of type real
195 real3, //!< float[3] or double[3], not linked to precision of type real
196 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
199 //! \brief Parts of the checkpoint state, only used for reporting
202 microState, //!< The microstate of the simulated system
203 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
204 energyHistory, //!< Energy observable statistics
205 freeEnergyHistory, //!< Free-energy state and observable statistics
206 accWeightHistogram //!< Accelerated weight histogram method state
209 //! \brief Return the name of a checkpoint entry based on part and part entry
210 static const char *entryName(StatePart part, int ecpt)
214 case StatePart::microState: return est_names [ecpt];
215 case StatePart::kineticEnergy: return eeks_names[ecpt];
216 case StatePart::energyHistory: return eenh_names[ecpt];
217 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
218 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
224 static void cp_warning(FILE *fp)
226 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
229 static void cp_error()
231 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
234 static void do_cpt_string_err(XDR *xd, const char *desc, gmx::ArrayRef<char> s, FILE *list)
236 char *data = s.data();
237 if (xdr_string(xd, &data, s.size()) == 0)
243 fprintf(list, "%s = %s\n", desc, data);
247 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
249 if (xdr_int(xd, i) == 0)
255 fprintf(list, "%s = %d\n", desc, *i);
260 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
264 fprintf(list, "%s = ", desc);
267 for (int j = 0; j < n && res; j++)
269 res &= xdr_u_char(xd, &i[j]);
272 fprintf(list, "%02x", i[j]);
287 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
289 if (do_cpt_int(xd, desc, i, list) < 0)
295 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_int64_t *i, FILE *list)
297 char buf[STEPSTRSIZE];
299 if (xdr_int64(xd, i) == 0)
305 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
309 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
311 if (xdr_double(xd, f) == 0)
317 fprintf(list, "%s = %f\n", desc, *f);
321 static void do_cpt_real_err(XDR *xd, real *f)
324 bool_t res = xdr_double(xd, f);
326 bool_t res = xdr_float(xd, f);
334 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
336 for (int i = 0; i < n; i++)
338 for (int j = 0; j < DIM; j++)
340 do_cpt_real_err(xd, &f[i][j]);
346 pr_rvecs(list, 0, desc, f, n);
350 template <typename T>
358 // cppcheck-suppress unusedStructMember
359 static const int value = xdr_datatype_int;
363 struct xdr_type<float>
365 // cppcheck-suppress unusedStructMember
366 static const int value = xdr_datatype_float;
370 struct xdr_type<double>
372 // cppcheck-suppress unusedStructMember
373 static const int value = xdr_datatype_double;
376 //! \brief Returns size in byte of an xdr_datatype
377 static inline unsigned int sizeOfXdrType(int xdrType)
381 case xdr_datatype_int:
384 case xdr_datatype_float:
385 return sizeof(float);
387 case xdr_datatype_double:
388 return sizeof(double);
390 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
396 //! \brief Returns the XDR process function for i/o of an XDR type
397 static inline xdrproc_t xdrProc(int xdrType)
401 case xdr_datatype_int:
402 return reinterpret_cast<xdrproc_t>(xdr_int);
404 case xdr_datatype_float:
405 return reinterpret_cast<xdrproc_t>(xdr_float);
407 case xdr_datatype_double:
408 return reinterpret_cast<xdrproc_t>(xdr_double);
410 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
416 /*! \brief Lists or only reads an xdr vector from checkpoint file
418 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
419 * The header for the print is set by \p part and \p ecpt.
420 * The formatting of the printing is set by \p cptElementType.
421 * When list==NULL only reads the elements.
423 static bool_t listXdrVector(XDR *xd, StatePart part, int ecpt, int nf, int xdrType,
424 FILE *list, CptElementType cptElementType)
428 const unsigned int elemSize = sizeOfXdrType(xdrType);
429 std::vector<char> data(nf*elemSize);
430 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
436 case xdr_datatype_int:
437 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int *>(data.data()), nf, TRUE);
439 case xdr_datatype_float:
441 if (cptElementType == CptElementType::real3)
443 // cppcheck-suppress invalidPointerCast
444 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
449 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
450 // cppcheck-suppress invalidPointerCast
451 pr_fvec(list, 0, entryName(part, ecpt), reinterpret_cast<const float *>(data.data()), nf, TRUE);
454 case xdr_datatype_double:
456 if (cptElementType == CptElementType::real3)
458 // cppcheck-suppress invalidPointerCast
459 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
464 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
465 // cppcheck-suppress invalidPointerCast
466 pr_dvec(list, 0, entryName(part, ecpt), reinterpret_cast<const double *>(data.data()), nf, TRUE);
469 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
476 //! \brief Convert a double array, typed char*, to float
477 gmx_unused static void convertArrayRealPrecision(const char *c, float *v, int n)
479 // cppcheck-suppress invalidPointerCast
480 const double *d = reinterpret_cast<const double *>(c);
481 for (int i = 0; i < n; i++)
483 v[i] = static_cast<float>(d[i]);
487 //! \brief Convert a float array, typed char*, to double
488 static void convertArrayRealPrecision(const char *c, double *v, int n)
490 // cppcheck-suppress invalidPointerCast
491 const float *f = reinterpret_cast<const float *>(c);
492 for (int i = 0; i < n; i++)
494 v[i] = static_cast<double>(f[i]);
498 //! \brief Generate an error for trying to convert to integer
499 static void convertArrayRealPrecision(const char gmx_unused *c, int gmx_unused *v, int gmx_unused n)
501 GMX_RELEASE_ASSERT(false, "We only expect type mismatches between float and double, not integer");
504 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
506 * This is the only routine that does the actually i/o of real vector,
507 * all other routines are intermediate level routines for specific real
508 * data types, calling this routine.
509 * Currently this routine is (too) complex, since it handles both real *
510 * and std::vector<real>. Using real * is deprecated and this routine
511 * will simplify a lot when only std::vector needs to be supported.
513 * The routine is generic to vectors with different allocators,
514 * because as part of reading a checkpoint there are vectors whose
515 * size is not known until reading has progressed far enough, so a
516 * resize method must be called.
518 * When not listing, we use either v or vector, depending on which is !=NULL.
519 * If nval >= 0, nval is used; on read this should match the passed value.
520 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
521 * the value is stored in nptr
523 template<typename T, typename AllocatorType>
524 static int doVectorLow(XDR *xd, StatePart part, int ecpt, int sflags,
526 T **v, std::vector<T, AllocatorType> *vector,
527 FILE *list, CptElementType cptElementType)
529 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr) || (v == nullptr && vector != nullptr), "Without list, we should have exactly one of v and vector != NULL");
533 int numElemInTheFile;
538 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
539 numElemInTheFile = nval;
545 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
546 // cppcheck-suppress nullPointer
547 numElemInTheFile = *nptr;
551 numElemInTheFile = vector->size();
555 /* Read/write the vector element count */
556 res = xdr_int(xd, &numElemInTheFile);
561 /* Read/write the element data type */
562 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
563 int xdrTypeInTheFile = xdrTypeInTheCode;
564 res = xdr_int(xd, &xdrTypeInTheFile);
570 if (list == nullptr && (sflags & (1 << ecpt)))
574 if (numElemInTheFile != nval)
576 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), nval, numElemInTheFile);
579 else if (nptr != nullptr)
581 *nptr = numElemInTheFile;
584 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
588 sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
589 entryName(part, ecpt),
590 xdr_datatype_names[xdrTypeInTheCode],
591 xdr_datatype_names[xdrTypeInTheFile]);
593 /* Matching int and real should never occur, but check anyhow */
594 if (xdrTypeInTheFile == xdr_datatype_int ||
595 xdrTypeInTheCode == xdr_datatype_int)
597 gmx_fatal(FARGS, "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
606 snew(*v, numElemInTheFile);
612 /* This conditional ensures that we don't resize on write.
613 * In particular in the state where this code was written
614 * PaddedRVecVector has a size of numElemInThefile and we
615 * don't want to lose that padding here.
617 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
619 vector->resize(numElemInTheFile);
627 vChar = reinterpret_cast<char *>(vp);
631 snew(vChar, numElemInTheFile*sizeOfXdrType(xdrTypeInTheFile));
633 res = xdr_vector(xd, vChar,
634 numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
635 xdrProc(xdrTypeInTheFile));
643 /* In the old code float-double conversion came for free.
644 * In the new code we still support it, mainly because
645 * the tip4p_continue regression test makes use of this.
646 * It's an open question if we do or don't want to allow this.
648 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
654 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile,
655 list, cptElementType);
661 //! \brief Read/Write a std::vector.
662 template <typename T>
663 static int doVector(XDR *xd, StatePart part, int ecpt, int sflags,
664 std::vector<T> *vector, FILE *list)
666 return doVectorLow<T>(xd, part, ecpt, sflags, -1, nullptr, nullptr, vector, list, CptElementType::real);
669 //! \brief Read/Write an ArrayRef<real>.
670 static int doRealArrayRef(XDR *xd, StatePart part, int ecpt, int sflags,
671 gmx::ArrayRef<real> vector, FILE *list)
673 real *v_real = vector.data();
674 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, vector.size(), nullptr, &v_real, nullptr, list, CptElementType::real);
677 //! \brief Read/Write a vector whose value_type is RVec and whose allocator is \c AllocatorType.
678 template <typename AllocatorType>
679 static int doRvecVector(XDR *xd, StatePart part, int ecpt, int sflags,
680 std::vector<gmx::RVec, AllocatorType> *v, int numAtoms, FILE *list)
682 const int numReals = numAtoms*DIM;
686 GMX_RELEASE_ASSERT(sflags & (1 << ecpt), "When not listing, the flag for the entry should be set when requesting i/o");
687 GMX_RELEASE_ASSERT(v->size() >= static_cast<size_t>(numAtoms), "v should have sufficient size for numAtoms");
689 real *v_real = v->data()->as_vec();
691 // PaddedRVecVector is padded beyond numAtoms, we should only write
693 gmx::ArrayRef<real> ref(v_real, v_real + numReals);
695 return doRealArrayRef(xd, part, ecpt, sflags, ref, list);
699 // Use the rebind facility to change the value_type of the
700 // allocator from RVec to real.
701 using realAllocator = typename AllocatorType::template rebind<real>::other;
702 return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr, nullptr, list, CptElementType::real);
706 /* This function stores n along with the reals for reading,
707 * but on reading it assumes that n matches the value in the checkpoint file,
708 * a fatal error is generated when this is not the case.
710 static int do_cpte_reals(XDR *xd, StatePart part, int ecpt, int sflags,
711 int n, real **v, FILE *list)
713 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
716 /* This function does the same as do_cpte_reals,
717 * except that on reading it ignores the passed value of *n
718 * and stores the value read from the checkpoint file in *n.
720 static int do_cpte_n_reals(XDR *xd, StatePart part, int ecpt, int sflags,
721 int *n, real **v, FILE *list)
723 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list, CptElementType::real);
726 static int do_cpte_real(XDR *xd, StatePart part, int ecpt, int sflags,
729 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr, list, CptElementType::real);
732 static int do_cpte_ints(XDR *xd, StatePart part, int ecpt, int sflags,
733 int n, int **v, FILE *list)
735 return doVectorLow < int, std::allocator < int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::integer);
738 static int do_cpte_int(XDR *xd, StatePart part, int ecpt, int sflags,
741 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
744 static int do_cpte_doubles(XDR *xd, StatePart part, int ecpt, int sflags,
745 int n, double **v, FILE *list)
747 return doVectorLow < double, std::allocator < double>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
750 static int do_cpte_double(XDR *xd, StatePart part, int ecpt, int sflags,
751 double *r, FILE *list)
753 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
756 static int do_cpte_matrix(XDR *xd, StatePart part, int ecpt, int sflags,
757 matrix v, FILE *list)
763 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
764 DIM*DIM, nullptr, &vr, nullptr, nullptr, CptElementType::matrix3x3);
766 if (list && ret == 0)
768 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
775 static int do_cpte_nmatrix(XDR *xd, StatePart part, int ecpt, int sflags,
776 int n, real **v, FILE *list)
780 char name[CPTSTRLEN];
787 for (i = 0; i < n; i++)
789 reti = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]), nullptr, nullptr, CptElementType::matrix3x3);
790 if (list && reti == 0)
792 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
793 pr_reals(list, 0, name, v[i], n);
803 static int do_cpte_matrices(XDR *xd, StatePart part, int ecpt, int sflags,
804 int n, matrix **v, FILE *list)
807 matrix *vp, *va = nullptr;
813 res = xdr_int(xd, &nf);
818 if (list == nullptr && nf != n)
820 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), n, nf);
822 if (list || !(sflags & (1<<ecpt)))
835 snew(vr, nf*DIM*DIM);
836 for (i = 0; i < nf; i++)
838 for (j = 0; j < DIM; j++)
840 for (k = 0; k < DIM; k++)
842 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
846 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
847 nf*DIM*DIM, nullptr, &vr, nullptr, nullptr,
848 CptElementType::matrix3x3);
849 for (i = 0; i < nf; i++)
851 for (j = 0; j < DIM; j++)
853 for (k = 0; k < DIM; k++)
855 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
861 if (list && ret == 0)
863 for (i = 0; i < nf; i++)
865 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
876 // TODO Expand this into being a container of all data for
877 // serialization of a checkpoint, which can be stored by the caller
878 // (e.g. so that mdrun doesn't have to open the checkpoint twice).
879 // This will separate issues of allocation from those of
880 // serialization, help separate comparison from reading, and have
881 // better defined transformation functions to/from trajectory frame
883 struct CheckpointHeaderContents
886 char version[CPTSTRLEN];
887 char btime[CPTSTRLEN];
888 char buser[CPTSTRLEN];
889 char bhost[CPTSTRLEN];
891 char fprog[CPTSTRLEN];
892 char ftime[CPTSTRLEN];
914 static void do_cpt_header(XDR *xd, gmx_bool bRead, FILE *list,
915 CheckpointHeaderContents *contents)
928 res = xdr_int(xd, &magic);
931 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
933 if (magic != CPT_MAGIC1)
935 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
936 "The checkpoint file is corrupted or not a checkpoint file",
942 gmx_gethostname(fhost, 255);
944 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
945 do_cpt_string_err(xd, "GROMACS build time", contents->btime, list);
946 do_cpt_string_err(xd, "GROMACS build user", contents->buser, list);
947 do_cpt_string_err(xd, "GROMACS build host", contents->bhost, list);
948 do_cpt_string_err(xd, "generating program", contents->fprog, list);
949 do_cpt_string_err(xd, "generation time", contents->ftime, list);
950 contents->file_version = cpt_version;
951 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
952 if (contents->file_version > cpt_version)
954 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", contents->file_version, cpt_version);
956 if (contents->file_version >= 13)
958 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
962 contents->double_prec = -1;
964 if (contents->file_version >= 12)
966 do_cpt_string_err(xd, "generating host", fhost, list);
968 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
969 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
970 if (contents->file_version >= 10)
972 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
976 contents->nhchainlength = 1;
978 if (contents->file_version >= 11)
980 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
984 contents->nnhpres = 0;
986 if (contents->file_version >= 14)
988 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
992 contents->nlambda = 0;
994 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
995 if (contents->file_version >= 3)
997 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1001 contents->simulation_part = 1;
1003 if (contents->file_version >= 5)
1005 do_cpt_step_err(xd, "step", &contents->step, list);
1010 do_cpt_int_err(xd, "step", &idum, list);
1011 contents->step = static_cast<gmx_int64_t>(idum);
1013 do_cpt_double_err(xd, "t", &contents->t, list);
1014 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1015 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1016 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1017 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1018 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1019 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1020 if (contents->file_version >= 4)
1022 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1023 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1027 contents->flags_eks = 0;
1028 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV+1));
1029 contents->flags_state = (contents->flags_state & ~((1<<(estORIRE_DTAV+1)) |
1030 (1<<(estORIRE_DTAV+2)) |
1031 (1<<(estORIRE_DTAV+3))));
1033 if (contents->file_version >= 14)
1035 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1039 contents->flags_dfh = 0;
1042 if (contents->file_version >= 15)
1044 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1051 if (contents->file_version >= 16)
1053 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1057 contents->eSwapCoords = eswapNO;
1060 if (contents->file_version >= 17)
1062 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1066 contents->flags_awhh = 0;
1070 static int do_cpt_footer(XDR *xd, int file_version)
1075 if (file_version >= 2)
1078 res = xdr_int(xd, &magic);
1083 if (magic != CPT_MAGIC2)
1092 static int do_cpt_state(XDR *xd,
1093 int fflags, t_state *state,
1097 const StatePart part = StatePart::microState;
1098 const int sflags = state->flags;
1099 // If reading, state->natoms was probably just read, so
1100 // allocations need to be managed. If writing, this won't change
1101 // anything that matters.
1102 state_change_natoms(state, state->natoms);
1103 for (int i = 0; (i < estNR && ret == 0); i++)
1105 if (fflags & (1<<i))
1109 case estLAMBDA: ret = doRealArrayRef(xd, part, i, sflags, gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()), list); break;
1110 case estFEPSTATE: ret = do_cpte_int (xd, part, i, sflags, &state->fep_state, list); break;
1111 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1112 case estBOX_REL: ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list); break;
1113 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1114 case estPRES_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list); break;
1115 case estSVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list); break;
1116 case estFVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list); break;
1117 case estNH_XI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list); break;
1118 case estNH_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list); break;
1119 case estNHPRES_XI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list); break;
1120 case estNHPRES_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list); break;
1121 case estTHERM_INT: ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list); break;
1122 case estBAROS_INT: ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list); break;
1123 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1124 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1125 case estX: ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list); break;
1126 case estV: ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list); break;
1127 /* The RNG entries are no longer written,
1128 * the next 4 lines are only for reading old files.
1130 case estLD_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1131 case estLD_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1132 case estMC_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1133 case estMC_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1134 case estDISRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.disre_initf, list); break;
1135 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1136 case estORIRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.orire_initf, list); break;
1137 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1139 gmx_fatal(FARGS, "Unknown state entry %d\n"
1140 "You are reading a checkpoint file written by different code, which is not supported", i);
1148 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1153 const StatePart part = StatePart::kineticEnergy;
1154 for (int i = 0; (i < eeksNR && ret == 0); i++)
1156 if (fflags & (1<<i))
1161 case eeksEKIN_N: ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list); break;
1162 case eeksEKINH: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1163 case eeksEKINF: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1164 case eeksEKINO: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1165 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list); break;
1166 case eeksEKINSCALEF: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list); break;
1167 case eeksVSCALE: ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list); break;
1168 case eeksEKINSCALEH: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list); break;
1169 case eeksDEKINDL: ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list); break;
1170 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1172 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1173 "You are probably reading a new checkpoint file with old code", i);
1182 static int do_cpt_swapstate(XDR *xd, gmx_bool bRead,
1183 int eSwapCoords, swaphistory_t *swapstate, FILE *list)
1185 int swap_cpt_version = 2;
1187 if (eSwapCoords == eswapNO)
1192 swapstate->bFromCpt = bRead;
1193 swapstate->eSwapCoords = eSwapCoords;
1195 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1196 if (bRead && swap_cpt_version < 2)
1198 gmx_fatal(FARGS, "Cannot read checkpoint files that were written with old versions"
1199 "of the ion/water position swapping protocol.\n");
1202 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1204 /* When reading, init_swapcoords has not been called yet,
1205 * so we have to allocate memory first. */
1206 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1209 snew(swapstate->ionType, swapstate->nIonTypes);
1212 for (int ic = 0; ic < eCompNR; ic++)
1214 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1216 swapstateIons_t *gs = &swapstate->ionType[ii];
1220 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1224 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1229 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1233 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1236 if (bRead && (nullptr == gs->nMolPast[ic]) )
1238 snew(gs->nMolPast[ic], swapstate->nAverage);
1241 for (int j = 0; j < swapstate->nAverage; j++)
1245 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1249 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1255 /* Ion flux per channel */
1256 for (int ic = 0; ic < eChanNR; ic++)
1258 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1260 swapstateIons_t *gs = &swapstate->ionType[ii];
1264 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1268 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1273 /* Ion flux leakage */
1276 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1280 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1284 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1286 swapstateIons_t *gs = &swapstate->ionType[ii];
1288 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1292 snew(gs->channel_label, gs->nMol);
1293 snew(gs->comp_from, gs->nMol);
1296 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1297 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1300 /* Save the last known whole positions to checkpoint
1301 * file to be able to also make multimeric channels whole in PBC */
1302 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1303 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1306 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1307 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1308 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1309 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1313 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], *swapstate->xc_old_whole_p[eChan0], list);
1314 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], *swapstate->xc_old_whole_p[eChan1], list);
1321 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1322 int fflags, energyhistory_t *enerhist,
1332 GMX_RELEASE_ASSERT(enerhist != nullptr, "With energy history, we need a valid enerhist pointer");
1334 /* This is stored/read for backward compatibility */
1335 int energyHistoryNumEnergies = 0;
1338 enerhist->nsteps = 0;
1340 enerhist->nsteps_sim = 0;
1341 enerhist->nsum_sim = 0;
1343 else if (enerhist != nullptr)
1345 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1348 delta_h_history_t *deltaH = enerhist->deltaHForeignLambdas.get();
1349 const StatePart part = StatePart::energyHistory;
1350 for (int i = 0; (i < eenhNR && ret == 0); i++)
1352 if (fflags & (1<<i))
1356 case eenhENERGY_N: ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list); break;
1357 case eenhENERGY_AVER: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list); break;
1358 case eenhENERGY_SUM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list); break;
1359 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1360 case eenhENERGY_SUM_SIM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list); break;
1361 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1362 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1363 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1364 case eenhENERGY_DELTA_H_NN:
1367 if (!bRead && deltaH != nullptr)
1369 numDeltaH = deltaH->dh.size();
1371 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1374 if (deltaH == nullptr)
1376 enerhist->deltaHForeignLambdas.reset(new delta_h_history_t);
1377 deltaH = enerhist->deltaHForeignLambdas.get();
1379 deltaH->dh.resize(numDeltaH);
1380 deltaH->start_lambda_set = FALSE;
1384 case eenhENERGY_DELTA_H_LIST:
1385 for (auto dh : deltaH->dh)
1387 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1390 case eenhENERGY_DELTA_H_STARTTIME:
1391 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list); break;
1392 case eenhENERGY_DELTA_H_STARTLAMBDA:
1393 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list); break;
1395 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1396 "You are probably reading a new checkpoint file with old code", i);
1401 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1403 /* Assume we have an old file format and copy sum to sum_sim */
1404 enerhist->ener_sum_sim = enerhist->ener_sum;
1407 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1408 !(fflags & (1<<eenhENERGY_NSTEPS)))
1410 /* Assume we have an old file format and copy nsum to nsteps */
1411 enerhist->nsteps = enerhist->nsum;
1413 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1414 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1416 /* Assume we have an old file format and copy nsum to nsteps */
1417 enerhist->nsteps_sim = enerhist->nsum_sim;
1423 static int do_cpt_df_hist(XDR *xd, int fflags, int nlambda, df_history_t **dfhistPtr, FILE *list)
1432 if (*dfhistPtr == nullptr)
1434 snew(*dfhistPtr, 1);
1435 (*dfhistPtr)->nlambda = nlambda;
1436 init_df_history(*dfhistPtr, nlambda);
1438 df_history_t *dfhist = *dfhistPtr;
1440 const StatePart part = StatePart::freeEnergyHistory;
1441 for (int i = 0; (i < edfhNR && ret == 0); i++)
1443 if (fflags & (1<<i))
1447 case edfhBEQUIL: ret = do_cpte_int(xd, part, i, fflags, &dfhist->bEquil, list); break;
1448 case edfhNATLAMBDA: ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1449 case edfhWLHISTO: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1450 case edfhWLDELTA: ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list); break;
1451 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1452 case edfhSUMDG: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1453 case edfhSUMMINVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1454 case edfhSUMVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1455 case edfhACCUMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list); break;
1456 case edfhACCUMM: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list); break;
1457 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list); break;
1458 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list); break;
1459 case edfhTIJ: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list); break;
1460 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1463 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1464 "You are probably reading a new checkpoint file with old code", i);
1473 /* This function stores the last whole configuration of the reference and
1474 * average structure in the .cpt file
1476 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1477 int nED, edsamhistory_t *EDstate, FILE *list)
1484 EDstate->bFromCpt = bRead;
1487 /* When reading, init_edsam has not been called yet,
1488 * so we have to allocate memory first. */
1491 snew(EDstate->nref, EDstate->nED);
1492 snew(EDstate->old_sref, EDstate->nED);
1493 snew(EDstate->nav, EDstate->nED);
1494 snew(EDstate->old_sav, EDstate->nED);
1497 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1498 for (int i = 0; i < EDstate->nED; i++)
1502 /* Reference structure SREF */
1503 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1504 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1505 sprintf(buf, "ED%d x_ref", i+1);
1508 snew(EDstate->old_sref[i], EDstate->nref[i]);
1509 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1513 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1516 /* Average structure SAV */
1517 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1518 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1519 sprintf(buf, "ED%d x_av", i+1);
1522 snew(EDstate->old_sav[i], EDstate->nav[i]);
1523 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1527 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1534 static int do_cpt_correlation_grid(XDR *xd, gmx_bool bRead, gmx_unused int fflags,
1535 gmx::CorrelationGridHistory *corrGrid,
1536 FILE *list, int eawhh)
1540 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1541 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1542 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1546 initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize, corrGrid->blockDataListSize);
1549 for (gmx::CorrelationBlockDataHistory &blockData : corrGrid->blockDataBuffer)
1551 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1552 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1553 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1554 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1555 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1556 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1557 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1558 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1559 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1560 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1561 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1567 static int do_cpt_awh_bias(XDR *xd, gmx_bool bRead,
1568 int fflags, gmx::AwhBiasHistory *biasHistory,
1573 gmx::AwhBiasStateHistory *state = &biasHistory->state;
1574 for (int i = 0; (i < eawhhNR && ret == 0); i++)
1576 if (fflags & (1<<i))
1580 case eawhhIN_INITIAL:
1581 do_cpt_int_err(xd, eawhh_names[i], &state->in_initial, list); break;
1582 case eawhhEQUILIBRATEHISTOGRAM:
1583 do_cpt_int_err(xd, eawhh_names[i], &state->equilibrateHistogram, list); break;
1585 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list); break;
1591 numPoints = biasHistory->pointState.size();
1593 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1596 biasHistory->pointState.resize(numPoints);
1600 case eawhhCOORDPOINT:
1601 for (auto &psh : biasHistory->pointState)
1603 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1604 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1605 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1606 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1607 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1608 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1609 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1610 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1611 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1612 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
1613 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
1616 case eawhhUMBRELLAGRIDPOINT:
1617 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list); break;
1618 case eawhhUPDATELIST:
1619 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
1620 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
1622 case eawhhLOGSCALEDSAMPLEWEIGHT:
1623 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
1624 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
1626 case eawhhNUMUPDATES:
1627 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
1629 case eawhhFORCECORRELATIONGRID:
1630 ret = do_cpt_correlation_grid(xd, bRead, fflags, &biasHistory->forceCorrelationGrid, list, i);
1633 gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
1641 static int do_cpt_awh(XDR *xd, gmx_bool bRead,
1642 int fflags, gmx::AwhHistory *awhHistory,
1649 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
1651 if (awhHistory == nullptr)
1653 GMX_RELEASE_ASSERT(bRead, "do_cpt_awh should not be called for writing without an AwhHistory");
1655 awhHistoryLocal = std::shared_ptr<gmx::AwhHistory>(new gmx::AwhHistory());
1656 awhHistory = awhHistoryLocal.get();
1659 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
1660 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
1661 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
1662 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
1663 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
1664 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
1666 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
1667 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
1672 numBias = awhHistory->bias.size();
1674 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
1678 awhHistory->bias.resize(numBias);
1680 for (auto &bias : awhHistory->bias)
1682 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
1688 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
1693 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1694 std::vector<gmx_file_position_t> *outputfiles,
1695 FILE *list, int file_version)
1698 gmx_off_t mask = 0xFFFFFFFFL;
1699 int offset_high, offset_low;
1700 std::array<char, CPTSTRLEN> buf;
1701 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
1703 // Ensure that reading pre-allocates outputfiles, while writing
1704 // writes what is already there.
1705 int nfiles = outputfiles->size();
1706 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
1712 outputfiles->resize(nfiles);
1715 for (auto &outputfile : *outputfiles)
1717 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1720 do_cpt_string_err(xd, "output filename", buf, list);
1721 std::strncpy(outputfile.filename, buf.data(), buf.size()-1);
1723 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1727 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1731 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32 ) | ( static_cast<gmx_off_t>(offset_low) & mask );
1735 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
1737 offset = outputfile.offset;
1745 offset_low = static_cast<int>(offset & mask);
1746 offset_high = static_cast<int>((offset >> 32) & mask);
1748 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1752 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1757 if (file_version >= 8)
1759 if (do_cpt_int(xd, "file_checksum_size", &outputfile.chksum_size,
1764 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfile.chksum, list) != 0)
1771 outputfile.chksum_size = -1;
1778 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1779 FILE *fplog, const t_commrec *cr,
1780 ivec domdecCells, int nppnodes,
1781 int eIntegrator, int simulation_part,
1782 gmx_bool bExpanded, int elamstats,
1783 gmx_int64_t step, double t,
1784 t_state *state, ObservablesHistory *observablesHistory)
1787 char *fntemp; /* the temporary checkpoint file name */
1789 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1792 if (DOMAINDECOMP(cr))
1794 npmenodes = cr->npmenodes;
1802 /* make the new temporary filename */
1803 snew(fntemp, std::strlen(fn)+5+STEPSTRSIZE);
1804 std::strcpy(fntemp, fn);
1805 fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1806 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1807 std::strcat(fntemp, suffix);
1808 std::strcat(fntemp, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
1810 /* if we can't rename, we just overwrite the cpt file.
1811 * dangerous if interrupted.
1813 snew(fntemp, std::strlen(fn));
1814 std::strcpy(fntemp, fn);
1816 char timebuf[STRLEN];
1817 gmx_format_current_time(timebuf, STRLEN);
1821 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1822 gmx_step_str(step, buf), timebuf);
1825 /* Get offsets for open files */
1826 auto outputfiles = gmx_fio_get_output_file_positions();
1828 fp = gmx_fio_open(fntemp, "w");
1831 if (state->ekinstate.bUpToDate)
1834 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1835 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1836 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1843 energyhistory_t *enerhist = observablesHistory->energyHistory.get();
1845 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
1847 flags_enh |= (1<<eenhENERGY_N) | (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSTEPS_SIM);
1848 if (enerhist->nsum > 0)
1850 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1851 (1<<eenhENERGY_NSUM));
1853 if (enerhist->nsum_sim > 0)
1855 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSUM_SIM));
1857 if (enerhist->deltaHForeignLambdas != nullptr)
1859 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1860 (1<< eenhENERGY_DELTA_H_LIST) |
1861 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1862 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1869 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1870 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1873 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1875 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1877 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1878 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1887 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
1889 flags_awhh |= ( (1 << eawhhIN_INITIAL) |
1890 (1 << eawhhEQUILIBRATEHISTOGRAM) |
1891 (1 << eawhhHISTSIZE) |
1892 (1 << eawhhNPOINTS) |
1893 (1 << eawhhCOORDPOINT) |
1894 (1 << eawhhUMBRELLAGRIDPOINT) |
1895 (1 << eawhhUPDATELIST) |
1896 (1 << eawhhLOGSCALEDSAMPLEWEIGHT) |
1897 (1 << eawhhNUMUPDATES) |
1898 (1 << eawhhFORCECORRELATIONGRID));
1901 /* We can check many more things now (CPU, acceleration, etc), but
1902 * it is highly unlikely to have two separate builds with exactly
1903 * the same version, user, time, and build host!
1906 int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
1908 edsamhistory_t *edsamhist = observablesHistory->edsamHistory.get();
1909 int nED = (edsamhist ? edsamhist->nED : 0);
1911 swaphistory_t *swaphist = observablesHistory->swapHistory.get();
1912 int eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
1914 CheckpointHeaderContents headerContents =
1916 0, {0}, {0}, {0}, {0}, GMX_DOUBLE, {0}, {0},
1917 eIntegrator, simulation_part, step, t, nppnodes,
1919 state->natoms, state->ngtc, state->nnhpres,
1920 state->nhchainlength, nlambda, state->flags, flags_eks, flags_enh, flags_dfh, flags_awhh,
1923 std::strcpy(headerContents.version, gmx_version());
1924 std::strcpy(headerContents.btime, BUILD_TIME);
1925 std::strcpy(headerContents.buser, BUILD_USER);
1926 std::strcpy(headerContents.bhost, BUILD_HOST);
1927 std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
1928 std::strcpy(headerContents.ftime, timebuf);
1929 if (DOMAINDECOMP(cr))
1931 copy_ivec(domdecCells, headerContents.dd_nc);
1934 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
1936 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0) ||
1937 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0) ||
1938 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0) ||
1939 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0) ||
1940 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0) ||
1941 (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0) ||
1942 (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0) ||
1943 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr,
1944 headerContents.file_version) < 0))
1946 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1949 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
1951 /* we really, REALLY, want to make sure to physically write the checkpoint,
1952 and all the files it depends on, out to disk. Because we've
1953 opened the checkpoint with gmx_fio_open(), it's in our list
1955 ret = gmx_fio_all_output_fsync();
1961 "Cannot fsync '%s'; maybe you are out of disk space?",
1962 gmx_fio_getname(ret));
1964 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
1974 if (gmx_fio_close(fp) != 0)
1976 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1979 /* we don't move the checkpoint if the user specified they didn't want it,
1980 or if the fsyncs failed */
1982 if (!bNumberAndKeep && !ret)
1986 /* Rename the previous checkpoint file */
1987 std::strcpy(buf, fn);
1988 buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1989 std::strcat(buf, "_prev");
1990 std::strcat(buf, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
1992 /* we copy here so that if something goes wrong between now and
1993 * the rename below, there's always a state.cpt.
1994 * If renames are atomic (such as in POSIX systems),
1995 * this copying should be unneccesary.
1997 gmx_file_copy(fn, buf, FALSE);
1998 /* We don't really care if this fails:
1999 * there's already a new checkpoint.
2002 gmx_file_rename(fn, buf);
2005 if (gmx_file_rename(fntemp, fn) != 0)
2007 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2010 #endif /* GMX_NO_RENAME */
2015 /*code for alternate checkpointing scheme. moved from top of loop over
2017 fcRequestCheckPoint();
2018 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
2020 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
2022 #endif /* end GMX_FAHCORE block */
2025 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
2027 bool foundMismatch = (p != f);
2035 fprintf(fplog, " %s mismatch,\n", type);
2036 fprintf(fplog, " current program: %d\n", p);
2037 fprintf(fplog, " checkpoint file: %d\n", f);
2038 fprintf(fplog, "\n");
2042 static void check_string(FILE *fplog, const char *type, const char *p,
2043 const char *f, gmx_bool *mm)
2045 bool foundMismatch = (std::strcmp(p, f) != 0);
2053 fprintf(fplog, " %s mismatch,\n", type);
2054 fprintf(fplog, " current program: %s\n", p);
2055 fprintf(fplog, " checkpoint file: %s\n", f);
2056 fprintf(fplog, "\n");
2060 static void check_match(FILE *fplog, const t_commrec *cr, const ivec dd_nc,
2061 const CheckpointHeaderContents &headerContents,
2062 gmx_bool reproducibilityRequested)
2064 /* Note that this check_string on the version will also print a message
2065 * when only the minor version differs. But we only print a warning
2066 * message further down with reproducibilityRequested=TRUE.
2068 gmx_bool versionDiffers = FALSE;
2069 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2071 gmx_bool precisionDiffers = FALSE;
2072 check_int (fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2073 if (precisionDiffers)
2075 const char msg_precision_difference[] =
2076 "You are continuing a simulation with a different precision. Not matching\n"
2077 "single/double precision will lead to precision or performance loss.\n";
2080 fprintf(fplog, "%s\n", msg_precision_difference);
2084 gmx_bool mm = (versionDiffers || precisionDiffers);
2086 if (reproducibilityRequested)
2088 check_string(fplog, "Build time", BUILD_TIME, headerContents.btime, &mm);
2089 check_string(fplog, "Build user", BUILD_USER, headerContents.buser, &mm);
2090 check_string(fplog, "Build host", BUILD_HOST, headerContents.bhost, &mm);
2091 check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(), headerContents.fprog, &mm);
2093 check_int (fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2096 if (cr->nnodes > 1 && reproducibilityRequested)
2098 check_int (fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2100 int npp = cr->nnodes;
2101 if (cr->npmenodes >= 0)
2103 npp -= cr->npmenodes;
2105 int npp_f = headerContents.nnodes;
2106 if (headerContents.npme >= 0)
2108 npp_f -= headerContents.npme;
2112 check_int (fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2113 check_int (fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2114 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2120 /* Gromacs should be able to continue from checkpoints between
2121 * different patch level versions, but we do not guarantee
2122 * compatibility between different major/minor versions - check this.
2126 sscanf(gmx_version(), "%5d", &gmx_major);
2127 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2128 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2130 const char msg_major_version_difference[] =
2131 "The current GROMACS major version is not identical to the one that\n"
2132 "generated the checkpoint file. In principle GROMACS does not support\n"
2133 "continuation from checkpoints between different versions, so we advise\n"
2134 "against this. If you still want to try your luck we recommend that you use\n"
2135 "the -noappend flag to keep your output files from the two versions separate.\n"
2136 "This might also work around errors where the output fields in the energy\n"
2137 "file have changed between the different versions.\n";
2139 const char msg_mismatch_notice[] =
2140 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2141 "Continuation is exact, but not guaranteed to be binary identical.\n";
2143 if (majorVersionDiffers)
2147 fprintf(fplog, "%s\n", msg_major_version_difference);
2150 else if (reproducibilityRequested)
2152 /* Major & minor versions match at least, but something is different. */
2155 fprintf(fplog, "%s\n", msg_mismatch_notice);
2161 static void read_checkpoint(const char *fn, FILE **pfplog,
2162 const t_commrec *cr,
2165 int *init_fep_state,
2166 CheckpointHeaderContents *headerContents,
2167 t_state *state, gmx_bool *bReadEkin,
2168 ObservablesHistory *observablesHistory,
2169 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend,
2170 gmx_bool reproducibilityRequested)
2174 char buf[STEPSTRSIZE];
2176 t_fileio *chksum_file;
2177 FILE * fplog = *pfplog;
2178 unsigned char digest[16];
2179 #if !defined __native_client__ && !GMX_NATIVE_WINDOWS
2180 struct flock fl; /* don't initialize here: the struct order is OS
2184 #if !defined __native_client__ && !GMX_NATIVE_WINDOWS
2185 fl.l_type = F_WRLCK;
2186 fl.l_whence = SEEK_SET;
2192 fp = gmx_fio_open(fn, "r");
2193 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2195 if (bAppendOutputFiles &&
2196 headerContents->file_version >= 13 && headerContents->double_prec != GMX_DOUBLE)
2198 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
2201 /* This will not be written if we do appending, since fplog is still NULL then */
2204 fprintf(fplog, "\n");
2205 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2206 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2207 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2208 fprintf(fplog, " GROMACS build time: %s\n", headerContents->btime);
2209 fprintf(fplog, " GROMACS build user: %s\n", headerContents->buser);
2210 fprintf(fplog, " GROMACS build host: %s\n", headerContents->bhost);
2211 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2212 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2213 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2214 fprintf(fplog, " time: %f\n", headerContents->t);
2215 fprintf(fplog, "\n");
2218 if (headerContents->natoms != state->natoms)
2220 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", headerContents->natoms, state->natoms);
2222 if (headerContents->ngtc != state->ngtc)
2224 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", headerContents->ngtc, state->ngtc);
2226 if (headerContents->nnhpres != state->nnhpres)
2228 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", headerContents->nnhpres, state->nnhpres);
2231 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2232 if (headerContents->nlambda != nlambdaHistory)
2234 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", headerContents->nlambda, nlambdaHistory);
2237 init_gtc_state(state, state->ngtc, state->nnhpres, headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2238 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2240 if (headerContents->eIntegrator != eIntegrator)
2242 gmx_fatal(FARGS, "Cannot change integrator during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2245 if (headerContents->flags_state != state->flags)
2247 gmx_fatal(FARGS, "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2252 check_match(fplog, cr, dd_nc, *headerContents,
2253 reproducibilityRequested);
2256 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2257 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
2258 Investigate for 5.0. */
2263 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2268 *bReadEkin = ((headerContents->flags_eks & (1<<eeksEKINH)) ||
2269 (headerContents->flags_eks & (1<<eeksEKINF)) ||
2270 (headerContents->flags_eks & (1<<eeksEKINO)) ||
2271 ((headerContents->flags_eks & (1<<eeksEKINSCALEF)) |
2272 (headerContents->flags_eks & (1<<eeksEKINSCALEH)) |
2273 (headerContents->flags_eks & (1<<eeksVSCALE))));
2275 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2277 observablesHistory->energyHistory = std::unique_ptr<energyhistory_t>(new energyhistory_t {});
2279 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2280 headerContents->flags_enh, observablesHistory->energyHistory.get(), nullptr);
2286 if (headerContents->file_version < 6)
2288 gmx_fatal(FARGS, "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2291 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda, &state->dfhist, nullptr);
2297 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2299 observablesHistory->edsamHistory = std::unique_ptr<edsamhistory_t>(new edsamhistory_t {});
2301 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED, observablesHistory->edsamHistory.get(), nullptr);
2307 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2309 state->awhHistory = std::shared_ptr<gmx::AwhHistory>(new gmx::AwhHistory());
2311 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2312 headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2318 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2320 observablesHistory->swapHistory = std::unique_ptr<swaphistory_t>(new swaphistory_t {});
2322 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords, observablesHistory->swapHistory.get(), nullptr);
2328 std::vector<gmx_file_position_t> outputfiles;
2329 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2335 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2340 if (gmx_fio_close(fp) != 0)
2342 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2345 /* If the user wants to append to output files,
2346 * we use the file pointer positions of the output files stored
2347 * in the checkpoint file and truncate the files such that any frames
2348 * written after the checkpoint time are removed.
2349 * All files are md5sum checked such that we can be sure that
2350 * we do not truncate other (maybe imprortant) files.
2352 if (bAppendOutputFiles)
2354 if (outputfiles.empty())
2356 gmx_fatal(FARGS, "No names of output files were recorded in the checkpoint");
2358 if (fn2ftp(outputfiles[0].filename) != efLOG)
2360 /* make sure first file is log file so that it is OK to use it for
2363 gmx_fatal(FARGS, "The first output file should always be the log "
2364 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2366 bool firstFile = true;
2367 for (const auto &outputfile : outputfiles)
2369 if (outputfile.offset < 0)
2371 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2372 "is larger than 2 GB, but mdrun did not support large file"
2373 " offsets. Can not append. Run mdrun with -noappend",
2374 outputfile.filename);
2377 chksum_file = gmx_fio_open(outputfile.filename, "a");
2380 chksum_file = gmx_fio_open(outputfile.filename, "r+");
2385 /* Note that there are systems where the lock operation
2386 * will succeed, but a second process can also lock the file.
2387 * We should probably try to detect this.
2389 #if defined __native_client__
2393 #elif GMX_NATIVE_WINDOWS
2394 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2396 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl) == -1)
2399 if (errno == ENOSYS)
2403 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2409 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfile.filename);
2413 else if (errno == EACCES || errno == EAGAIN)
2415 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2416 "simulation?", outputfile.filename);
2420 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2421 outputfile.filename, std::strerror(errno));
2426 /* compute md5 chksum */
2427 if (outputfile.chksum_size != -1)
2429 if (gmx_fio_get_file_md5(chksum_file, outputfile.offset,
2430 digest) != outputfile.chksum_size) /*at the end of the call the file position is at the end of the file*/
2432 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2433 outputfile.chksum_size,
2434 outputfile.filename);
2437 /* log file needs to be seeked in case we need to truncate
2438 * (other files are truncated below)*/
2441 if (gmx_fio_seek(chksum_file, outputfile.offset))
2443 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", std::strerror(errno));
2448 /* open log file here - so that lock is never lifted
2449 * after chksum is calculated */
2452 *pfplog = gmx_fio_getfp(chksum_file);
2456 gmx_fio_close(chksum_file);
2459 /* compare md5 chksum */
2460 if (outputfile.chksum_size != -1 &&
2461 memcmp(digest, outputfile.chksum, 16) != 0)
2465 fprintf(debug, "chksum for %s: ", outputfile.filename);
2466 for (j = 0; j < 16; j++)
2468 fprintf(debug, "%02x", digest[j]);
2470 fprintf(debug, "\n");
2472 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2473 outputfile.filename);
2478 if (!firstFile) /* log file is already seeked to correct position */
2480 #if !GMX_NATIVE_WINDOWS || !defined(GMX_FAHCORE)
2481 /* For FAHCORE, we do this elsewhere*/
2482 rc = gmx_truncate(outputfile.filename, outputfile.offset);
2485 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfile.filename);
2495 void load_checkpoint(const char *fn, FILE **fplog,
2496 const t_commrec *cr, const ivec dd_nc,
2497 t_inputrec *ir, t_state *state,
2498 gmx_bool *bReadEkin,
2499 ObservablesHistory *observablesHistory,
2500 gmx_bool bAppend, gmx_bool bForceAppend,
2501 gmx_bool reproducibilityRequested)
2503 CheckpointHeaderContents headerContents;
2506 /* Read the state from the checkpoint file */
2507 read_checkpoint(fn, fplog,
2509 ir->eI, &(ir->fepvals->init_fep_state),
2511 state, bReadEkin, observablesHistory,
2512 bAppend, bForceAppend,
2513 reproducibilityRequested);
2517 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
2518 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2520 ir->bContinuation = TRUE;
2521 if (ir->nsteps >= 0)
2523 ir->nsteps += ir->init_step - headerContents.step;
2525 ir->init_step = headerContents.step;
2526 ir->simulation_part = headerContents.simulation_part + 1;
2529 void read_checkpoint_part_and_step(const char *filename,
2530 int *simulation_part,
2535 if (filename == nullptr ||
2536 !gmx_fexist(filename) ||
2537 (!(fp = gmx_fio_open(filename, "r"))))
2539 *simulation_part = 0;
2544 CheckpointHeaderContents headerContents;
2545 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2547 *simulation_part = headerContents.simulation_part;
2548 *step = headerContents.step;
2551 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2552 gmx_int64_t *step, double *t, t_state *state,
2553 std::vector<gmx_file_position_t> *outputfiles)
2557 CheckpointHeaderContents headerContents;
2558 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2559 *simulation_part = headerContents.simulation_part;
2560 *step = headerContents.step;
2561 *t = headerContents.t;
2562 state->natoms = headerContents.natoms;
2563 state->ngtc = headerContents.ngtc;
2564 state->nnhpres = headerContents.nnhpres;
2565 state->nhchainlength = headerContents.nhchainlength;
2566 state->flags = headerContents.flags_state;
2568 do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2573 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2579 energyhistory_t enerhist;
2580 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2581 headerContents.flags_enh, &enerhist, nullptr);
2586 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda, &state->dfhist, nullptr);
2592 edsamhistory_t edsamhist = {};
2593 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2599 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2600 headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2606 swaphistory_t swaphist = {};
2607 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2613 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2615 nullptr, headerContents.file_version);
2622 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2629 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2632 int simulation_part;
2636 std::vector<gmx_file_position_t> outputfiles;
2637 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, &outputfiles);
2639 fr->natoms = state.natoms;
2641 fr->step = gmx_int64_to_int(step,
2642 "conversion of checkpoint to trajectory");
2646 fr->lambda = state.lambda[efptFEP];
2647 fr->fep_state = state.fep_state;
2649 fr->bX = (state.flags & (1<<estX));
2652 fr->x = makeRvecArray(state.x, state.natoms);
2654 fr->bV = (state.flags & (1<<estV));
2657 fr->v = makeRvecArray(state.v, state.natoms);
2660 fr->bBox = (state.flags & (1<<estBOX));
2663 copy_mat(state.box, fr->box);
2667 void list_checkpoint(const char *fn, FILE *out)
2674 fp = gmx_fio_open(fn, "r");
2675 CheckpointHeaderContents headerContents;
2676 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
2677 state.natoms = headerContents.natoms;
2678 state.ngtc = headerContents.ngtc;
2679 state.nnhpres = headerContents.nnhpres;
2680 state.nhchainlength = headerContents.nhchainlength;
2681 state.flags = headerContents.flags_state;
2682 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
2687 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
2693 energyhistory_t enerhist;
2694 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2695 headerContents.flags_enh, &enerhist, out);
2699 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2700 headerContents.flags_dfh, headerContents.nlambda, &state.dfhist, out);
2705 edsamhistory_t edsamhist = {};
2706 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
2711 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2712 headerContents.flags_awhh, state.awhHistory.get(), out);
2717 swaphistory_t swaphist = {};
2718 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
2723 std::vector<gmx_file_position_t> outputfiles;
2724 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
2729 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2736 if (gmx_fio_close(fp) != 0)
2738 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2742 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2744 read_checkpoint_simulation_part_and_filenames(t_fileio *fp,
2745 int *simulation_part,
2746 std::vector<gmx_file_position_t> *outputfiles)
2748 gmx_int64_t step = 0;
2752 read_checkpoint_data(fp, simulation_part, &step, &t, &state, outputfiles);
2753 if (gmx_fio_close(fp) != 0)
2755 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");