2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* The source code in this file should be thread-safe.
37 Please keep it that way. */
40 #include "checkpoint.h"
49 #if GMX_NATIVE_WINDOWS
51 #include <sys/locking.h>
57 #include "buildinfo.h"
58 #include "gromacs/compat/make_unique.h"
59 #include "gromacs/fileio/filetypes.h"
60 #include "gromacs/fileio/gmxfio.h"
61 #include "gromacs/fileio/gmxfio-xdr.h"
62 #include "gromacs/fileio/xdr_datatype.h"
63 #include "gromacs/fileio/xdrf.h"
64 #include "gromacs/gmxlib/network.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/math/vecdump.h"
67 #include "gromacs/math/vectypes.h"
68 #include "gromacs/mdtypes/awh-correlation-history.h"
69 #include "gromacs/mdtypes/awh-history.h"
70 #include "gromacs/mdtypes/commrec.h"
71 #include "gromacs/mdtypes/df_history.h"
72 #include "gromacs/mdtypes/edsamhistory.h"
73 #include "gromacs/mdtypes/energyhistory.h"
74 #include "gromacs/mdtypes/inputrec.h"
75 #include "gromacs/mdtypes/md_enums.h"
76 #include "gromacs/mdtypes/observableshistory.h"
77 #include "gromacs/mdtypes/state.h"
78 #include "gromacs/mdtypes/swaphistory.h"
79 #include "gromacs/trajectory/trajectoryframe.h"
80 #include "gromacs/utility/arrayref.h"
81 #include "gromacs/utility/baseversion.h"
82 #include "gromacs/utility/cstringutil.h"
83 #include "gromacs/utility/fatalerror.h"
84 #include "gromacs/utility/futil.h"
85 #include "gromacs/utility/gmxassert.h"
86 #include "gromacs/utility/int64_to_int.h"
87 #include "gromacs/utility/programcontext.h"
88 #include "gromacs/utility/smalloc.h"
89 #include "gromacs/utility/sysinfo.h"
90 #include "gromacs/utility/txtdump.h"
96 #define CPT_MAGIC1 171817
97 #define CPT_MAGIC2 171819
98 #define CPTSTRLEN 1024
100 /* cpt_version should normally only be changed
101 * when the header or footer format changes.
102 * The state data format itself is backward and forward compatible.
103 * But old code can not read a new entry that is present in the file
104 * (but can read a new format when new entries are not present).
106 static const int cpt_version = 17;
109 const char *est_names[estNR] =
112 "box", "box-rel", "box-v", "pres_prev",
113 "nosehoover-xi", "thermostat-integral",
114 "x", "v", "sdx-unsupported", "CGp", "LD-rng-unsupported", "LD-rng-i-unsupported",
115 "disre_initf", "disre_rm3tav",
116 "orire_initf", "orire_Dtav",
117 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng-unsupported", "MC-rng-i-unsupported",
122 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
125 static const char *eeks_names[eeksNR] =
127 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
128 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
132 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
133 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
134 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
135 eenhENERGY_DELTA_H_NN,
136 eenhENERGY_DELTA_H_LIST,
137 eenhENERGY_DELTA_H_STARTTIME,
138 eenhENERGY_DELTA_H_STARTLAMBDA,
142 static const char *eenh_names[eenhNR] =
144 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
145 "energy_sum_sim", "energy_nsum_sim",
146 "energy_nsteps", "energy_nsteps_sim",
148 "energy_delta_h_list",
149 "energy_delta_h_start_time",
150 "energy_delta_h_start_lambda"
153 /* free energy history variables -- need to be preserved over checkpoint */
155 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
156 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
158 /* free energy history variable names */
159 static const char *edfh_names[edfhNR] =
161 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
162 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
165 /* AWH biasing history variables */
168 eawhhEQUILIBRATEHISTOGRAM,
171 eawhhCOORDPOINT, eawhhUMBRELLAGRIDPOINT,
173 eawhhLOGSCALEDSAMPLEWEIGHT,
175 eawhhFORCECORRELATIONGRID,
179 static const char *eawhh_names[eawhhNR] =
182 "awh_equilibrateHistogram",
185 "awh_coordpoint", "awh_umbrellaGridpoint",
187 "awh_logScaledSampleWeight",
189 "awh_forceCorrelationGrid"
192 //! Higher level vector element type, only used for formatting checkpoint dumps
193 enum class CptElementType
195 integer, //!< integer
196 real, //!< float or double, not linked to precision of type real
197 real3, //!< float[3] or double[3], not linked to precision of type real
198 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
201 //! \brief Parts of the checkpoint state, only used for reporting
204 microState, //!< The microstate of the simulated system
205 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
206 energyHistory, //!< Energy observable statistics
207 freeEnergyHistory, //!< Free-energy state and observable statistics
208 accWeightHistogram //!< Accelerated weight histogram method state
211 //! \brief Return the name of a checkpoint entry based on part and part entry
212 static const char *entryName(StatePart part, int ecpt)
216 case StatePart::microState: return est_names [ecpt];
217 case StatePart::kineticEnergy: return eeks_names[ecpt];
218 case StatePart::energyHistory: return eenh_names[ecpt];
219 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
220 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
226 static void cp_warning(FILE *fp)
228 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
231 [[ noreturn ]] static void cp_error()
233 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
236 static void do_cpt_string_err(XDR *xd, const char *desc, gmx::ArrayRef<char> s, FILE *list)
238 char *data = s.data();
239 if (xdr_string(xd, &data, s.size()) == 0)
245 fprintf(list, "%s = %s\n", desc, data);
249 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
251 if (xdr_int(xd, i) == 0)
257 fprintf(list, "%s = %d\n", desc, *i);
262 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
266 fprintf(list, "%s = ", desc);
269 for (int j = 0; j < n && res; j++)
271 res &= xdr_u_char(xd, &i[j]);
274 fprintf(list, "%02x", i[j]);
289 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
291 if (do_cpt_int(xd, desc, i, list) < 0)
297 static void do_cpt_step_err(XDR *xd, const char *desc, int64_t *i, FILE *list)
299 char buf[STEPSTRSIZE];
301 if (xdr_int64(xd, i) == 0)
307 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
311 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
313 if (xdr_double(xd, f) == 0)
319 fprintf(list, "%s = %f\n", desc, *f);
323 static void do_cpt_real_err(XDR *xd, real *f)
326 bool_t res = xdr_double(xd, f);
328 bool_t res = xdr_float(xd, f);
336 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
338 for (int i = 0; i < n; i++)
340 for (int j = 0; j < DIM; j++)
342 do_cpt_real_err(xd, &f[i][j]);
348 pr_rvecs(list, 0, desc, f, n);
352 template <typename T>
360 static const int value = xdr_datatype_int;
364 struct xdr_type<float>
366 static const int value = xdr_datatype_float;
370 struct xdr_type<double>
372 static const int value = xdr_datatype_double;
375 //! \brief Returns size in byte of an xdr_datatype
376 static inline unsigned int sizeOfXdrType(int xdrType)
380 case xdr_datatype_int:
382 case xdr_datatype_float:
383 return sizeof(float);
384 case xdr_datatype_double:
385 return sizeof(double);
386 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
392 //! \brief Returns the XDR process function for i/o of an XDR type
393 static inline xdrproc_t xdrProc(int xdrType)
397 case xdr_datatype_int:
398 return reinterpret_cast<xdrproc_t>(xdr_int);
399 case xdr_datatype_float:
400 return reinterpret_cast<xdrproc_t>(xdr_float);
401 case xdr_datatype_double:
402 return reinterpret_cast<xdrproc_t>(xdr_double);
403 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
409 /*! \brief Lists or only reads an xdr vector from checkpoint file
411 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
412 * The header for the print is set by \p part and \p ecpt.
413 * The formatting of the printing is set by \p cptElementType.
414 * When list==NULL only reads the elements.
416 static bool_t listXdrVector(XDR *xd, StatePart part, int ecpt, int nf, int xdrType,
417 FILE *list, CptElementType cptElementType)
421 const unsigned int elemSize = sizeOfXdrType(xdrType);
422 std::vector<char> data(nf*elemSize);
423 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
429 case xdr_datatype_int:
430 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int *>(data.data()), nf, TRUE);
432 case xdr_datatype_float:
434 if (cptElementType == CptElementType::real3)
436 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
441 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
442 pr_fvec(list, 0, entryName(part, ecpt), reinterpret_cast<const float *>(data.data()), nf, TRUE);
445 case xdr_datatype_double:
447 if (cptElementType == CptElementType::real3)
449 pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
454 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
455 pr_dvec(list, 0, entryName(part, ecpt), reinterpret_cast<const double *>(data.data()), nf, TRUE);
458 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
465 //! \brief Convert a double array, typed char*, to float
466 gmx_unused static void convertArrayRealPrecision(const char *c, float *v, int n)
468 const double *d = reinterpret_cast<const double *>(c);
469 for (int i = 0; i < n; i++)
471 v[i] = static_cast<float>(d[i]);
475 //! \brief Convert a float array, typed char*, to double
476 static void convertArrayRealPrecision(const char *c, double *v, int n)
478 const float *f = reinterpret_cast<const float *>(c);
479 for (int i = 0; i < n; i++)
481 v[i] = static_cast<double>(f[i]);
485 //! \brief Generate an error for trying to convert to integer
486 static void convertArrayRealPrecision(const char gmx_unused *c, int gmx_unused *v, int gmx_unused n)
488 GMX_RELEASE_ASSERT(false, "We only expect type mismatches between float and double, not integer");
491 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
493 * This is the only routine that does the actually i/o of real vector,
494 * all other routines are intermediate level routines for specific real
495 * data types, calling this routine.
496 * Currently this routine is (too) complex, since it handles both real *
497 * and std::vector<real>. Using real * is deprecated and this routine
498 * will simplify a lot when only std::vector needs to be supported.
500 * The routine is generic to vectors with different allocators,
501 * because as part of reading a checkpoint there are vectors whose
502 * size is not known until reading has progressed far enough, so a
503 * resize method must be called.
505 * When not listing, we use either v or vector, depending on which is !=NULL.
506 * If nval >= 0, nval is used; on read this should match the passed value.
507 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
508 * the value is stored in nptr
510 template<typename T, typename AllocatorType>
511 static int doVectorLow(XDR *xd, StatePart part, int ecpt, int sflags,
513 T **v, std::vector<T, AllocatorType> *vector,
514 FILE *list, CptElementType cptElementType)
516 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr) || (v == nullptr && vector != nullptr), "Without list, we should have exactly one of v and vector != NULL");
520 int numElemInTheFile;
525 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
526 numElemInTheFile = nval;
532 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
533 numElemInTheFile = *nptr;
537 numElemInTheFile = vector->size();
541 /* Read/write the vector element count */
542 res = xdr_int(xd, &numElemInTheFile);
547 /* Read/write the element data type */
548 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
549 int xdrTypeInTheFile = xdrTypeInTheCode;
550 res = xdr_int(xd, &xdrTypeInTheFile);
556 if (list == nullptr && (sflags & (1 << ecpt)))
560 if (numElemInTheFile != nval)
562 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), nval, numElemInTheFile);
565 else if (nptr != nullptr)
567 *nptr = numElemInTheFile;
570 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
574 sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
575 entryName(part, ecpt),
576 xdr_datatype_names[xdrTypeInTheCode],
577 xdr_datatype_names[xdrTypeInTheFile]);
579 /* Matching int and real should never occur, but check anyhow */
580 if (xdrTypeInTheFile == xdr_datatype_int ||
581 xdrTypeInTheCode == xdr_datatype_int)
583 gmx_fatal(FARGS, "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
592 snew(*v, numElemInTheFile);
598 /* This conditional ensures that we don't resize on write.
599 * In particular in the state where this code was written
600 * PaddedRVecVector has a size of numElemInThefile and we
601 * don't want to lose that padding here.
603 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
605 vector->resize(numElemInTheFile);
613 vChar = reinterpret_cast<char *>(vp);
617 snew(vChar, numElemInTheFile*sizeOfXdrType(xdrTypeInTheFile));
619 res = xdr_vector(xd, vChar,
620 numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
621 xdrProc(xdrTypeInTheFile));
629 /* In the old code float-double conversion came for free.
630 * In the new code we still support it, mainly because
631 * the tip4p_continue regression test makes use of this.
632 * It's an open question if we do or don't want to allow this.
634 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
640 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile,
641 list, cptElementType);
647 //! \brief Read/Write a std::vector.
648 template <typename T>
649 static int doVector(XDR *xd, StatePart part, int ecpt, int sflags,
650 std::vector<T> *vector, FILE *list)
652 return doVectorLow<T>(xd, part, ecpt, sflags, -1, nullptr, nullptr, vector, list, CptElementType::real);
655 //! \brief Read/Write an ArrayRef<real>.
656 static int doRealArrayRef(XDR *xd, StatePart part, int ecpt, int sflags,
657 gmx::ArrayRef<real> vector, FILE *list)
659 real *v_real = vector.data();
660 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, vector.size(), nullptr, &v_real, nullptr, list, CptElementType::real);
663 //! \brief Read/Write a vector whose value_type is RVec and whose allocator is \c AllocatorType.
664 template <typename AllocatorType>
665 static int doRvecVector(XDR *xd, StatePart part, int ecpt, int sflags,
666 std::vector<gmx::RVec, AllocatorType> *v, int numAtoms, FILE *list)
668 const int numReals = numAtoms*DIM;
672 GMX_RELEASE_ASSERT(sflags & (1 << ecpt), "When not listing, the flag for the entry should be set when requesting i/o");
673 GMX_RELEASE_ASSERT(v->size() >= static_cast<size_t>(numAtoms), "v should have sufficient size for numAtoms");
675 real *v_real = (*v)[0];
677 // PaddedRVecVector is padded beyond numAtoms, we should only write
679 gmx::ArrayRef<real> ref(v_real, v_real + numReals);
681 return doRealArrayRef(xd, part, ecpt, sflags, ref, list);
685 // Use the rebind facility to change the value_type of the
686 // allocator from RVec to real.
687 using realAllocator = typename AllocatorType::template rebind<real>::other;
688 return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr, nullptr, list, CptElementType::real);
692 /* This function stores n along with the reals for reading,
693 * but on reading it assumes that n matches the value in the checkpoint file,
694 * a fatal error is generated when this is not the case.
696 static int do_cpte_reals(XDR *xd, StatePart part, int ecpt, int sflags,
697 int n, real **v, FILE *list)
699 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
702 /* This function does the same as do_cpte_reals,
703 * except that on reading it ignores the passed value of *n
704 * and stores the value read from the checkpoint file in *n.
706 static int do_cpte_n_reals(XDR *xd, StatePart part, int ecpt, int sflags,
707 int *n, real **v, FILE *list)
709 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list, CptElementType::real);
712 static int do_cpte_real(XDR *xd, StatePart part, int ecpt, int sflags,
715 return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr, list, CptElementType::real);
718 static int do_cpte_ints(XDR *xd, StatePart part, int ecpt, int sflags,
719 int n, int **v, FILE *list)
721 return doVectorLow < int, std::allocator < int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::integer);
724 static int do_cpte_int(XDR *xd, StatePart part, int ecpt, int sflags,
727 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
730 static int do_cpte_bool(XDR *xd, StatePart part, int ecpt, int sflags,
734 int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
739 static int do_cpte_doubles(XDR *xd, StatePart part, int ecpt, int sflags,
740 int n, double **v, FILE *list)
742 return doVectorLow < double, std::allocator < double>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
745 static int do_cpte_double(XDR *xd, StatePart part, int ecpt, int sflags,
746 double *r, FILE *list)
748 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
751 static int do_cpte_matrix(XDR *xd, StatePart part, int ecpt, int sflags,
752 matrix v, FILE *list)
758 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
759 DIM*DIM, nullptr, &vr, nullptr, nullptr, CptElementType::matrix3x3);
761 if (list && ret == 0)
763 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
770 static int do_cpte_nmatrix(XDR *xd, StatePart part, int ecpt, int sflags,
771 int n, real **v, FILE *list)
775 char name[CPTSTRLEN];
782 for (i = 0; i < n; i++)
784 reti = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]), nullptr, nullptr, CptElementType::matrix3x3);
785 if (list && reti == 0)
787 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
788 pr_reals(list, 0, name, v[i], n);
798 static int do_cpte_matrices(XDR *xd, StatePart part, int ecpt, int sflags,
799 int n, matrix **v, FILE *list)
802 matrix *vp, *va = nullptr;
808 res = xdr_int(xd, &nf);
813 if (list == nullptr && nf != n)
815 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), n, nf);
817 if (list || !(sflags & (1<<ecpt)))
830 snew(vr, nf*DIM*DIM);
831 for (i = 0; i < nf; i++)
833 for (j = 0; j < DIM; j++)
835 for (k = 0; k < DIM; k++)
837 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
841 ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
842 nf*DIM*DIM, nullptr, &vr, nullptr, nullptr,
843 CptElementType::matrix3x3);
844 for (i = 0; i < nf; i++)
846 for (j = 0; j < DIM; j++)
848 for (k = 0; k < DIM; k++)
850 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
856 if (list && ret == 0)
858 for (i = 0; i < nf; i++)
860 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
871 // TODO Expand this into being a container of all data for
872 // serialization of a checkpoint, which can be stored by the caller
873 // (e.g. so that mdrun doesn't have to open the checkpoint twice).
874 // This will separate issues of allocation from those of
875 // serialization, help separate comparison from reading, and have
876 // better defined transformation functions to/from trajectory frame
878 struct CheckpointHeaderContents
881 char version[CPTSTRLEN];
882 char btime[CPTSTRLEN];
883 char buser[CPTSTRLEN];
884 char bhost[CPTSTRLEN];
886 char fprog[CPTSTRLEN];
887 char ftime[CPTSTRLEN];
909 static void do_cpt_header(XDR *xd, gmx_bool bRead, FILE *list,
910 CheckpointHeaderContents *contents)
923 res = xdr_int(xd, &magic);
926 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
928 if (magic != CPT_MAGIC1)
930 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
931 "The checkpoint file is corrupted or not a checkpoint file",
937 gmx_gethostname(fhost, 255);
939 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
940 do_cpt_string_err(xd, "GROMACS build time", contents->btime, list);
941 do_cpt_string_err(xd, "GROMACS build user", contents->buser, list);
942 do_cpt_string_err(xd, "GROMACS build host", contents->bhost, list);
943 do_cpt_string_err(xd, "generating program", contents->fprog, list);
944 do_cpt_string_err(xd, "generation time", contents->ftime, list);
945 contents->file_version = cpt_version;
946 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
947 if (contents->file_version > cpt_version)
949 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", contents->file_version, cpt_version);
951 if (contents->file_version >= 13)
953 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
957 contents->double_prec = -1;
959 if (contents->file_version >= 12)
961 do_cpt_string_err(xd, "generating host", fhost, list);
963 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
964 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
965 if (contents->file_version >= 10)
967 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
971 contents->nhchainlength = 1;
973 if (contents->file_version >= 11)
975 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
979 contents->nnhpres = 0;
981 if (contents->file_version >= 14)
983 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
987 contents->nlambda = 0;
989 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
990 if (contents->file_version >= 3)
992 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
996 contents->simulation_part = 1;
998 if (contents->file_version >= 5)
1000 do_cpt_step_err(xd, "step", &contents->step, list);
1005 do_cpt_int_err(xd, "step", &idum, list);
1006 contents->step = static_cast<int64_t>(idum);
1008 do_cpt_double_err(xd, "t", &contents->t, list);
1009 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1010 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1011 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1012 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1013 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1014 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1015 if (contents->file_version >= 4)
1017 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1018 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1022 contents->flags_eks = 0;
1023 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV+1));
1024 contents->flags_state = (contents->flags_state & ~((1<<(estORIRE_DTAV+1)) |
1025 (1<<(estORIRE_DTAV+2)) |
1026 (1<<(estORIRE_DTAV+3))));
1028 if (contents->file_version >= 14)
1030 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1034 contents->flags_dfh = 0;
1037 if (contents->file_version >= 15)
1039 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1046 if (contents->file_version >= 16)
1048 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1052 contents->eSwapCoords = eswapNO;
1055 if (contents->file_version >= 17)
1057 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1061 contents->flags_awhh = 0;
1065 static int do_cpt_footer(XDR *xd, int file_version)
1070 if (file_version >= 2)
1073 res = xdr_int(xd, &magic);
1078 if (magic != CPT_MAGIC2)
1087 static int do_cpt_state(XDR *xd,
1088 int fflags, t_state *state,
1092 const StatePart part = StatePart::microState;
1093 const int sflags = state->flags;
1094 // If reading, state->natoms was probably just read, so
1095 // allocations need to be managed. If writing, this won't change
1096 // anything that matters.
1097 state_change_natoms(state, state->natoms);
1098 for (int i = 0; (i < estNR && ret == 0); i++)
1100 if (fflags & (1<<i))
1104 case estLAMBDA: ret = doRealArrayRef(xd, part, i, sflags, gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()), list); break;
1105 case estFEPSTATE: ret = do_cpte_int (xd, part, i, sflags, &state->fep_state, list); break;
1106 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1107 case estBOX_REL: ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list); break;
1108 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1109 case estPRES_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list); break;
1110 case estSVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list); break;
1111 case estFVIR_PREV: ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list); break;
1112 case estNH_XI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list); break;
1113 case estNH_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list); break;
1114 case estNHPRES_XI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list); break;
1115 case estNHPRES_VXI: ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list); break;
1116 case estTHERM_INT: ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list); break;
1117 case estBAROS_INT: ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list); break;
1118 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1119 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1120 case estX: ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list); break;
1121 case estV: ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list); break;
1122 /* The RNG entries are no longer written,
1123 * the next 4 lines are only for reading old files.
1125 case estLD_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1126 case estLD_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1127 case estMC_RNG_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1128 case estMC_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1129 case estDISRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.disre_initf, list); break;
1130 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1131 case estORIRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.orire_initf, list); break;
1132 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1134 gmx_fatal(FARGS, "Unknown state entry %d\n"
1135 "You are reading a checkpoint file written by different code, which is not supported", i);
1143 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1148 const StatePart part = StatePart::kineticEnergy;
1149 for (int i = 0; (i < eeksNR && ret == 0); i++)
1151 if (fflags & (1<<i))
1156 case eeksEKIN_N: ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list); break;
1157 case eeksEKINH: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1158 case eeksEKINF: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1159 case eeksEKINO: ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1160 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list); break;
1161 case eeksEKINSCALEF: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list); break;
1162 case eeksVSCALE: ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list); break;
1163 case eeksEKINSCALEH: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list); break;
1164 case eeksDEKINDL: ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list); break;
1165 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1167 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1168 "You are probably reading a new checkpoint file with old code", i);
1177 static int do_cpt_swapstate(XDR *xd, gmx_bool bRead,
1178 int eSwapCoords, swaphistory_t *swapstate, FILE *list)
1180 int swap_cpt_version = 2;
1182 if (eSwapCoords == eswapNO)
1187 swapstate->bFromCpt = bRead;
1188 swapstate->eSwapCoords = eSwapCoords;
1190 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1191 if (bRead && swap_cpt_version < 2)
1193 gmx_fatal(FARGS, "Cannot read checkpoint files that were written with old versions"
1194 "of the ion/water position swapping protocol.\n");
1197 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1199 /* When reading, init_swapcoords has not been called yet,
1200 * so we have to allocate memory first. */
1201 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1204 snew(swapstate->ionType, swapstate->nIonTypes);
1207 for (int ic = 0; ic < eCompNR; ic++)
1209 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1211 swapstateIons_t *gs = &swapstate->ionType[ii];
1215 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1219 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1224 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1228 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1231 if (bRead && (nullptr == gs->nMolPast[ic]) )
1233 snew(gs->nMolPast[ic], swapstate->nAverage);
1236 for (int j = 0; j < swapstate->nAverage; j++)
1240 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1244 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1250 /* Ion flux per channel */
1251 for (int ic = 0; ic < eChanNR; ic++)
1253 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1255 swapstateIons_t *gs = &swapstate->ionType[ii];
1259 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1263 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1268 /* Ion flux leakage */
1271 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1275 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1279 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1281 swapstateIons_t *gs = &swapstate->ionType[ii];
1283 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1287 snew(gs->channel_label, gs->nMol);
1288 snew(gs->comp_from, gs->nMol);
1291 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1292 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1295 /* Save the last known whole positions to checkpoint
1296 * file to be able to also make multimeric channels whole in PBC */
1297 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1298 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1301 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1302 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1303 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1304 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1308 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], *swapstate->xc_old_whole_p[eChan0], list);
1309 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], *swapstate->xc_old_whole_p[eChan1], list);
1316 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1317 int fflags, energyhistory_t *enerhist,
1327 GMX_RELEASE_ASSERT(enerhist != nullptr, "With energy history, we need a valid enerhist pointer");
1329 /* This is stored/read for backward compatibility */
1330 int energyHistoryNumEnergies = 0;
1333 enerhist->nsteps = 0;
1335 enerhist->nsteps_sim = 0;
1336 enerhist->nsum_sim = 0;
1338 else if (enerhist != nullptr)
1340 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1343 delta_h_history_t *deltaH = enerhist->deltaHForeignLambdas.get();
1344 const StatePart part = StatePart::energyHistory;
1345 for (int i = 0; (i < eenhNR && ret == 0); i++)
1347 if (fflags & (1<<i))
1351 case eenhENERGY_N: ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list); break;
1352 case eenhENERGY_AVER: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list); break;
1353 case eenhENERGY_SUM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list); break;
1354 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1355 case eenhENERGY_SUM_SIM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list); break;
1356 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1357 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1358 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1359 case eenhENERGY_DELTA_H_NN:
1362 if (!bRead && deltaH != nullptr)
1364 numDeltaH = deltaH->dh.size();
1366 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1369 if (deltaH == nullptr)
1371 enerhist->deltaHForeignLambdas = gmx::compat::make_unique<delta_h_history_t>();
1372 deltaH = enerhist->deltaHForeignLambdas.get();
1374 deltaH->dh.resize(numDeltaH);
1375 deltaH->start_lambda_set = FALSE;
1379 case eenhENERGY_DELTA_H_LIST:
1380 for (auto dh : deltaH->dh)
1382 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1385 case eenhENERGY_DELTA_H_STARTTIME:
1386 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list); break;
1387 case eenhENERGY_DELTA_H_STARTLAMBDA:
1388 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list); break;
1390 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1391 "You are probably reading a new checkpoint file with old code", i);
1396 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1398 /* Assume we have an old file format and copy sum to sum_sim */
1399 enerhist->ener_sum_sim = enerhist->ener_sum;
1402 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1403 !(fflags & (1<<eenhENERGY_NSTEPS)))
1405 /* Assume we have an old file format and copy nsum to nsteps */
1406 enerhist->nsteps = enerhist->nsum;
1408 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1409 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1411 /* Assume we have an old file format and copy nsum to nsteps */
1412 enerhist->nsteps_sim = enerhist->nsum_sim;
1418 static int do_cpt_df_hist(XDR *xd, int fflags, int nlambda, df_history_t **dfhistPtr, FILE *list)
1427 if (*dfhistPtr == nullptr)
1429 snew(*dfhistPtr, 1);
1430 (*dfhistPtr)->nlambda = nlambda;
1431 init_df_history(*dfhistPtr, nlambda);
1433 df_history_t *dfhist = *dfhistPtr;
1435 const StatePart part = StatePart::freeEnergyHistory;
1436 for (int i = 0; (i < edfhNR && ret == 0); i++)
1438 if (fflags & (1<<i))
1442 case edfhBEQUIL: ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list); break;
1443 case edfhNATLAMBDA: ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1444 case edfhWLHISTO: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1445 case edfhWLDELTA: ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list); break;
1446 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1447 case edfhSUMDG: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1448 case edfhSUMMINVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1449 case edfhSUMVAR: ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1450 case edfhACCUMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list); break;
1451 case edfhACCUMM: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list); break;
1452 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list); break;
1453 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list); break;
1454 case edfhTIJ: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list); break;
1455 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1458 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1459 "You are probably reading a new checkpoint file with old code", i);
1468 /* This function stores the last whole configuration of the reference and
1469 * average structure in the .cpt file
1471 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1472 int nED, edsamhistory_t *EDstate, FILE *list)
1479 EDstate->bFromCpt = bRead;
1482 /* When reading, init_edsam has not been called yet,
1483 * so we have to allocate memory first. */
1486 snew(EDstate->nref, EDstate->nED);
1487 snew(EDstate->old_sref, EDstate->nED);
1488 snew(EDstate->nav, EDstate->nED);
1489 snew(EDstate->old_sav, EDstate->nED);
1492 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1493 for (int i = 0; i < EDstate->nED; i++)
1497 /* Reference structure SREF */
1498 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1499 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1500 sprintf(buf, "ED%d x_ref", i+1);
1503 snew(EDstate->old_sref[i], EDstate->nref[i]);
1504 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1508 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1511 /* Average structure SAV */
1512 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1513 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1514 sprintf(buf, "ED%d x_av", i+1);
1517 snew(EDstate->old_sav[i], EDstate->nav[i]);
1518 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1522 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1529 static int do_cpt_correlation_grid(XDR *xd, gmx_bool bRead, gmx_unused int fflags,
1530 gmx::CorrelationGridHistory *corrGrid,
1531 FILE *list, int eawhh)
1535 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1536 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1537 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1541 initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize, corrGrid->blockDataListSize);
1544 for (gmx::CorrelationBlockDataHistory &blockData : corrGrid->blockDataBuffer)
1546 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1547 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1548 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1549 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1550 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1551 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1552 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1553 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1554 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1555 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1556 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1562 static int do_cpt_awh_bias(XDR *xd, gmx_bool bRead,
1563 int fflags, gmx::AwhBiasHistory *biasHistory,
1568 gmx::AwhBiasStateHistory *state = &biasHistory->state;
1569 for (int i = 0; (i < eawhhNR && ret == 0); i++)
1571 if (fflags & (1<<i))
1575 case eawhhIN_INITIAL:
1576 do_cpt_int_err(xd, eawhh_names[i], &state->in_initial, list); break;
1577 case eawhhEQUILIBRATEHISTOGRAM:
1578 do_cpt_int_err(xd, eawhh_names[i], &state->equilibrateHistogram, list); break;
1580 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list); break;
1586 numPoints = biasHistory->pointState.size();
1588 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1591 biasHistory->pointState.resize(numPoints);
1595 case eawhhCOORDPOINT:
1596 for (auto &psh : biasHistory->pointState)
1598 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1599 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1600 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1601 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1602 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1603 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1604 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1605 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1606 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1607 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
1608 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
1611 case eawhhUMBRELLAGRIDPOINT:
1612 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list); break;
1613 case eawhhUPDATELIST:
1614 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
1615 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
1617 case eawhhLOGSCALEDSAMPLEWEIGHT:
1618 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
1619 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
1621 case eawhhNUMUPDATES:
1622 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
1624 case eawhhFORCECORRELATIONGRID:
1625 ret = do_cpt_correlation_grid(xd, bRead, fflags, &biasHistory->forceCorrelationGrid, list, i);
1628 gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
1636 static int do_cpt_awh(XDR *xd, gmx_bool bRead,
1637 int fflags, gmx::AwhHistory *awhHistory,
1644 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
1646 if (awhHistory == nullptr)
1648 GMX_RELEASE_ASSERT(bRead, "do_cpt_awh should not be called for writing without an AwhHistory");
1650 awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
1651 awhHistory = awhHistoryLocal.get();
1654 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
1655 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
1656 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
1657 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
1658 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
1659 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
1661 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
1662 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
1667 numBias = awhHistory->bias.size();
1669 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
1673 awhHistory->bias.resize(numBias);
1675 for (auto &bias : awhHistory->bias)
1677 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
1683 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
1688 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1689 std::vector<gmx_file_position_t> *outputfiles,
1690 FILE *list, int file_version)
1693 gmx_off_t mask = 0xFFFFFFFFL;
1694 int offset_high, offset_low;
1695 std::array<char, CPTSTRLEN> buf;
1696 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
1698 // Ensure that reading pre-allocates outputfiles, while writing
1699 // writes what is already there.
1700 int nfiles = outputfiles->size();
1701 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
1707 outputfiles->resize(nfiles);
1710 for (auto &outputfile : *outputfiles)
1712 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1715 do_cpt_string_err(xd, "output filename", buf, list);
1716 std::strncpy(outputfile.filename, buf.data(), buf.size()-1);
1718 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1722 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1726 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32 ) | ( static_cast<gmx_off_t>(offset_low) & mask );
1730 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
1732 offset = outputfile.offset;
1740 offset_low = static_cast<int>(offset & mask);
1741 offset_high = static_cast<int>((offset >> 32) & mask);
1743 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1747 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1752 if (file_version >= 8)
1754 if (do_cpt_int(xd, "file_checksum_size", &outputfile.chksum_size,
1759 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfile.chksum, list) != 0)
1766 outputfile.chksum_size = -1;
1773 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1774 FILE *fplog, const t_commrec *cr,
1775 ivec domdecCells, int nppnodes,
1776 int eIntegrator, int simulation_part,
1777 gmx_bool bExpanded, int elamstats,
1778 int64_t step, double t,
1779 t_state *state, ObservablesHistory *observablesHistory)
1782 char *fntemp; /* the temporary checkpoint file name */
1784 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1787 if (DOMAINDECOMP(cr))
1789 npmenodes = cr->npmenodes;
1797 /* make the new temporary filename */
1798 snew(fntemp, std::strlen(fn)+5+STEPSTRSIZE);
1799 std::strcpy(fntemp, fn);
1800 fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1801 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1802 std::strcat(fntemp, suffix);
1803 std::strcat(fntemp, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
1805 /* if we can't rename, we just overwrite the cpt file.
1806 * dangerous if interrupted.
1808 snew(fntemp, std::strlen(fn));
1809 std::strcpy(fntemp, fn);
1811 char timebuf[STRLEN];
1812 gmx_format_current_time(timebuf, STRLEN);
1816 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1817 gmx_step_str(step, buf), timebuf);
1820 /* Get offsets for open files */
1821 auto outputfiles = gmx_fio_get_output_file_positions();
1823 fp = gmx_fio_open(fntemp, "w");
1826 if (state->ekinstate.bUpToDate)
1829 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1830 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1831 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1838 energyhistory_t *enerhist = observablesHistory->energyHistory.get();
1840 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
1842 flags_enh |= (1<<eenhENERGY_N) | (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSTEPS_SIM);
1843 if (enerhist->nsum > 0)
1845 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1846 (1<<eenhENERGY_NSUM));
1848 if (enerhist->nsum_sim > 0)
1850 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSUM_SIM));
1852 if (enerhist->deltaHForeignLambdas != nullptr)
1854 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1855 (1<< eenhENERGY_DELTA_H_LIST) |
1856 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1857 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1864 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1865 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1868 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1870 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1872 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1873 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1882 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
1884 flags_awhh |= ( (1 << eawhhIN_INITIAL) |
1885 (1 << eawhhEQUILIBRATEHISTOGRAM) |
1886 (1 << eawhhHISTSIZE) |
1887 (1 << eawhhNPOINTS) |
1888 (1 << eawhhCOORDPOINT) |
1889 (1 << eawhhUMBRELLAGRIDPOINT) |
1890 (1 << eawhhUPDATELIST) |
1891 (1 << eawhhLOGSCALEDSAMPLEWEIGHT) |
1892 (1 << eawhhNUMUPDATES) |
1893 (1 << eawhhFORCECORRELATIONGRID));
1896 /* We can check many more things now (CPU, acceleration, etc), but
1897 * it is highly unlikely to have two separate builds with exactly
1898 * the same version, user, time, and build host!
1901 int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
1903 edsamhistory_t *edsamhist = observablesHistory->edsamHistory.get();
1904 int nED = (edsamhist ? edsamhist->nED : 0);
1906 swaphistory_t *swaphist = observablesHistory->swapHistory.get();
1907 int eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
1909 CheckpointHeaderContents headerContents =
1911 0, {0}, {0}, {0}, {0}, GMX_DOUBLE, {0}, {0},
1912 eIntegrator, simulation_part, step, t, nppnodes,
1914 state->natoms, state->ngtc, state->nnhpres,
1915 state->nhchainlength, nlambda, state->flags, flags_eks, flags_enh, flags_dfh, flags_awhh,
1918 std::strcpy(headerContents.version, gmx_version());
1919 std::strcpy(headerContents.btime, BUILD_TIME);
1920 std::strcpy(headerContents.buser, BUILD_USER);
1921 std::strcpy(headerContents.bhost, BUILD_HOST);
1922 std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
1923 std::strcpy(headerContents.ftime, timebuf);
1924 if (DOMAINDECOMP(cr))
1926 copy_ivec(domdecCells, headerContents.dd_nc);
1929 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
1931 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0) ||
1932 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0) ||
1933 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0) ||
1934 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0) ||
1935 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0) ||
1936 (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0) ||
1937 (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0) ||
1938 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr,
1939 headerContents.file_version) < 0))
1941 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1944 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
1946 /* we really, REALLY, want to make sure to physically write the checkpoint,
1947 and all the files it depends on, out to disk. Because we've
1948 opened the checkpoint with gmx_fio_open(), it's in our list
1950 ret = gmx_fio_all_output_fsync();
1956 "Cannot fsync '%s'; maybe you are out of disk space?",
1957 gmx_fio_getname(ret));
1959 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
1965 gmx_warning("%s", buf);
1969 if (gmx_fio_close(fp) != 0)
1971 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1974 /* we don't move the checkpoint if the user specified they didn't want it,
1975 or if the fsyncs failed */
1977 if (!bNumberAndKeep && !ret)
1981 /* Rename the previous checkpoint file */
1982 std::strcpy(buf, fn);
1983 buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1984 std::strcat(buf, "_prev");
1985 std::strcat(buf, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
1987 /* we copy here so that if something goes wrong between now and
1988 * the rename below, there's always a state.cpt.
1989 * If renames are atomic (such as in POSIX systems),
1990 * this copying should be unneccesary.
1992 gmx_file_copy(fn, buf, FALSE);
1993 /* We don't really care if this fails:
1994 * there's already a new checkpoint.
1997 gmx_file_rename(fn, buf);
2000 if (gmx_file_rename(fntemp, fn) != 0)
2002 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2005 #endif /* GMX_NO_RENAME */
2010 /*code for alternate checkpointing scheme. moved from top of loop over
2012 fcRequestCheckPoint();
2013 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
2015 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
2017 #endif /* end GMX_FAHCORE block */
2020 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
2022 bool foundMismatch = (p != f);
2030 fprintf(fplog, " %s mismatch,\n", type);
2031 fprintf(fplog, " current program: %d\n", p);
2032 fprintf(fplog, " checkpoint file: %d\n", f);
2033 fprintf(fplog, "\n");
2037 static void check_string(FILE *fplog, const char *type, const char *p,
2038 const char *f, gmx_bool *mm)
2040 bool foundMismatch = (std::strcmp(p, f) != 0);
2048 fprintf(fplog, " %s mismatch,\n", type);
2049 fprintf(fplog, " current program: %s\n", p);
2050 fprintf(fplog, " checkpoint file: %s\n", f);
2051 fprintf(fplog, "\n");
2055 static void check_match(FILE *fplog, const t_commrec *cr, const ivec dd_nc,
2056 const CheckpointHeaderContents &headerContents,
2057 gmx_bool reproducibilityRequested)
2059 /* Note that this check_string on the version will also print a message
2060 * when only the minor version differs. But we only print a warning
2061 * message further down with reproducibilityRequested=TRUE.
2063 gmx_bool versionDiffers = FALSE;
2064 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2066 gmx_bool precisionDiffers = FALSE;
2067 check_int (fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2068 if (precisionDiffers)
2070 const char msg_precision_difference[] =
2071 "You are continuing a simulation with a different precision. Not matching\n"
2072 "single/double precision will lead to precision or performance loss.\n";
2075 fprintf(fplog, "%s\n", msg_precision_difference);
2079 gmx_bool mm = (versionDiffers || precisionDiffers);
2081 if (reproducibilityRequested)
2083 check_string(fplog, "Build time", BUILD_TIME, headerContents.btime, &mm);
2084 check_string(fplog, "Build user", BUILD_USER, headerContents.buser, &mm);
2085 check_string(fplog, "Build host", BUILD_HOST, headerContents.bhost, &mm);
2086 check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(), headerContents.fprog, &mm);
2088 check_int (fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2091 if (cr->nnodes > 1 && reproducibilityRequested)
2093 check_int (fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2095 int npp = cr->nnodes;
2096 if (cr->npmenodes >= 0)
2098 npp -= cr->npmenodes;
2100 int npp_f = headerContents.nnodes;
2101 if (headerContents.npme >= 0)
2103 npp_f -= headerContents.npme;
2107 check_int (fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2108 check_int (fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2109 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2115 /* Gromacs should be able to continue from checkpoints between
2116 * different patch level versions, but we do not guarantee
2117 * compatibility between different major/minor versions - check this.
2121 sscanf(gmx_version(), "%5d", &gmx_major);
2122 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2123 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2125 const char msg_major_version_difference[] =
2126 "The current GROMACS major version is not identical to the one that\n"
2127 "generated the checkpoint file. In principle GROMACS does not support\n"
2128 "continuation from checkpoints between different versions, so we advise\n"
2129 "against this. If you still want to try your luck we recommend that you use\n"
2130 "the -noappend flag to keep your output files from the two versions separate.\n"
2131 "This might also work around errors where the output fields in the energy\n"
2132 "file have changed between the different versions.\n";
2134 const char msg_mismatch_notice[] =
2135 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2136 "Continuation is exact, but not guaranteed to be binary identical.\n";
2138 if (majorVersionDiffers)
2142 fprintf(fplog, "%s\n", msg_major_version_difference);
2145 else if (reproducibilityRequested)
2147 /* Major & minor versions match at least, but something is different. */
2150 fprintf(fplog, "%s\n", msg_mismatch_notice);
2156 static void read_checkpoint(const char *fn, FILE **pfplog,
2157 const t_commrec *cr,
2160 int *init_fep_state,
2161 CheckpointHeaderContents *headerContents,
2162 t_state *state, gmx_bool *bReadEkin,
2163 ObservablesHistory *observablesHistory,
2164 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend,
2165 gmx_bool reproducibilityRequested)
2169 char buf[STEPSTRSIZE];
2171 t_fileio *chksum_file;
2172 FILE * fplog = *pfplog;
2173 unsigned char digest[16];
2174 #if !defined __native_client__ && !GMX_NATIVE_WINDOWS
2175 struct flock fl; /* don't initialize here: the struct order is OS
2179 #if !defined __native_client__ && !GMX_NATIVE_WINDOWS
2180 fl.l_type = F_WRLCK;
2181 fl.l_whence = SEEK_SET;
2187 fp = gmx_fio_open(fn, "r");
2188 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2190 if (bAppendOutputFiles &&
2191 headerContents->file_version >= 13 && headerContents->double_prec != GMX_DOUBLE)
2193 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
2196 /* This will not be written if we do appending, since fplog is still NULL then */
2199 fprintf(fplog, "\n");
2200 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2201 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2202 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2203 fprintf(fplog, " GROMACS build time: %s\n", headerContents->btime);
2204 fprintf(fplog, " GROMACS build user: %s\n", headerContents->buser);
2205 fprintf(fplog, " GROMACS build host: %s\n", headerContents->bhost);
2206 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2207 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2208 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2209 fprintf(fplog, " time: %f\n", headerContents->t);
2210 fprintf(fplog, "\n");
2213 if (headerContents->natoms != state->natoms)
2215 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", headerContents->natoms, state->natoms);
2217 if (headerContents->ngtc != state->ngtc)
2219 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", headerContents->ngtc, state->ngtc);
2221 if (headerContents->nnhpres != state->nnhpres)
2223 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", headerContents->nnhpres, state->nnhpres);
2226 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2227 if (headerContents->nlambda != nlambdaHistory)
2229 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", headerContents->nlambda, nlambdaHistory);
2232 init_gtc_state(state, state->ngtc, state->nnhpres, headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2233 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2235 if (headerContents->eIntegrator != eIntegrator)
2237 gmx_fatal(FARGS, "Cannot change integrator during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2240 if (headerContents->flags_state != state->flags)
2242 gmx_fatal(FARGS, "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2247 check_match(fplog, cr, dd_nc, *headerContents,
2248 reproducibilityRequested);
2251 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2252 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
2253 Investigate for 5.0. */
2258 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2263 *bReadEkin = ((headerContents->flags_eks & (1<<eeksEKINH)) ||
2264 (headerContents->flags_eks & (1<<eeksEKINF)) ||
2265 (headerContents->flags_eks & (1<<eeksEKINO)) ||
2266 ((headerContents->flags_eks & (1<<eeksEKINSCALEF)) |
2267 (headerContents->flags_eks & (1<<eeksEKINSCALEH)) |
2268 (headerContents->flags_eks & (1<<eeksVSCALE))));
2270 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2272 observablesHistory->energyHistory = gmx::compat::make_unique<energyhistory_t>();
2274 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2275 headerContents->flags_enh, observablesHistory->energyHistory.get(), nullptr);
2281 if (headerContents->file_version < 6)
2283 gmx_fatal(FARGS, "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2286 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda, &state->dfhist, nullptr);
2292 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2294 observablesHistory->edsamHistory = gmx::compat::make_unique<edsamhistory_t>(edsamhistory_t {});
2296 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED, observablesHistory->edsamHistory.get(), nullptr);
2302 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2304 state->awhHistory = std::make_shared<gmx::AwhHistory>();
2306 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2307 headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2313 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2315 observablesHistory->swapHistory = gmx::compat::make_unique<swaphistory_t>(swaphistory_t {});
2317 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords, observablesHistory->swapHistory.get(), nullptr);
2323 std::vector<gmx_file_position_t> outputfiles;
2324 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2330 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2335 if (gmx_fio_close(fp) != 0)
2337 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2340 /* If the user wants to append to output files,
2341 * we use the file pointer positions of the output files stored
2342 * in the checkpoint file and truncate the files such that any frames
2343 * written after the checkpoint time are removed.
2344 * All files are md5sum checked such that we can be sure that
2345 * we do not truncate other (maybe imprortant) files.
2347 if (bAppendOutputFiles)
2349 if (outputfiles.empty())
2351 gmx_fatal(FARGS, "No names of output files were recorded in the checkpoint");
2353 if (fn2ftp(outputfiles[0].filename) != efLOG)
2355 /* make sure first file is log file so that it is OK to use it for
2358 gmx_fatal(FARGS, "The first output file should always be the log "
2359 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2361 bool firstFile = true;
2362 for (const auto &outputfile : outputfiles)
2364 if (outputfile.offset < 0)
2366 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2367 "is larger than 2 GB, but mdrun did not support large file"
2368 " offsets. Can not append. Run mdrun with -noappend",
2369 outputfile.filename);
2372 chksum_file = gmx_fio_open(outputfile.filename, "a");
2375 chksum_file = gmx_fio_open(outputfile.filename, "r+");
2380 /* Note that there are systems where the lock operation
2381 * will succeed, but a second process can also lock the file.
2382 * We should probably try to detect this.
2384 #if defined __native_client__
2388 #elif GMX_NATIVE_WINDOWS
2389 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2391 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl) == -1)
2394 if (errno == ENOSYS)
2398 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2404 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfile.filename);
2408 else if (errno == EACCES || errno == EAGAIN)
2410 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2411 "simulation?", outputfile.filename);
2415 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2416 outputfile.filename, std::strerror(errno));
2421 /* compute md5 chksum */
2422 if (outputfile.chksum_size != -1)
2424 if (gmx_fio_get_file_md5(chksum_file, outputfile.offset,
2425 digest) != outputfile.chksum_size) /*at the end of the call the file position is at the end of the file*/
2427 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2428 outputfile.chksum_size,
2429 outputfile.filename);
2432 /* log file needs to be seeked in case we need to truncate
2433 * (other files are truncated below)*/
2436 if (gmx_fio_seek(chksum_file, outputfile.offset))
2438 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", std::strerror(errno));
2443 /* open log file here - so that lock is never lifted
2444 * after chksum is calculated */
2447 *pfplog = gmx_fio_getfp(chksum_file);
2451 gmx_fio_close(chksum_file);
2454 /* compare md5 chksum */
2455 if (outputfile.chksum_size != -1 &&
2456 memcmp(digest, outputfile.chksum, 16) != 0)
2460 fprintf(debug, "chksum for %s: ", outputfile.filename);
2461 for (j = 0; j < 16; j++)
2463 fprintf(debug, "%02x", digest[j]);
2465 fprintf(debug, "\n");
2467 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2468 outputfile.filename);
2473 if (!firstFile) /* log file is already seeked to correct position */
2475 #if !GMX_NATIVE_WINDOWS || !defined(GMX_FAHCORE)
2476 /* For FAHCORE, we do this elsewhere*/
2477 rc = gmx_truncate(outputfile.filename, outputfile.offset);
2480 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfile.filename);
2490 void load_checkpoint(const char *fn, FILE **fplog,
2491 const t_commrec *cr, const ivec dd_nc,
2492 t_inputrec *ir, t_state *state,
2493 gmx_bool *bReadEkin,
2494 ObservablesHistory *observablesHistory,
2495 gmx_bool bAppend, gmx_bool bForceAppend,
2496 gmx_bool reproducibilityRequested)
2498 CheckpointHeaderContents headerContents;
2501 /* Read the state from the checkpoint file */
2502 read_checkpoint(fn, fplog,
2504 ir->eI, &(ir->fepvals->init_fep_state),
2506 state, bReadEkin, observablesHistory,
2507 bAppend, bForceAppend,
2508 reproducibilityRequested);
2512 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
2513 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2515 ir->bContinuation = TRUE;
2516 if (ir->nsteps >= 0)
2518 ir->nsteps += ir->init_step - headerContents.step;
2520 ir->init_step = headerContents.step;
2521 ir->simulation_part = headerContents.simulation_part + 1;
2524 void read_checkpoint_part_and_step(const char *filename,
2525 int *simulation_part,
2530 if (filename == nullptr ||
2531 !gmx_fexist(filename) ||
2532 (!(fp = gmx_fio_open(filename, "r"))))
2534 *simulation_part = 0;
2539 CheckpointHeaderContents headerContents;
2540 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2542 *simulation_part = headerContents.simulation_part;
2543 *step = headerContents.step;
2546 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2547 int64_t *step, double *t, t_state *state,
2548 std::vector<gmx_file_position_t> *outputfiles)
2552 CheckpointHeaderContents headerContents;
2553 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2554 *simulation_part = headerContents.simulation_part;
2555 *step = headerContents.step;
2556 *t = headerContents.t;
2557 state->natoms = headerContents.natoms;
2558 state->ngtc = headerContents.ngtc;
2559 state->nnhpres = headerContents.nnhpres;
2560 state->nhchainlength = headerContents.nhchainlength;
2561 state->flags = headerContents.flags_state;
2563 do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2568 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2574 energyhistory_t enerhist;
2575 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2576 headerContents.flags_enh, &enerhist, nullptr);
2581 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda, &state->dfhist, nullptr);
2587 edsamhistory_t edsamhist = {};
2588 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2594 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2595 headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2601 swaphistory_t swaphist = {};
2602 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2608 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2610 nullptr, headerContents.file_version);
2617 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2624 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2627 int simulation_part;
2631 std::vector<gmx_file_position_t> outputfiles;
2632 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, &outputfiles);
2634 fr->natoms = state.natoms;
2636 fr->step = int64_to_int(step,
2637 "conversion of checkpoint to trajectory");
2641 fr->lambda = state.lambda[efptFEP];
2642 fr->fep_state = state.fep_state;
2644 fr->bX = (state.flags & (1<<estX));
2647 fr->x = makeRvecArray(state.x, state.natoms);
2649 fr->bV = (state.flags & (1<<estV));
2652 fr->v = makeRvecArray(state.v, state.natoms);
2655 fr->bBox = (state.flags & (1<<estBOX));
2658 copy_mat(state.box, fr->box);
2662 void list_checkpoint(const char *fn, FILE *out)
2669 fp = gmx_fio_open(fn, "r");
2670 CheckpointHeaderContents headerContents;
2671 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
2672 state.natoms = headerContents.natoms;
2673 state.ngtc = headerContents.ngtc;
2674 state.nnhpres = headerContents.nnhpres;
2675 state.nhchainlength = headerContents.nhchainlength;
2676 state.flags = headerContents.flags_state;
2677 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
2682 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
2688 energyhistory_t enerhist;
2689 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2690 headerContents.flags_enh, &enerhist, out);
2694 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2695 headerContents.flags_dfh, headerContents.nlambda, &state.dfhist, out);
2700 edsamhistory_t edsamhist = {};
2701 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
2706 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2707 headerContents.flags_awhh, state.awhHistory.get(), out);
2712 swaphistory_t swaphist = {};
2713 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
2718 std::vector<gmx_file_position_t> outputfiles;
2719 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
2724 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2731 if (gmx_fio_close(fp) != 0)
2733 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2737 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2739 read_checkpoint_simulation_part_and_filenames(t_fileio *fp,
2740 int *simulation_part,
2741 std::vector<gmx_file_position_t> *outputfiles)
2747 read_checkpoint_data(fp, simulation_part, &step, &t, &state, outputfiles);
2748 if (gmx_fio_close(fp) != 0)
2750 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");