2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* The source code in this file should be thread-safe.
37 Please keep it that way. */
40 #include "checkpoint.h"
51 #include "buildinfo.h"
52 #include "gromacs/fileio/filetypes.h"
53 #include "gromacs/fileio/gmxfio.h"
54 #include "gromacs/fileio/gmxfio_xdr.h"
55 #include "gromacs/fileio/xdr_datatype.h"
56 #include "gromacs/fileio/xdrf.h"
57 #include "gromacs/gmxlib/network.h"
58 #include "gromacs/math/vec.h"
59 #include "gromacs/math/vecdump.h"
60 #include "gromacs/math/vectypes.h"
61 #include "gromacs/mdtypes/awh_correlation_history.h"
62 #include "gromacs/mdtypes/awh_history.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/mdtypes/df_history.h"
65 #include "gromacs/mdtypes/edsamhistory.h"
66 #include "gromacs/mdtypes/energyhistory.h"
67 #include "gromacs/mdtypes/inputrec.h"
68 #include "gromacs/mdtypes/md_enums.h"
69 #include "gromacs/mdtypes/mdrunoptions.h"
70 #include "gromacs/mdtypes/observableshistory.h"
71 #include "gromacs/mdtypes/pullhistory.h"
72 #include "gromacs/mdtypes/state.h"
73 #include "gromacs/mdtypes/swaphistory.h"
74 #include "gromacs/trajectory/trajectoryframe.h"
75 #include "gromacs/utility/arrayref.h"
76 #include "gromacs/utility/baseversion.h"
77 #include "gromacs/utility/cstringutil.h"
78 #include "gromacs/utility/fatalerror.h"
79 #include "gromacs/utility/futil.h"
80 #include "gromacs/utility/gmxassert.h"
81 #include "gromacs/utility/int64_to_int.h"
82 #include "gromacs/utility/keyvaluetree.h"
83 #include "gromacs/utility/keyvaluetreebuilder.h"
84 #include "gromacs/utility/keyvaluetreeserializer.h"
85 #include "gromacs/utility/mdmodulenotification.h"
86 #include "gromacs/utility/programcontext.h"
87 #include "gromacs/utility/smalloc.h"
88 #include "gromacs/utility/sysinfo.h"
89 #include "gromacs/utility/txtdump.h"
92 # include "corewrap.h"
95 #define CPT_MAGIC1 171817
96 #define CPT_MAGIC2 171819
98 /*! \brief Enum of values that describe the contents of a cpt file
99 * whose format matches a version number
101 * The enum helps the code be more self-documenting and ensure merges
102 * do not silently resolve when two patches make the same bump. When
103 * adding new functionality, add a new element just above cptv_Count
104 * in this enumeration, and write code below that does the right thing
105 * according to the value of file_version.
109 cptv_Unknown = 17, /**< Version before numbering scheme */
110 cptv_RemoveBuildMachineInformation, /**< remove functionality that makes mdrun builds non-reproducible */
111 cptv_ComPrevStepAsPullGroupReference, /**< Allow using COM of previous step as pull group PBC reference */
112 cptv_PullAverage, /**< Added possibility to output average pull force and position */
113 cptv_MdModules, /**< Added checkpointing for MdModules */
114 cptv_Count /**< the total number of cptv versions */
117 /*! \brief Version number of the file format written to checkpoint
118 * files by this version of the code.
120 * cpt_version should normally only be changed, via adding a new field
121 * to cptv enumeration, when the header or footer format changes.
123 * The state data format itself is backward and forward compatible.
124 * But old code can not read a new entry that is present in the file
125 * (but can read a new format when new entries are not present).
127 * The cpt_version increases whenever the file format in the main
128 * development branch changes, due to an extension of the cptv enum above.
129 * Backward compatibility for reading old run input files is maintained
130 * by checking this version number against that of the file and then using
131 * the correct code path. */
132 static const int cpt_version = cptv_Count - 1;
135 const char* est_names[estNR] = { "FE-lambda",
141 "thermostat-integral",
146 "LD-rng-unsupported",
147 "LD-rng-i-unsupported",
160 "MC-rng-unsupported",
161 "MC-rng-i-unsupported",
162 "barostat-integral" };
179 static const char* eeks_names[eeksNR] = { "Ekin_n", "Ekinh", "dEkindlambda",
180 "mv_cos", "Ekinf", "Ekinh_old",
181 "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC",
193 eenhENERGY_NSTEPS_SIM,
194 eenhENERGY_DELTA_H_NN,
195 eenhENERGY_DELTA_H_LIST,
196 eenhENERGY_DELTA_H_STARTTIME,
197 eenhENERGY_DELTA_H_STARTLAMBDA,
203 epullhPULL_NUMCOORDINATES,
204 epullhPULL_NUMGROUPS,
205 epullhPULL_NUMVALUESINXSUM,
206 epullhPULL_NUMVALUESINFSUM,
212 epullcoordh_VALUE_REF_SUM,
213 epullcoordh_VALUE_SUM,
214 epullcoordh_DR01_SUM,
215 epullcoordh_DR23_SUM,
216 epullcoordh_DR45_SUM,
217 epullcoordh_FSCAL_SUM,
218 epullcoordh_DYNAX_SUM,
228 static const char* eenh_names[eenhNR] = { "energy_n",
237 "energy_delta_h_list",
238 "energy_delta_h_start_time",
239 "energy_delta_h_start_lambda" };
241 static const char* ePullhNames[epullhNR] = { "pullhistory_numcoordinates", "pullhistory_numgroups",
242 "pullhistory_numvaluesinxsum",
243 "pullhistory_numvaluesinfsum" };
245 /* free energy history variables -- need to be preserved over checkpoint */
264 /* free energy history variable names */
265 static const char* edfh_names[edfhNR] = { "bEquilibrated",
267 "Wang-Landau Histogram",
275 "accumulated_plus_2",
276 "accumulated_minus_2",
280 /* AWH biasing history variables */
284 eawhhEQUILIBRATEHISTOGRAM,
288 eawhhUMBRELLAGRIDPOINT,
290 eawhhLOGSCALEDSAMPLEWEIGHT,
292 eawhhFORCECORRELATIONGRID,
296 static const char* eawhh_names[eawhhNR] = { "awh_in_initial", "awh_equilibrateHistogram",
297 "awh_histsize", "awh_npoints",
298 "awh_coordpoint", "awh_umbrellaGridpoint",
299 "awh_updatelist", "awh_logScaledSampleWeight",
300 "awh_numupdates", "awh_forceCorrelationGrid" };
308 static const char* epull_prev_step_com_names[epullsNR] = { "Pull groups prev step COM" };
311 //! Higher level vector element type, only used for formatting checkpoint dumps
312 enum class CptElementType
314 integer, //!< integer
315 real, //!< float or double, not linked to precision of type real
316 real3, //!< float[3] or double[3], not linked to precision of type real
317 matrix3x3 //!< float[3][3] or double[3][3], not linked to precision of type real
320 //! \brief Parts of the checkpoint state, only used for reporting
323 microState, //!< The microstate of the simulated system
324 kineticEnergy, //!< Kinetic energy, needed for T/P-coupling state
325 energyHistory, //!< Energy observable statistics
326 freeEnergyHistory, //!< Free-energy state and observable statistics
327 accWeightHistogram, //!< Accelerated weight histogram method state
328 pullState, //!< COM of previous step.
329 pullHistory //!< Pull history statistics (sums since last written output)
332 //! \brief Return the name of a checkpoint entry based on part and part entry
333 static const char* entryName(StatePart part, int ecpt)
337 case StatePart::microState: return est_names[ecpt];
338 case StatePart::kineticEnergy: return eeks_names[ecpt];
339 case StatePart::energyHistory: return eenh_names[ecpt];
340 case StatePart::freeEnergyHistory: return edfh_names[ecpt];
341 case StatePart::accWeightHistogram: return eawhh_names[ecpt];
342 case StatePart::pullState: return epull_prev_step_com_names[ecpt];
343 case StatePart::pullHistory: return ePullhNames[ecpt];
349 static void cp_warning(FILE* fp)
351 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
354 [[noreturn]] static void cp_error()
356 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
359 static void do_cpt_string_err(XDR* xd, const char* desc, gmx::ArrayRef<char> s, FILE* list)
361 char* data = s.data();
362 if (xdr_string(xd, &data, s.size()) == 0)
368 fprintf(list, "%s = %s\n", desc, data);
372 static int do_cpt_int(XDR* xd, const char* desc, int* i, FILE* list)
374 if (xdr_int(xd, i) == 0)
380 fprintf(list, "%s = %d\n", desc, *i);
385 static int do_cpt_u_chars(XDR* xd, const char* desc, int n, unsigned char* i, FILE* list)
389 fprintf(list, "%s = ", desc);
392 for (int j = 0; j < n && res; j++)
394 res &= xdr_u_char(xd, &i[j]);
397 fprintf(list, "%02x", i[j]);
412 static void do_cpt_int_err(XDR* xd, const char* desc, int* i, FILE* list)
414 if (do_cpt_int(xd, desc, i, list) < 0)
420 static void do_cpt_bool_err(XDR* xd, const char* desc, bool* b, FILE* list)
422 int i = static_cast<int>(*b);
424 if (do_cpt_int(xd, desc, &i, list) < 0)
432 static void do_cpt_step_err(XDR* xd, const char* desc, int64_t* i, FILE* list)
434 char buf[STEPSTRSIZE];
436 if (xdr_int64(xd, i) == 0)
442 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
446 static void do_cpt_double_err(XDR* xd, const char* desc, double* f, FILE* list)
448 if (xdr_double(xd, f) == 0)
454 fprintf(list, "%s = %f\n", desc, *f);
458 static void do_cpt_real_err(XDR* xd, real* f)
461 bool_t res = xdr_double(xd, f);
463 bool_t res = xdr_float(xd, f);
471 static void do_cpt_n_rvecs_err(XDR* xd, const char* desc, int n, rvec f[], FILE* list)
473 for (int i = 0; i < n; i++)
475 for (int j = 0; j < DIM; j++)
477 do_cpt_real_err(xd, &f[i][j]);
483 pr_rvecs(list, 0, desc, f, n);
495 static const int value = xdr_datatype_int;
499 struct xdr_type<float>
501 static const int value = xdr_datatype_float;
505 struct xdr_type<double>
507 static const int value = xdr_datatype_double;
510 //! \brief Returns size in byte of an xdr_datatype
511 static inline unsigned int sizeOfXdrType(int xdrType)
515 case xdr_datatype_int: return sizeof(int);
516 case xdr_datatype_float: return sizeof(float);
517 case xdr_datatype_double: return sizeof(double);
518 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
524 //! \brief Returns the XDR process function for i/o of an XDR type
525 static inline xdrproc_t xdrProc(int xdrType)
529 case xdr_datatype_int: return reinterpret_cast<xdrproc_t>(xdr_int);
530 case xdr_datatype_float: return reinterpret_cast<xdrproc_t>(xdr_float);
531 case xdr_datatype_double: return reinterpret_cast<xdrproc_t>(xdr_double);
532 default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
538 /*! \brief Lists or only reads an xdr vector from checkpoint file
540 * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
541 * The header for the print is set by \p part and \p ecpt.
542 * The formatting of the printing is set by \p cptElementType.
543 * When list==NULL only reads the elements.
545 static bool_t listXdrVector(XDR* xd, StatePart part, int ecpt, int nf, int xdrType, FILE* list, CptElementType cptElementType)
549 const unsigned int elemSize = sizeOfXdrType(xdrType);
550 std::vector<char> data(nf * elemSize);
551 res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
557 case xdr_datatype_int:
558 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int*>(data.data()), nf, TRUE);
560 case xdr_datatype_float:
562 if (cptElementType == CptElementType::real3)
564 pr_rvecs(list, 0, entryName(part, ecpt),
565 reinterpret_cast<const rvec*>(data.data()), nf / 3);
570 /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
571 pr_fvec(list, 0, entryName(part, ecpt),
572 reinterpret_cast<const float*>(data.data()), nf, TRUE);
575 case xdr_datatype_double:
577 if (cptElementType == CptElementType::real3)
579 pr_rvecs(list, 0, entryName(part, ecpt),
580 reinterpret_cast<const rvec*>(data.data()), nf / 3);
585 /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
586 pr_dvec(list, 0, entryName(part, ecpt),
587 reinterpret_cast<const double*>(data.data()), nf, TRUE);
590 default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
597 //! \brief Convert a double array, typed char*, to float
598 gmx_unused static void convertArrayRealPrecision(const char* c, float* v, int n)
600 const double* d = reinterpret_cast<const double*>(c);
601 for (int i = 0; i < n; i++)
603 v[i] = static_cast<float>(d[i]);
607 //! \brief Convert a float array, typed char*, to double
608 static void convertArrayRealPrecision(const char* c, double* v, int n)
610 const float* f = reinterpret_cast<const float*>(c);
611 for (int i = 0; i < n; i++)
613 v[i] = static_cast<double>(f[i]);
617 //! \brief Generate an error for trying to convert to integer
618 static void convertArrayRealPrecision(const char gmx_unused* c, int gmx_unused* v, int gmx_unused n)
620 GMX_RELEASE_ASSERT(false,
621 "We only expect type mismatches between float and double, not integer");
624 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
626 * This is the only routine that does the actually i/o of real vector,
627 * all other routines are intermediate level routines for specific real
628 * data types, calling this routine.
629 * Currently this routine is (too) complex, since it handles both real *
630 * and std::vector<real>. Using real * is deprecated and this routine
631 * will simplify a lot when only std::vector needs to be supported.
633 * The routine is generic to vectors with different allocators,
634 * because as part of reading a checkpoint there are vectors whose
635 * size is not known until reading has progressed far enough, so a
636 * resize method must be called.
638 * When not listing, we use either v or vector, depending on which is !=NULL.
639 * If nval >= 0, nval is used; on read this should match the passed value.
640 * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
641 * the value is stored in nptr
643 template<typename T, typename AllocatorType>
644 static int doVectorLow(XDR* xd,
651 std::vector<T, AllocatorType>* vector,
653 CptElementType cptElementType)
655 GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr)
656 || (v == nullptr && vector != nullptr),
657 "Without list, we should have exactly one of v and vector != NULL");
661 int numElemInTheFile;
666 GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
667 numElemInTheFile = nval;
673 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
674 numElemInTheFile = *nptr;
678 numElemInTheFile = vector->size();
682 /* Read/write the vector element count */
683 res = xdr_int(xd, &numElemInTheFile);
688 /* Read/write the element data type */
689 constexpr int xdrTypeInTheCode = xdr_type<T>::value;
690 int xdrTypeInTheFile = xdrTypeInTheCode;
691 res = xdr_int(xd, &xdrTypeInTheFile);
697 if (list == nullptr && (sflags & (1 << ecpt)))
701 if (numElemInTheFile != nval)
704 "Count mismatch for state entry %s, code count is %d, file count is %d\n",
705 entryName(part, ecpt), nval, numElemInTheFile);
708 else if (nptr != nullptr)
710 *nptr = numElemInTheFile;
713 bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
717 sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
718 entryName(part, ecpt), xdr_datatype_names[xdrTypeInTheCode],
719 xdr_datatype_names[xdrTypeInTheFile]);
721 /* Matching int and real should never occur, but check anyhow */
722 if (xdrTypeInTheFile == xdr_datatype_int || xdrTypeInTheCode == xdr_datatype_int)
725 "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
734 snew(*v, numElemInTheFile);
740 /* This conditional ensures that we don't resize on write.
741 * In particular in the state where this code was written
742 * vector has a size of numElemInThefile and we
743 * don't want to lose that padding here.
745 if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
747 vector->resize(numElemInTheFile);
755 vChar = reinterpret_cast<char*>(vp);
759 snew(vChar, numElemInTheFile * sizeOfXdrType(xdrTypeInTheFile));
761 res = xdr_vector(xd, vChar, numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
762 xdrProc(xdrTypeInTheFile));
770 /* In the old code float-double conversion came for free.
771 * In the new code we still support it, mainly because
772 * the tip4p_continue regression test makes use of this.
773 * It's an open question if we do or don't want to allow this.
775 convertArrayRealPrecision(vChar, vp, numElemInTheFile);
781 res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile, list, cptElementType);
787 //! \brief Read/Write a std::vector, on read checks the number of elements matches \p numElements, if specified.
790 doVector(XDR* xd, StatePart part, int ecpt, int sflags, std::vector<T>* vector, FILE* list, int numElements = -1)
792 return doVectorLow<T>(xd, part, ecpt, sflags, numElements, nullptr, nullptr, vector, list,
793 CptElementType::real);
796 //! \brief Read/Write an ArrayRef<real>.
797 static int doRealArrayRef(XDR* xd, StatePart part, int ecpt, int sflags, gmx::ArrayRef<real> vector, FILE* list)
799 real* v_real = vector.data();
800 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, vector.size(), nullptr,
801 &v_real, nullptr, list, CptElementType::real);
804 //! Convert from view of RVec to view of real.
805 static gmx::ArrayRef<real> realArrayRefFromRVecArrayRef(gmx::ArrayRef<gmx::RVec> ofRVecs)
807 return gmx::arrayRefFromArray<real>(reinterpret_cast<real*>(ofRVecs.data()), ofRVecs.size() * DIM);
810 //! \brief Read/Write a PaddedVector whose value_type is RVec.
811 template<typename PaddedVectorOfRVecType>
813 doRvecVector(XDR* xd, StatePart part, int ecpt, int sflags, PaddedVectorOfRVecType* v, int numAtoms, FILE* list)
815 const int numReals = numAtoms * DIM;
820 sflags & (1 << ecpt),
821 "When not listing, the flag for the entry should be set when requesting i/o");
822 GMX_RELEASE_ASSERT(v->size() == numAtoms, "v should have sufficient size for numAtoms");
824 return doRealArrayRef(xd, part, ecpt, sflags, realArrayRefFromRVecArrayRef(makeArrayRef(*v)), list);
828 // Use the rebind facility to change the value_type of the
829 // allocator from RVec to real.
830 using realAllocator =
831 typename std::allocator_traits<typename PaddedVectorOfRVecType::allocator_type>::template rebind_alloc<real>;
832 return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr,
833 nullptr, list, CptElementType::real);
837 /* This function stores n along with the reals for reading,
838 * but on reading it assumes that n matches the value in the checkpoint file,
839 * a fatal error is generated when this is not the case.
841 static int do_cpte_reals(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
843 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr,
844 list, CptElementType::real);
847 /* This function does the same as do_cpte_reals,
848 * except that on reading it ignores the passed value of *n
849 * and stores the value read from the checkpoint file in *n.
851 static int do_cpte_n_reals(XDR* xd, StatePart part, int ecpt, int sflags, int* n, real** v, FILE* list)
853 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list,
854 CptElementType::real);
857 static int do_cpte_real(XDR* xd, StatePart part, int ecpt, int sflags, real* r, FILE* list)
859 return doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr,
860 list, CptElementType::real);
863 static int do_cpte_ints(XDR* xd, StatePart part, int ecpt, int sflags, int n, int** v, FILE* list)
865 return doVectorLow<int, std::allocator<int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr,
866 list, CptElementType::integer);
869 static int do_cpte_int(XDR* xd, StatePart part, int ecpt, int sflags, int* i, FILE* list)
871 return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
874 static int do_cpte_bool(XDR* xd, StatePart part, int ecpt, int sflags, bool* b, FILE* list)
876 int i = static_cast<int>(*b);
877 int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
882 static int do_cpte_doubles(XDR* xd, StatePart part, int ecpt, int sflags, int n, double** v, FILE* list)
884 return doVectorLow<double, std::allocator<double>>(xd, part, ecpt, sflags, n, nullptr, v,
885 nullptr, list, CptElementType::real);
888 static int do_cpte_double(XDR* xd, StatePart part, int ecpt, int sflags, double* r, FILE* list)
890 return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
893 static int do_cpte_matrix(XDR* xd, StatePart part, int ecpt, int sflags, matrix v, FILE* list)
899 ret = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, DIM * DIM, nullptr, &vr,
900 nullptr, nullptr, CptElementType::matrix3x3);
902 if (list && ret == 0)
904 pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
911 static int do_cpte_nmatrix(XDR* xd, StatePart part, int ecpt, int sflags, int n, real** v, FILE* list)
915 char name[CPTSTRLEN];
922 for (i = 0; i < n; i++)
924 reti = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]),
925 nullptr, nullptr, CptElementType::matrix3x3);
926 if (list && reti == 0)
928 sprintf(name, "%s[%d]", entryName(part, ecpt), i);
929 pr_reals(list, 0, name, v[i], n);
939 static int do_cpte_matrices(XDR* xd, StatePart part, int ecpt, int sflags, int n, matrix** v, FILE* list)
942 matrix *vp, *va = nullptr;
948 res = xdr_int(xd, &nf);
953 if (list == nullptr && nf != n)
955 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n",
956 entryName(part, ecpt), n, nf);
958 if (list || !(sflags & (1 << ecpt)))
971 snew(vr, nf * DIM * DIM);
972 for (i = 0; i < nf; i++)
974 for (j = 0; j < DIM; j++)
976 for (k = 0; k < DIM; k++)
978 vr[(i * DIM + j) * DIM + k] = vp[i][j][k];
982 ret = doVectorLow<real, std::allocator<real>>(xd, part, ecpt, sflags, nf * DIM * DIM, nullptr,
983 &vr, nullptr, nullptr, CptElementType::matrix3x3);
984 for (i = 0; i < nf; i++)
986 for (j = 0; j < DIM; j++)
988 for (k = 0; k < DIM; k++)
990 vp[i][j][k] = vr[(i * DIM + j) * DIM + k];
996 if (list && ret == 0)
998 for (i = 0; i < nf; i++)
1000 pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
1011 static void do_cpt_header(XDR* xd, gmx_bool bRead, FILE* list, CheckpointHeaderContents* contents)
1024 res = xdr_int(xd, &magic);
1028 "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
1030 if (magic != CPT_MAGIC1)
1033 "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
1034 "The checkpoint file is corrupted or not a checkpoint file",
1040 gmx_gethostname(fhost, 255);
1042 do_cpt_string_err(xd, "GROMACS version", contents->version, list);
1043 // The following fields are no longer ever written with meaningful
1044 // content, but because they precede the file version, there is no
1045 // good way for new code to read the old and new formats, nor a
1046 // good way for old code to avoid giving an error while reading a
1047 // new format. So we read and write a field that no longer has a
1049 do_cpt_string_err(xd, "GROMACS build time UNUSED", contents->btime_UNUSED, list);
1050 do_cpt_string_err(xd, "GROMACS build user UNUSED", contents->buser_UNUSED, list);
1051 do_cpt_string_err(xd, "GROMACS build host UNUSED", contents->bhost_UNUSED, list);
1052 do_cpt_string_err(xd, "generating program", contents->fprog, list);
1053 do_cpt_string_err(xd, "generation time", contents->ftime, list);
1054 contents->file_version = cpt_version;
1055 do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
1056 if (contents->file_version > cpt_version)
1059 "Attempting to read a checkpoint file of version %d with code of version %d\n",
1060 contents->file_version, cpt_version);
1062 if (contents->file_version >= 13)
1064 do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
1068 contents->double_prec = -1;
1070 if (contents->file_version >= 12)
1072 do_cpt_string_err(xd, "generating host", fhost, list);
1074 do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
1075 do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
1076 if (contents->file_version >= 10)
1078 do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
1082 contents->nhchainlength = 1;
1084 if (contents->file_version >= 11)
1086 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
1090 contents->nnhpres = 0;
1092 if (contents->file_version >= 14)
1094 do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
1098 contents->nlambda = 0;
1100 do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
1101 if (contents->file_version >= 3)
1103 do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1107 contents->simulation_part = 1;
1109 if (contents->file_version >= 5)
1111 do_cpt_step_err(xd, "step", &contents->step, list);
1116 do_cpt_int_err(xd, "step", &idum, list);
1117 contents->step = static_cast<int64_t>(idum);
1119 do_cpt_double_err(xd, "t", &contents->t, list);
1120 do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1121 do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1122 do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1123 do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1124 do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1125 do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1126 if (contents->file_version >= 4)
1128 do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1129 do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1133 contents->flags_eks = 0;
1134 contents->flags_enh = (contents->flags_state >> (estORIRE_DTAV + 1));
1135 contents->flags_state = (contents->flags_state
1136 & ~((1 << (estORIRE_DTAV + 1)) | (1 << (estORIRE_DTAV + 2))
1137 | (1 << (estORIRE_DTAV + 3))));
1139 if (contents->file_version >= 14)
1141 do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1145 contents->flags_dfh = 0;
1148 if (contents->file_version >= 15)
1150 do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1157 if (contents->file_version >= 16)
1159 do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1163 contents->eSwapCoords = eswapNO;
1166 if (contents->file_version >= 17)
1168 do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1172 contents->flags_awhh = 0;
1175 if (contents->file_version >= 18)
1177 do_cpt_int_err(xd, "pull history flags", &contents->flagsPullHistory, list);
1181 contents->flagsPullHistory = 0;
1185 static int do_cpt_footer(XDR* xd, int file_version)
1190 if (file_version >= 2)
1193 res = xdr_int(xd, &magic);
1198 if (magic != CPT_MAGIC2)
1207 static int do_cpt_state(XDR* xd, int fflags, t_state* state, FILE* list)
1210 const StatePart part = StatePart::microState;
1211 const int sflags = state->flags;
1212 // If reading, state->natoms was probably just read, so
1213 // allocations need to be managed. If writing, this won't change
1214 // anything that matters.
1215 state_change_natoms(state, state->natoms);
1216 for (int i = 0; (i < estNR && ret == 0); i++)
1218 if (fflags & (1 << i))
1223 ret = doRealArrayRef(
1224 xd, part, i, sflags,
1225 gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()),
1229 ret = do_cpte_int(xd, part, i, sflags, &state->fep_state, list);
1231 case estBOX: ret = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1233 ret = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list);
1235 case estBOXV: ret = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1237 ret = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list);
1240 ret = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list);
1243 ret = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list);
1246 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list);
1249 ret = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list);
1252 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list);
1255 ret = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list);
1258 ret = doVector<double>(xd, part, i, sflags, &state->therm_integral, list);
1261 ret = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list);
1263 case estVETA: ret = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1264 case estVOL0: ret = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1266 ret = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list);
1269 ret = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list);
1271 /* The RNG entries are no longer written,
1272 * the next 4 lines are only for reading old files.
1274 case estLD_RNG_NOTSUPPORTED:
1275 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1277 case estLD_RNGI_NOTSUPPORTED:
1278 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1280 case estMC_RNG_NOTSUPPORTED:
1281 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1283 case estMC_RNGI_NOTSUPPORTED:
1284 ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
1286 case estDISRE_INITF:
1287 ret = do_cpte_real(xd, part, i, sflags, &state->hist.disre_initf, list);
1289 case estDISRE_RM3TAV:
1290 ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs,
1291 &state->hist.disre_rm3tav, list);
1293 case estORIRE_INITF:
1294 ret = do_cpte_real(xd, part, i, sflags, &state->hist.orire_initf, list);
1297 ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav,
1298 &state->hist.orire_Dtav, list);
1300 case estPULLCOMPREVSTEP:
1301 ret = doVector<double>(xd, part, i, sflags, &state->pull_com_prev_step, list);
1305 "Unknown state entry %d\n"
1306 "You are reading a checkpoint file written by different code, which "
1315 static int do_cpt_ekinstate(XDR* xd, int fflags, ekinstate_t* ekins, FILE* list)
1319 const StatePart part = StatePart::kineticEnergy;
1320 for (int i = 0; (i < eeksNR && ret == 0); i++)
1322 if (fflags & (1 << i))
1328 ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list);
1331 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list);
1334 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list);
1337 ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list);
1340 ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list);
1342 case eeksEKINSCALEF:
1343 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list);
1346 ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list);
1348 case eeksEKINSCALEH:
1349 ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list);
1352 ret = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list);
1354 case eeksMVCOS: ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1357 "Unknown ekin data state entry %d\n"
1358 "You are probably reading a new checkpoint file with old code",
1368 static int do_cpt_swapstate(XDR* xd, gmx_bool bRead, int eSwapCoords, swaphistory_t* swapstate, FILE* list)
1370 int swap_cpt_version = 2;
1372 if (eSwapCoords == eswapNO)
1377 swapstate->bFromCpt = bRead;
1378 swapstate->eSwapCoords = eSwapCoords;
1380 do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1381 if (bRead && swap_cpt_version < 2)
1384 "Cannot read checkpoint files that were written with old versions"
1385 "of the ion/water position swapping protocol.\n");
1388 do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1390 /* When reading, init_swapcoords has not been called yet,
1391 * so we have to allocate memory first. */
1392 do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1395 snew(swapstate->ionType, swapstate->nIonTypes);
1398 for (int ic = 0; ic < eCompNR; ic++)
1400 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1402 swapstateIons_t* gs = &swapstate->ionType[ii];
1406 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1410 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1415 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1419 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1422 if (bRead && (nullptr == gs->nMolPast[ic]))
1424 snew(gs->nMolPast[ic], swapstate->nAverage);
1427 for (int j = 0; j < swapstate->nAverage; j++)
1431 do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1435 do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1441 /* Ion flux per channel */
1442 for (int ic = 0; ic < eChanNR; ic++)
1444 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1446 swapstateIons_t* gs = &swapstate->ionType[ii];
1450 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1454 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1459 /* Ion flux leakage */
1462 do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1466 do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1470 for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1472 swapstateIons_t* gs = &swapstate->ionType[ii];
1474 do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1478 snew(gs->channel_label, gs->nMol);
1479 snew(gs->comp_from, gs->nMol);
1482 do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1483 do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1486 /* Save the last known whole positions to checkpoint
1487 * file to be able to also make multimeric channels whole in PBC */
1488 do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1489 do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1492 snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1493 snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1494 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1495 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1499 do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0],
1500 *swapstate->xc_old_whole_p[eChan0], list);
1501 do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1],
1502 *swapstate->xc_old_whole_p[eChan1], list);
1509 static int do_cpt_enerhist(XDR* xd, gmx_bool bRead, int fflags, energyhistory_t* enerhist, FILE* list)
1518 GMX_RELEASE_ASSERT(enerhist != nullptr,
1519 "With energy history, we need a valid enerhist pointer");
1521 /* This is stored/read for backward compatibility */
1522 int energyHistoryNumEnergies = 0;
1525 enerhist->nsteps = 0;
1527 enerhist->nsteps_sim = 0;
1528 enerhist->nsum_sim = 0;
1530 else if (enerhist != nullptr)
1532 energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1535 delta_h_history_t* deltaH = enerhist->deltaHForeignLambdas.get();
1536 const StatePart part = StatePart::energyHistory;
1537 for (int i = 0; (i < eenhNR && ret == 0); i++)
1539 if (fflags & (1 << i))
1544 ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list);
1546 case eenhENERGY_AVER:
1547 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list);
1549 case eenhENERGY_SUM:
1550 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list);
1552 case eenhENERGY_NSUM:
1553 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list);
1555 case eenhENERGY_SUM_SIM:
1556 ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list);
1558 case eenhENERGY_NSUM_SIM:
1559 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list);
1561 case eenhENERGY_NSTEPS:
1562 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list);
1564 case eenhENERGY_NSTEPS_SIM:
1565 do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list);
1567 case eenhENERGY_DELTA_H_NN:
1570 if (!bRead && deltaH != nullptr)
1572 numDeltaH = deltaH->dh.size();
1574 do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1577 if (deltaH == nullptr)
1579 enerhist->deltaHForeignLambdas = std::make_unique<delta_h_history_t>();
1580 deltaH = enerhist->deltaHForeignLambdas.get();
1582 deltaH->dh.resize(numDeltaH);
1583 deltaH->start_lambda_set = FALSE;
1587 case eenhENERGY_DELTA_H_LIST:
1588 for (auto dh : deltaH->dh)
1590 ret = doVector<real>(xd, part, i, fflags, &dh, list);
1593 case eenhENERGY_DELTA_H_STARTTIME:
1594 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list);
1596 case eenhENERGY_DELTA_H_STARTLAMBDA:
1597 ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list);
1601 "Unknown energy history entry %d\n"
1602 "You are probably reading a new checkpoint file with old code",
1608 if ((fflags & (1 << eenhENERGY_SUM)) && !(fflags & (1 << eenhENERGY_SUM_SIM)))
1610 /* Assume we have an old file format and copy sum to sum_sim */
1611 enerhist->ener_sum_sim = enerhist->ener_sum;
1614 if ((fflags & (1 << eenhENERGY_NSUM)) && !(fflags & (1 << eenhENERGY_NSTEPS)))
1616 /* Assume we have an old file format and copy nsum to nsteps */
1617 enerhist->nsteps = enerhist->nsum;
1619 if ((fflags & (1 << eenhENERGY_NSUM_SIM)) && !(fflags & (1 << eenhENERGY_NSTEPS_SIM)))
1621 /* Assume we have an old file format and copy nsum to nsteps */
1622 enerhist->nsteps_sim = enerhist->nsum_sim;
1628 static int doCptPullCoordHist(XDR* xd, PullCoordinateHistory* pullCoordHist, const StatePart part, FILE* list)
1633 flags |= ((1 << epullcoordh_VALUE_REF_SUM) | (1 << epullcoordh_VALUE_SUM)
1634 | (1 << epullcoordh_DR01_SUM) | (1 << epullcoordh_DR23_SUM)
1635 | (1 << epullcoordh_DR45_SUM) | (1 << epullcoordh_FSCAL_SUM));
1637 for (int i = 0; i < epullcoordh_NR && ret == 0; i++)
1641 case epullcoordh_VALUE_REF_SUM:
1642 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->valueRef), list);
1644 case epullcoordh_VALUE_SUM:
1645 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->value), list);
1647 case epullcoordh_DR01_SUM:
1648 for (int j = 0; j < DIM && ret == 0; j++)
1650 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr01[j]), list);
1653 case epullcoordh_DR23_SUM:
1654 for (int j = 0; j < DIM && ret == 0; j++)
1656 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr23[j]), list);
1659 case epullcoordh_DR45_SUM:
1660 for (int j = 0; j < DIM && ret == 0; j++)
1662 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr45[j]), list);
1665 case epullcoordh_FSCAL_SUM:
1666 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->scalarForce), list);
1668 case epullcoordh_DYNAX_SUM:
1669 for (int j = 0; j < DIM && ret == 0; j++)
1671 ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dynaX[j]), list);
1680 static int doCptPullGroupHist(XDR* xd, PullGroupHistory* pullGroupHist, const StatePart part, FILE* list)
1685 flags |= ((1 << epullgrouph_X_SUM));
1687 for (int i = 0; i < epullgrouph_NR; i++)
1691 case epullgrouph_X_SUM:
1692 for (int j = 0; j < DIM && ret == 0; j++)
1694 ret = do_cpte_double(xd, part, i, flags, &(pullGroupHist->x[j]), list);
1704 static int doCptPullHist(XDR* xd, gmx_bool bRead, int fflags, PullHistory* pullHist, const StatePart part, FILE* list)
1707 int pullHistoryNumCoordinates = 0;
1708 int pullHistoryNumGroups = 0;
1710 /* Retain the number of terms in the sum and the number of coordinates (used for writing
1711 * average pull forces and coordinates) in the pull history, in temporary variables,
1712 * in case they cannot be read from the checkpoint, in order to have backward compatibility */
1715 pullHist->numValuesInXSum = 0;
1716 pullHist->numValuesInFSum = 0;
1718 else if (pullHist != nullptr)
1720 pullHistoryNumCoordinates = pullHist->pullCoordinateSums.size();
1721 pullHistoryNumGroups = pullHist->pullGroupSums.size();
1725 GMX_RELEASE_ASSERT(fflags == 0, "Without pull history, all flags should be off");
1728 for (int i = 0; (i < epullhNR && ret == 0); i++)
1730 if (fflags & (1 << i))
1734 case epullhPULL_NUMCOORDINATES:
1735 ret = do_cpte_int(xd, part, i, fflags, &pullHistoryNumCoordinates, list);
1737 case epullhPULL_NUMGROUPS:
1738 do_cpt_int_err(xd, eenh_names[i], &pullHistoryNumGroups, list);
1740 case epullhPULL_NUMVALUESINXSUM:
1741 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInXSum, list);
1743 case epullhPULL_NUMVALUESINFSUM:
1744 do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInFSum, list);
1748 "Unknown pull history entry %d\n"
1749 "You are probably reading a new checkpoint file with old code",
1756 pullHist->pullCoordinateSums.resize(pullHistoryNumCoordinates);
1757 pullHist->pullGroupSums.resize(pullHistoryNumGroups);
1759 if (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0)
1761 for (size_t i = 0; i < pullHist->pullCoordinateSums.size() && ret == 0; i++)
1763 ret = doCptPullCoordHist(xd, &(pullHist->pullCoordinateSums[i]), part, list);
1765 for (size_t i = 0; i < pullHist->pullGroupSums.size() && ret == 0; i++)
1767 ret = doCptPullGroupHist(xd, &(pullHist->pullGroupSums[i]), part, list);
1774 static int do_cpt_df_hist(XDR* xd, int fflags, int nlambda, df_history_t** dfhistPtr, FILE* list)
1783 if (*dfhistPtr == nullptr)
1785 snew(*dfhistPtr, 1);
1786 (*dfhistPtr)->nlambda = nlambda;
1787 init_df_history(*dfhistPtr, nlambda);
1789 df_history_t* dfhist = *dfhistPtr;
1791 const StatePart part = StatePart::freeEnergyHistory;
1792 for (int i = 0; (i < edfhNR && ret == 0); i++)
1794 if (fflags & (1 << i))
1799 ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list);
1802 ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list);
1805 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list);
1808 ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list);
1810 case edfhSUMWEIGHTS:
1811 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list);
1814 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list);
1817 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list);
1820 ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list);
1823 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list);
1826 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list);
1829 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list);
1832 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list);
1835 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list);
1838 ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list);
1843 "Unknown df history entry %d\n"
1844 "You are probably reading a new checkpoint file with old code",
1854 /* This function stores the last whole configuration of the reference and
1855 * average structure in the .cpt file
1857 static int do_cpt_EDstate(XDR* xd, gmx_bool bRead, int nED, edsamhistory_t* EDstate, FILE* list)
1864 EDstate->bFromCpt = bRead;
1867 /* When reading, init_edsam has not been called yet,
1868 * so we have to allocate memory first. */
1871 snew(EDstate->nref, EDstate->nED);
1872 snew(EDstate->old_sref, EDstate->nED);
1873 snew(EDstate->nav, EDstate->nED);
1874 snew(EDstate->old_sav, EDstate->nED);
1877 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1878 for (int i = 0; i < EDstate->nED; i++)
1882 /* Reference structure SREF */
1883 sprintf(buf, "ED%d # of atoms in reference structure", i + 1);
1884 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1885 sprintf(buf, "ED%d x_ref", i + 1);
1888 snew(EDstate->old_sref[i], EDstate->nref[i]);
1889 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1893 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1896 /* Average structure SAV */
1897 sprintf(buf, "ED%d # of atoms in average structure", i + 1);
1898 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1899 sprintf(buf, "ED%d x_av", i + 1);
1902 snew(EDstate->old_sav[i], EDstate->nav[i]);
1903 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1907 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1914 static int do_cpt_correlation_grid(XDR* xd,
1916 gmx_unused int fflags,
1917 gmx::CorrelationGridHistory* corrGrid,
1923 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1924 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1925 do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1929 initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize,
1930 corrGrid->blockDataListSize);
1933 for (gmx::CorrelationBlockDataHistory& blockData : corrGrid->blockDataBuffer)
1935 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1936 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1937 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1938 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1939 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1940 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1941 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1942 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1943 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1944 do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1945 do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1951 static int do_cpt_awh_bias(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhBiasHistory* biasHistory, FILE* list)
1955 gmx::AwhBiasStateHistory* state = &biasHistory->state;
1956 for (int i = 0; (i < eawhhNR && ret == 0); i++)
1958 if (fflags & (1 << i))
1962 case eawhhIN_INITIAL:
1963 do_cpt_bool_err(xd, eawhh_names[i], &state->in_initial, list);
1965 case eawhhEQUILIBRATEHISTOGRAM:
1966 do_cpt_bool_err(xd, eawhh_names[i], &state->equilibrateHistogram, list);
1969 do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list);
1976 numPoints = biasHistory->pointState.size();
1978 do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1981 biasHistory->pointState.resize(numPoints);
1985 case eawhhCOORDPOINT:
1986 for (auto& psh : biasHistory->pointState)
1988 do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1989 do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1990 do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1991 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1992 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1993 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1994 do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1995 do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1996 do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1997 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
1998 do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
2001 case eawhhUMBRELLAGRIDPOINT:
2002 do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list);
2004 case eawhhUPDATELIST:
2005 do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
2006 do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
2008 case eawhhLOGSCALEDSAMPLEWEIGHT:
2009 do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
2010 do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
2012 case eawhhNUMUPDATES:
2013 do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
2015 case eawhhFORCECORRELATIONGRID:
2016 ret = do_cpt_correlation_grid(xd, bRead, fflags,
2017 &biasHistory->forceCorrelationGrid, list, i);
2019 default: gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
2027 static int do_cpt_awh(XDR* xd, gmx_bool bRead, int fflags, gmx::AwhHistory* awhHistory, FILE* list)
2033 std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
2035 if (awhHistory == nullptr)
2037 GMX_RELEASE_ASSERT(bRead,
2038 "do_cpt_awh should not be called for writing without an AwhHistory");
2040 awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
2041 awhHistory = awhHistoryLocal.get();
2044 /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
2045 (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
2046 these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
2047 is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
2048 at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
2049 when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
2051 Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
2052 One difficulty is the multilevel structure of the data which would need to be represented somehow. */
2057 numBias = awhHistory->bias.size();
2059 do_cpt_int_err(xd, "awh_nbias", &numBias, list);
2063 awhHistory->bias.resize(numBias);
2065 for (auto& bias : awhHistory->bias)
2067 ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
2073 do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
2078 static void do_cpt_mdmodules(int fileVersion,
2079 t_fileio* checkpointFileHandle,
2080 const gmx::MdModulesNotifier& mdModulesNotifier)
2082 if (fileVersion >= cptv_MdModules)
2084 gmx::FileIOXdrSerializer serializer(checkpointFileHandle);
2085 gmx::KeyValueTreeObject mdModuleCheckpointParameterTree =
2086 gmx::deserializeKeyValueTree(&serializer);
2087 gmx::MdModulesCheckpointReadingDataOnMaster mdModuleCheckpointReadingDataOnMaster = {
2088 mdModuleCheckpointParameterTree, fileVersion
2090 mdModulesNotifier.notifier_.notify(mdModuleCheckpointReadingDataOnMaster);
2094 static int do_cpt_files(XDR* xd, gmx_bool bRead, std::vector<gmx_file_position_t>* outputfiles, FILE* list, int file_version)
2097 gmx_off_t mask = 0xFFFFFFFFL;
2098 int offset_high, offset_low;
2099 std::array<char, CPTSTRLEN> buf;
2100 GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
2102 // Ensure that reading pre-allocates outputfiles, while writing
2103 // writes what is already there.
2104 int nfiles = outputfiles->size();
2105 if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
2111 outputfiles->resize(nfiles);
2114 for (auto& outputfile : *outputfiles)
2116 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
2119 do_cpt_string_err(xd, "output filename", buf, list);
2120 std::copy(std::begin(buf), std::end(buf), std::begin(outputfile.filename));
2122 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2126 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2130 outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32)
2131 | (static_cast<gmx_off_t>(offset_low) & mask);
2135 do_cpt_string_err(xd, "output filename", outputfile.filename, list);
2137 offset = outputfile.offset;
2145 offset_low = static_cast<int>(offset & mask);
2146 offset_high = static_cast<int>((offset >> 32) & mask);
2148 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
2152 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
2157 if (file_version >= 8)
2159 if (do_cpt_int(xd, "file_checksum_size", &outputfile.checksumSize, list) != 0)
2163 if (do_cpt_u_chars(xd, "file_checksum", outputfile.checksum.size(),
2164 outputfile.checksum.data(), list)
2172 outputfile.checksumSize = -1;
2179 void write_checkpoint(const char* fn,
2180 gmx_bool bNumberAndKeep,
2182 const t_commrec* cr,
2186 int simulation_part,
2192 ObservablesHistory* observablesHistory,
2193 const gmx::MdModulesNotifier& mdModulesNotifier)
2196 char* fntemp; /* the temporary checkpoint file name */
2198 char buf[1024], suffix[5 + STEPSTRSIZE], sbuf[STEPSTRSIZE];
2201 if (DOMAINDECOMP(cr))
2203 npmenodes = cr->npmenodes;
2211 /* make the new temporary filename */
2212 snew(fntemp, std::strlen(fn) + 5 + STEPSTRSIZE);
2213 std::strcpy(fntemp, fn);
2214 fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2215 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
2216 std::strcat(fntemp, suffix);
2217 std::strcat(fntemp, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2219 /* if we can't rename, we just overwrite the cpt file.
2220 * dangerous if interrupted.
2222 snew(fntemp, std::strlen(fn));
2223 std::strcpy(fntemp, fn);
2225 std::string timebuf = gmx_format_current_time();
2229 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n", gmx_step_str(step, buf), timebuf.c_str());
2232 /* Get offsets for open files */
2233 auto outputfiles = gmx_fio_get_output_file_positions();
2235 fp = gmx_fio_open(fntemp, "w");
2238 if (state->ekinstate.bUpToDate)
2240 flags_eks = ((1 << eeksEKIN_N) | (1 << eeksEKINH) | (1 << eeksEKINF) | (1 << eeksEKINO)
2241 | (1 << eeksEKINSCALEF) | (1 << eeksEKINSCALEH) | (1 << eeksVSCALE)
2242 | (1 << eeksDEKINDL) | (1 << eeksMVCOS));
2249 energyhistory_t* enerhist = observablesHistory->energyHistory.get();
2251 if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
2253 flags_enh |= (1 << eenhENERGY_N) | (1 << eenhENERGY_NSTEPS) | (1 << eenhENERGY_NSTEPS_SIM);
2254 if (enerhist->nsum > 0)
2256 flags_enh |= ((1 << eenhENERGY_AVER) | (1 << eenhENERGY_SUM) | (1 << eenhENERGY_NSUM));
2258 if (enerhist->nsum_sim > 0)
2260 flags_enh |= ((1 << eenhENERGY_SUM_SIM) | (1 << eenhENERGY_NSUM_SIM));
2262 if (enerhist->deltaHForeignLambdas != nullptr)
2264 flags_enh |= ((1 << eenhENERGY_DELTA_H_NN) | (1 << eenhENERGY_DELTA_H_LIST)
2265 | (1 << eenhENERGY_DELTA_H_STARTTIME) | (1 << eenhENERGY_DELTA_H_STARTLAMBDA));
2269 PullHistory* pullHist = observablesHistory->pullHistory.get();
2270 int flagsPullHistory = 0;
2271 if (pullHist != nullptr && (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0))
2273 flagsPullHistory |= (1 << epullhPULL_NUMCOORDINATES);
2274 flagsPullHistory |= ((1 << epullhPULL_NUMGROUPS) | (1 << epullhPULL_NUMVALUESINXSUM)
2275 | (1 << epullhPULL_NUMVALUESINFSUM));
2281 flags_dfh = ((1 << edfhBEQUIL) | (1 << edfhNATLAMBDA) | (1 << edfhSUMWEIGHTS)
2282 | (1 << edfhSUMDG) | (1 << edfhTIJ) | (1 << edfhTIJEMP));
2285 flags_dfh |= ((1 << edfhWLDELTA) | (1 << edfhWLHISTO));
2287 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER)
2288 || (elamstats == elamstatsMETROPOLIS))
2290 flags_dfh |= ((1 << edfhACCUMP) | (1 << edfhACCUMM) | (1 << edfhACCUMP2)
2291 | (1 << edfhACCUMM2) | (1 << edfhSUMMINVAR) | (1 << edfhSUMVAR));
2300 if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
2302 flags_awhh |= ((1 << eawhhIN_INITIAL) | (1 << eawhhEQUILIBRATEHISTOGRAM) | (1 << eawhhHISTSIZE)
2303 | (1 << eawhhNPOINTS) | (1 << eawhhCOORDPOINT) | (1 << eawhhUMBRELLAGRIDPOINT)
2304 | (1 << eawhhUPDATELIST) | (1 << eawhhLOGSCALEDSAMPLEWEIGHT)
2305 | (1 << eawhhNUMUPDATES) | (1 << eawhhFORCECORRELATIONGRID));
2308 /* We can check many more things now (CPU, acceleration, etc), but
2309 * it is highly unlikely to have two separate builds with exactly
2310 * the same version, user, time, and build host!
2313 int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
2315 edsamhistory_t* edsamhist = observablesHistory->edsamHistory.get();
2316 int nED = (edsamhist ? edsamhist->nED : 0);
2318 swaphistory_t* swaphist = observablesHistory->swapHistory.get();
2319 int eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
2321 CheckpointHeaderContents headerContents = { 0,
2339 state->nhchainlength,
2349 std::strcpy(headerContents.version, gmx_version());
2350 std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
2351 std::strcpy(headerContents.ftime, timebuf.c_str());
2352 if (DOMAINDECOMP(cr))
2354 copy_ivec(domdecCells, headerContents.dd_nc);
2357 do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
2359 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0)
2360 || (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0)
2361 || (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0)
2362 || (doCptPullHist(gmx_fio_getxdr(fp), FALSE, flagsPullHistory, pullHist, StatePart::pullHistory, nullptr)
2364 || (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0)
2365 || (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0)
2366 || (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0)
2367 || (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0)
2368 || (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr, headerContents.file_version) < 0))
2370 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2373 // Checkpointing MdModules
2375 gmx::KeyValueTreeBuilder builder;
2376 gmx::MdModulesWriteCheckpointData mdModulesWriteCheckpoint = { builder.rootObject(),
2377 headerContents.file_version };
2378 mdModulesNotifier.notifier_.notify(mdModulesWriteCheckpoint);
2379 auto tree = builder.build();
2380 gmx::FileIOXdrSerializer serializer(fp);
2381 gmx::serializeKeyValueTree(tree, &serializer);
2384 do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2386 /* we really, REALLY, want to make sure to physically write the checkpoint,
2387 and all the files it depends on, out to disk. Because we've
2388 opened the checkpoint with gmx_fio_open(), it's in our list
2390 ret = gmx_fio_all_output_fsync();
2395 sprintf(buf, "Cannot fsync '%s'; maybe you are out of disk space?", gmx_fio_getname(ret));
2397 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
2403 gmx_warning("%s", buf);
2407 if (gmx_fio_close(fp) != 0)
2409 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2412 /* we don't move the checkpoint if the user specified they didn't want it,
2413 or if the fsyncs failed */
2415 if (!bNumberAndKeep && !ret)
2419 /* Rename the previous checkpoint file */
2420 std::strcpy(buf, fn);
2421 buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2422 std::strcat(buf, "_prev");
2423 std::strcat(buf, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2426 /* we copy here so that if something goes wrong between now and
2427 * the rename below, there's always a state.cpt.
2428 * If renames are atomic (such as in POSIX systems),
2429 * this copying should be unneccesary.
2431 gmx_file_copy(fn, buf, FALSE);
2432 /* We don't really care if this fails:
2433 * there's already a new checkpoint.
2438 gmx_file_rename(fn, buf);
2441 if (gmx_file_rename(fntemp, fn) != 0)
2443 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2446 #endif /* GMX_NO_RENAME */
2451 /*code for alternate checkpointing scheme. moved from top of loop over
2453 fcRequestCheckPoint();
2454 if (fcCheckPointParallel(cr->nodeid, NULL, 0) == 0)
2456 gmx_fatal(3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step);
2458 #endif /* end GMX_FAHCORE block */
2461 static void check_int(FILE* fplog, const char* type, int p, int f, gmx_bool* mm)
2463 bool foundMismatch = (p != f);
2471 fprintf(fplog, " %s mismatch,\n", type);
2472 fprintf(fplog, " current program: %d\n", p);
2473 fprintf(fplog, " checkpoint file: %d\n", f);
2474 fprintf(fplog, "\n");
2478 static void check_string(FILE* fplog, const char* type, const char* p, const char* f, gmx_bool* mm)
2480 bool foundMismatch = (std::strcmp(p, f) != 0);
2488 fprintf(fplog, " %s mismatch,\n", type);
2489 fprintf(fplog, " current program: %s\n", p);
2490 fprintf(fplog, " checkpoint file: %s\n", f);
2491 fprintf(fplog, "\n");
2495 static void check_match(FILE* fplog,
2496 const t_commrec* cr,
2498 const CheckpointHeaderContents& headerContents,
2499 gmx_bool reproducibilityRequested)
2501 /* Note that this check_string on the version will also print a message
2502 * when only the minor version differs. But we only print a warning
2503 * message further down with reproducibilityRequested=TRUE.
2505 gmx_bool versionDiffers = FALSE;
2506 check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2508 gmx_bool precisionDiffers = FALSE;
2509 check_int(fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2510 if (precisionDiffers)
2512 const char msg_precision_difference[] =
2513 "You are continuing a simulation with a different precision. Not matching\n"
2514 "single/double precision will lead to precision or performance loss.\n";
2517 fprintf(fplog, "%s\n", msg_precision_difference);
2521 gmx_bool mm = (versionDiffers || precisionDiffers);
2523 if (reproducibilityRequested)
2525 check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(),
2526 headerContents.fprog, &mm);
2528 check_int(fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2531 if (cr->nnodes > 1 && reproducibilityRequested)
2533 check_int(fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2535 int npp = cr->nnodes;
2536 if (cr->npmenodes >= 0)
2538 npp -= cr->npmenodes;
2540 int npp_f = headerContents.nnodes;
2541 if (headerContents.npme >= 0)
2543 npp_f -= headerContents.npme;
2547 check_int(fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2548 check_int(fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2549 check_int(fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2555 /* Gromacs should be able to continue from checkpoints between
2556 * different patch level versions, but we do not guarantee
2557 * compatibility between different major/minor versions - check this.
2561 sscanf(gmx_version(), "%5d", &gmx_major);
2562 int ret = sscanf(headerContents.version, "%5d", &cpt_major);
2563 gmx_bool majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2565 const char msg_major_version_difference[] =
2566 "The current GROMACS major version is not identical to the one that\n"
2567 "generated the checkpoint file. In principle GROMACS does not support\n"
2568 "continuation from checkpoints between different versions, so we advise\n"
2569 "against this. If you still want to try your luck we recommend that you use\n"
2570 "the -noappend flag to keep your output files from the two versions separate.\n"
2571 "This might also work around errors where the output fields in the energy\n"
2572 "file have changed between the different versions.\n";
2574 const char msg_mismatch_notice[] =
2575 "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2576 "Continuation is exact, but not guaranteed to be binary identical.\n";
2578 if (majorVersionDiffers)
2582 fprintf(fplog, "%s\n", msg_major_version_difference);
2585 else if (reproducibilityRequested)
2587 /* Major & minor versions match at least, but something is different. */
2590 fprintf(fplog, "%s\n", msg_mismatch_notice);
2596 static void read_checkpoint(const char* fn,
2598 const t_commrec* cr,
2601 int* init_fep_state,
2602 CheckpointHeaderContents* headerContents,
2604 ObservablesHistory* observablesHistory,
2605 gmx_bool reproducibilityRequested,
2606 const gmx::MdModulesNotifier& mdModulesNotifier)
2609 char buf[STEPSTRSIZE];
2612 fp = gmx_fio_open(fn, "r");
2613 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2615 // If we are appending, then we don't want write to the open log
2616 // file because we still need to compute a checksum for it. In
2617 // that case, the filehandle will be nullptr. Otherwise, we report
2618 // to the new log file about the checkpoint file that we are
2620 FILE* fplog = gmx_fio_getfp(logfio);
2623 fprintf(fplog, "\n");
2624 fprintf(fplog, "Reading checkpoint file %s\n", fn);
2625 fprintf(fplog, " file generated by: %s\n", headerContents->fprog);
2626 fprintf(fplog, " file generated at: %s\n", headerContents->ftime);
2627 fprintf(fplog, " GROMACS double prec.: %d\n", headerContents->double_prec);
2628 fprintf(fplog, " simulation part #: %d\n", headerContents->simulation_part);
2629 fprintf(fplog, " step: %s\n", gmx_step_str(headerContents->step, buf));
2630 fprintf(fplog, " time: %f\n", headerContents->t);
2631 fprintf(fplog, "\n");
2634 if (headerContents->natoms != state->natoms)
2637 "Checkpoint file is for a system of %d atoms, while the current system consists "
2639 headerContents->natoms, state->natoms);
2641 if (headerContents->ngtc != state->ngtc)
2644 "Checkpoint file is for a system of %d T-coupling groups, while the current "
2645 "system consists of %d T-coupling groups",
2646 headerContents->ngtc, state->ngtc);
2648 if (headerContents->nnhpres != state->nnhpres)
2651 "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the "
2652 "current system consists of %d NH-pressure-coupling variables",
2653 headerContents->nnhpres, state->nnhpres);
2656 int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2657 if (headerContents->nlambda != nlambdaHistory)
2660 "Checkpoint file is for a system with %d lambda states, while the current system "
2661 "consists of %d lambda states",
2662 headerContents->nlambda, nlambdaHistory);
2665 init_gtc_state(state, state->ngtc, state->nnhpres,
2666 headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2667 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2669 if (headerContents->eIntegrator != eIntegrator)
2672 "Cannot change integrator during a checkpoint restart. Perhaps you should make a "
2673 "new .tpr with grompp -f new.mdp -t %s",
2677 if (headerContents->flags_state != state->flags)
2680 "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you "
2681 "should make a new .tpr with grompp -f new.mdp -t %s",
2687 check_match(fplog, cr, dd_nc, *headerContents, reproducibilityRequested);
2690 ret = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2691 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it
2692 here. Investigate for 5.0. */
2697 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2702 state->ekinstate.hasReadEkinState = (((headerContents->flags_eks & (1 << eeksEKINH)) != 0)
2703 || ((headerContents->flags_eks & (1 << eeksEKINF)) != 0)
2704 || ((headerContents->flags_eks & (1 << eeksEKINO)) != 0)
2705 || (((headerContents->flags_eks & (1 << eeksEKINSCALEF))
2706 | (headerContents->flags_eks & (1 << eeksEKINSCALEH))
2707 | (headerContents->flags_eks & (1 << eeksVSCALE)))
2710 if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2712 observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
2714 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents->flags_enh,
2715 observablesHistory->energyHistory.get(), nullptr);
2721 if (headerContents->flagsPullHistory)
2723 if (observablesHistory->pullHistory == nullptr)
2725 observablesHistory->pullHistory = std::make_unique<PullHistory>();
2727 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents->flagsPullHistory,
2728 observablesHistory->pullHistory.get(), StatePart::pullHistory, nullptr);
2735 if (headerContents->file_version < 6)
2738 "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2741 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda,
2742 &state->dfhist, nullptr);
2748 if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2750 observablesHistory->edsamHistory = std::make_unique<edsamhistory_t>(edsamhistory_t{});
2752 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED,
2753 observablesHistory->edsamHistory.get(), nullptr);
2759 if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2761 state->awhHistory = std::make_shared<gmx::AwhHistory>();
2763 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2769 if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2771 observablesHistory->swapHistory = std::make_unique<swaphistory_t>(swaphistory_t{});
2773 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords,
2774 observablesHistory->swapHistory.get(), nullptr);
2780 std::vector<gmx_file_position_t> outputfiles;
2781 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2786 do_cpt_mdmodules(headerContents->file_version, fp, mdModulesNotifier);
2787 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2792 if (gmx_fio_close(fp) != 0)
2794 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2799 void load_checkpoint(const char* fn,
2801 const t_commrec* cr,
2805 ObservablesHistory* observablesHistory,
2806 gmx_bool reproducibilityRequested,
2807 const gmx::MdModulesNotifier& mdModulesNotifier)
2809 CheckpointHeaderContents headerContents;
2812 /* Read the state from the checkpoint file */
2813 read_checkpoint(fn, logfio, cr, dd_nc, ir->eI, &(ir->fepvals->init_fep_state), &headerContents,
2814 state, observablesHistory, reproducibilityRequested, mdModulesNotifier);
2818 gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
2819 gmx::MdModulesCheckpointReadingBroadcast broadcastCheckPointData = { *cr, headerContents.file_version };
2820 mdModulesNotifier.notifier_.notify(broadcastCheckPointData);
2822 ir->bContinuation = TRUE;
2823 // TODO Should the following condition be <=? Currently if you
2824 // pass a checkpoint written by an normal completion to a restart,
2825 // mdrun will read all input, does some work but no steps, and
2826 // write successful output. But perhaps that is not desirable.
2827 if ((ir->nsteps >= 0) && (ir->nsteps < headerContents.step))
2829 // Note that we do not intend to support the use of mdrun
2830 // -nsteps to circumvent this condition.
2831 char nstepsString[STEPSTRSIZE], stepString[STEPSTRSIZE];
2832 gmx_step_str(ir->nsteps, nstepsString);
2833 gmx_step_str(headerContents.step, stepString);
2835 "The input requested %s steps, however the checkpoint "
2836 "file has already reached step %s. The simulation will not "
2837 "proceed, because either your simulation is already complete, "
2838 "or your combination of input files don't match.",
2839 nstepsString, stepString);
2841 if (ir->nsteps >= 0)
2843 ir->nsteps += ir->init_step - headerContents.step;
2845 ir->init_step = headerContents.step;
2846 ir->simulation_part = headerContents.simulation_part + 1;
2849 void read_checkpoint_part_and_step(const char* filename, int* simulation_part, int64_t* step)
2853 if (filename == nullptr || !gmx_fexist(filename) || ((fp = gmx_fio_open(filename, "r")) == nullptr))
2855 *simulation_part = 0;
2860 CheckpointHeaderContents headerContents;
2861 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2863 *simulation_part = headerContents.simulation_part;
2864 *step = headerContents.step;
2867 static CheckpointHeaderContents read_checkpoint_data(t_fileio* fp,
2869 std::vector<gmx_file_position_t>* outputfiles)
2871 CheckpointHeaderContents headerContents;
2872 do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2873 state->natoms = headerContents.natoms;
2874 state->ngtc = headerContents.ngtc;
2875 state->nnhpres = headerContents.nnhpres;
2876 state->nhchainlength = headerContents.nhchainlength;
2877 state->flags = headerContents.flags_state;
2878 int ret = do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2883 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2889 energyhistory_t enerhist;
2890 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, nullptr);
2895 PullHistory pullHist = {};
2896 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist,
2897 StatePart::pullHistory, nullptr);
2903 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda,
2904 &state->dfhist, nullptr);
2910 edsamhistory_t edsamhist = {};
2911 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2917 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2923 swaphistory_t swaphist = {};
2924 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2930 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, outputfiles, nullptr, headerContents.file_version);
2936 gmx::MdModulesNotifier mdModuleNotifier;
2937 do_cpt_mdmodules(headerContents.file_version, fp, mdModuleNotifier);
2938 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2943 return headerContents;
2946 void read_checkpoint_trxframe(t_fileio* fp, t_trxframe* fr)
2949 std::vector<gmx_file_position_t> outputfiles;
2950 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, &outputfiles);
2952 fr->natoms = state.natoms;
2954 fr->step = int64_to_int(headerContents.step, "conversion of checkpoint to trajectory");
2956 fr->time = headerContents.t;
2958 fr->lambda = state.lambda[efptFEP];
2959 fr->fep_state = state.fep_state;
2961 fr->bX = ((state.flags & (1 << estX)) != 0);
2964 fr->x = makeRvecArray(state.x, state.natoms);
2966 fr->bV = ((state.flags & (1 << estV)) != 0);
2969 fr->v = makeRvecArray(state.v, state.natoms);
2972 fr->bBox = ((state.flags & (1 << estBOX)) != 0);
2975 copy_mat(state.box, fr->box);
2979 void list_checkpoint(const char* fn, FILE* out)
2986 fp = gmx_fio_open(fn, "r");
2987 CheckpointHeaderContents headerContents;
2988 do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
2989 state.natoms = headerContents.natoms;
2990 state.ngtc = headerContents.ngtc;
2991 state.nnhpres = headerContents.nnhpres;
2992 state.nhchainlength = headerContents.nhchainlength;
2993 state.flags = headerContents.flags_state;
2994 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
2999 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
3005 energyhistory_t enerhist;
3006 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE, headerContents.flags_enh, &enerhist, out);
3010 PullHistory pullHist = {};
3011 ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE, headerContents.flagsPullHistory, &pullHist,
3012 StatePart::pullHistory, out);
3017 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda,
3018 &state.dfhist, out);
3023 edsamhistory_t edsamhist = {};
3024 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
3029 ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE, headerContents.flags_awhh, state.awhHistory.get(), out);
3034 swaphistory_t swaphist = {};
3035 ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
3040 std::vector<gmx_file_position_t> outputfiles;
3041 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
3046 ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
3053 if (gmx_fio_close(fp) != 0)
3055 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3059 /* This routine cannot print tons of data, since it is called before the log file is opened. */
3060 CheckpointHeaderContents read_checkpoint_simulation_part_and_filenames(t_fileio* fp,
3061 std::vector<gmx_file_position_t>* outputfiles)
3064 CheckpointHeaderContents headerContents = read_checkpoint_data(fp, &state, outputfiles);
3065 if (gmx_fio_close(fp) != 0)
3067 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
3069 return headerContents;