src/gromacs/fileio/checkpoint.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /* The source code in this file should be thread-safe.
  37    Please keep it that way. */
  38 #include "gmxpre.h"
  39
  40 #include "checkpoint.h"
  41
  42 #include "config.h"
  43
  44 #include <cerrno>
  45 #include <cstdlib>
  46 #include <cstring>
  47
  48 #include <array>
  49 #include <memory>
  50
  51 #include "buildinfo.h"
  52 #include "gromacs/fileio/filetypes.h"
  53 #include "gromacs/fileio/gmxfio.h"
  54 #include "gromacs/fileio/gmxfio_xdr.h"
  55 #include "gromacs/fileio/xdr_datatype.h"
  56 #include "gromacs/fileio/xdrf.h"
  57 #include "gromacs/gmxlib/network.h"
  58 #include "gromacs/math/vec.h"
  59 #include "gromacs/math/vecdump.h"
  60 #include "gromacs/math/vectypes.h"
  61 #include "gromacs/mdtypes/awh_correlation_history.h"
  62 #include "gromacs/mdtypes/awh_history.h"
  63 #include "gromacs/mdtypes/commrec.h"
  64 #include "gromacs/mdtypes/df_history.h"
  65 #include "gromacs/mdtypes/edsamhistory.h"
  66 #include "gromacs/mdtypes/energyhistory.h"
  67 #include "gromacs/mdtypes/inputrec.h"
  68 #include "gromacs/mdtypes/md_enums.h"
  69 #include "gromacs/mdtypes/mdrunoptions.h"
  70 #include "gromacs/mdtypes/observableshistory.h"
  71 #include "gromacs/mdtypes/pullhistory.h"
  72 #include "gromacs/mdtypes/state.h"
  73 #include "gromacs/mdtypes/swaphistory.h"
  74 #include "gromacs/trajectory/trajectoryframe.h"
  75 #include "gromacs/utility/arrayref.h"
  76 #include "gromacs/utility/baseversion.h"
  77 #include "gromacs/utility/cstringutil.h"
  78 #include "gromacs/utility/fatalerror.h"
  79 #include "gromacs/utility/futil.h"
  80 #include "gromacs/utility/gmxassert.h"
  81 #include "gromacs/utility/int64_to_int.h"
  82 #include "gromacs/utility/programcontext.h"
  83 #include "gromacs/utility/smalloc.h"
  84 #include "gromacs/utility/sysinfo.h"
  85 #include "gromacs/utility/txtdump.h"
  86
  87 #if GMX_FAHCORE
  88 #include "corewrap.h"
  89 #endif
  90
  91 #define CPT_MAGIC1 171817
  92 #define CPT_MAGIC2 171819
  93
  94 /*! \brief Enum of values that describe the contents of a cpt file
  95  * whose format matches a version number
  96  *
  97  * The enum helps the code be more self-documenting and ensure merges
  98  * do not silently resolve when two patches make the same bump. When
  99  * adding new functionality, add a new element just above cptv_Count
 100  * in this enumeration, and write code below that does the right thing
 101  * according to the value of file_version.
 102  */
 103 enum cptv {
 104     cptv_Unknown = 17,                                       /**< Version before numbering scheme */
 105     cptv_RemoveBuildMachineInformation,                      /**< remove functionality that makes mdrun builds non-reproducible */
 106     cptv_ComPrevStepAsPullGroupReference,                    /**< Allow using COM of previous step as pull group PBC reference */
 107     cptv_PullAverage,                                        /**< Added possibility to output average pull force and position */
 108     cptv_Count                                               /**< the total number of cptv versions */
 109 };
 110
 111 /*! \brief Version number of the file format written to checkpoint
 112  * files by this version of the code.
 113  *
 114  * cpt_version should normally only be changed, via adding a new field
 115  * to cptv enumeration, when the header or footer format changes.
 116  *
 117  * The state data format itself is backward and forward compatible.
 118  * But old code can not read a new entry that is present in the file
 119  * (but can read a new format when new entries are not present).
 120  *
 121  * The cpt_version increases whenever the file format in the main
 122  * development branch changes, due to an extension of the cptv enum above.
 123  * Backward compatibility for reading old run input files is maintained
 124  * by checking this version number against that of the file and then using
 125  * the correct code path. */
 126 static const int cpt_version = cptv_Count - 1;
 127
 128
 129 const char *est_names[estNR] =
 130 {
 131     "FE-lambda",
 132     "box", "box-rel", "box-v", "pres_prev",
 133     "nosehoover-xi", "thermostat-integral",
 134     "x", "v", "sdx-unsupported", "CGp", "LD-rng-unsupported", "LD-rng-i-unsupported",
 135     "disre_initf", "disre_rm3tav",
 136     "orire_initf", "orire_Dtav",
 137     "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng-unsupported", "MC-rng-i-unsupported",
 138     "barostat-integral"
 139 };
 140
 141 enum {
 142     eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
 143 };
 144
 145 static const char *eeks_names[eeksNR] =
 146 {
 147     "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
 148     "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
 149 };
 150
 151 enum {
 152     eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
 153     eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
 154     eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
 155     eenhENERGY_DELTA_H_NN,
 156     eenhENERGY_DELTA_H_LIST,
 157     eenhENERGY_DELTA_H_STARTTIME,
 158     eenhENERGY_DELTA_H_STARTLAMBDA,
 159     eenhNR
 160 };
 161
 162 enum {
 163     epullhPULL_NUMCOORDINATES, epullhPULL_NUMGROUPS, epullhPULL_NUMVALUESINXSUM,
 164     epullhPULL_NUMVALUESINFSUM,  epullhNR
 165 };
 166
 167 enum {
 168     epullcoordh_VALUE_REF_SUM, epullcoordh_VALUE_SUM, epullcoordh_DR01_SUM,
 169     epullcoordh_DR23_SUM, epullcoordh_DR45_SUM, epullcoordh_FSCAL_SUM,
 170     epullcoordh_DYNAX_SUM, epullcoordh_NR
 171 };
 172
 173 enum {
 174     epullgrouph_X_SUM, epullgrouph_NR
 175 };
 176
 177 static const char *eenh_names[eenhNR] =
 178 {
 179     "energy_n", "energy_aver", "energy_sum", "energy_nsum",
 180     "energy_sum_sim", "energy_nsum_sim",
 181     "energy_nsteps", "energy_nsteps_sim",
 182     "energy_delta_h_nn",
 183     "energy_delta_h_list",
 184     "energy_delta_h_start_time",
 185     "energy_delta_h_start_lambda"
 186 };
 187
 188 static const char *ePullhNames[epullhNR] =
 189 {
 190     "pullhistory_numcoordinates", "pullhistory_numgroups", "pullhistory_numvaluesinxsum",
 191     "pullhistory_numvaluesinfsum"
 192 };
 193
 194 /* free energy history variables -- need to be preserved over checkpoint */
 195 enum {
 196     edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
 197     edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
 198 };
 199 /* free energy history variable names  */
 200 static const char *edfh_names[edfhNR] =
 201 {
 202     "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
 203     "accumulated_plus", "accumulated_minus", "accumulated_plus_2",  "accumulated_minus_2", "Tij", "Tij_empirical"
 204 };
 205
 206 /* AWH biasing history variables */
 207 enum {
 208     eawhhIN_INITIAL,
 209     eawhhEQUILIBRATEHISTOGRAM,
 210     eawhhHISTSIZE,
 211     eawhhNPOINTS,
 212     eawhhCOORDPOINT, eawhhUMBRELLAGRIDPOINT,
 213     eawhhUPDATELIST,
 214     eawhhLOGSCALEDSAMPLEWEIGHT,
 215     eawhhNUMUPDATES,
 216     eawhhFORCECORRELATIONGRID,
 217     eawhhNR
 218 };
 219
 220 static const char *eawhh_names[eawhhNR] =
 221 {
 222     "awh_in_initial",
 223     "awh_equilibrateHistogram",
 224     "awh_histsize",
 225     "awh_npoints",
 226     "awh_coordpoint", "awh_umbrellaGridpoint",
 227     "awh_updatelist",
 228     "awh_logScaledSampleWeight",
 229     "awh_numupdates",
 230     "awh_forceCorrelationGrid"
 231 };
 232
 233 enum {
 234     epullsPREVSTEPCOM,
 235     epullsNR
 236 };
 237
 238 static const char *epull_prev_step_com_names[epullsNR] =
 239 {
 240     "Pull groups prev step COM"
 241 };
 242
 243
 244 //! Higher level vector element type, only used for formatting checkpoint dumps
 245 enum class CptElementType
 246 {
 247     integer,   //!< integer
 248     real,      //!< float or double, not linked to precision of type real
 249     real3,     //!< float[3] or double[3], not linked to precision of type real
 250     matrix3x3  //!< float[3][3] or double[3][3], not linked to precision of type real
 251 };
 252
 253 //! \brief Parts of the checkpoint state, only used for reporting
 254 enum class StatePart
 255 {
 256     microState,         //!< The microstate of the simulated system
 257     kineticEnergy,      //!< Kinetic energy, needed for T/P-coupling state
 258     energyHistory,      //!< Energy observable statistics
 259     freeEnergyHistory,  //!< Free-energy state and observable statistics
 260     accWeightHistogram, //!< Accelerated weight histogram method state
 261     pullState,          //!< COM of previous step.
 262     pullHistory         //!< Pull history statistics (sums since last written output)
 263 };
 264
 265 //! \brief Return the name of a checkpoint entry based on part and part entry
 266 static const char *entryName(StatePart part, int ecpt)
 267 {
 268     switch (part)
 269     {
 270         case StatePart::microState:         return est_names [ecpt];
 271         case StatePart::kineticEnergy:      return eeks_names[ecpt];
 272         case StatePart::energyHistory:      return eenh_names[ecpt];
 273         case StatePart::freeEnergyHistory:  return edfh_names[ecpt];
 274         case StatePart::accWeightHistogram: return eawhh_names[ecpt];
 275         case StatePart::pullState:          return epull_prev_step_com_names[ecpt];
 276         case StatePart::pullHistory:        return ePullhNames[ecpt];
 277     }
 278
 279     return nullptr;
 280 }
 281
 282 static void cp_warning(FILE *fp)
 283 {
 284     fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
 285 }
 286
 287 [[ noreturn ]] static void cp_error()
 288 {
 289     gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
 290 }
 291
 292 static void do_cpt_string_err(XDR *xd, const char *desc, gmx::ArrayRef<char> s, FILE *list)
 293 {
 294     char *data = s.data();
 295     if (xdr_string(xd, &data, s.size()) == 0)
 296     {
 297         cp_error();
 298     }
 299     if (list)
 300     {
 301         fprintf(list, "%s = %s\n", desc, data);
 302     }
 303 }
 304
 305 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
 306 {
 307     if (xdr_int(xd, i) == 0)
 308     {
 309         return -1;
 310     }
 311     if (list)
 312     {
 313         fprintf(list, "%s = %d\n", desc, *i);
 314     }
 315     return 0;
 316 }
 317
 318 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
 319 {
 320     if (list)
 321     {
 322         fprintf(list, "%s = ", desc);
 323     }
 324     bool_t res = 1;
 325     for (int j = 0; j < n && res; j++)
 326     {
 327         res &= xdr_u_char(xd, &i[j]);
 328         if (list)
 329         {
 330             fprintf(list, "%02x", i[j]);
 331         }
 332     }
 333     if (list)
 334     {
 335         fprintf(list, "\n");
 336     }
 337     if (res == 0)
 338     {
 339         return -1;
 340     }
 341
 342     return 0;
 343 }
 344
 345 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
 346 {
 347     if (do_cpt_int(xd, desc, i, list) < 0)
 348     {
 349         cp_error();
 350     }
 351 }
 352
 353 static void do_cpt_bool_err(XDR *xd, const char *desc, bool *b, FILE *list)
 354 {
 355     int i   = static_cast<int>(*b);
 356
 357     if (do_cpt_int(xd, desc, &i, list) < 0)
 358     {
 359         cp_error();
 360     }
 361
 362     *b = (i != 0);
 363 }
 364
 365 static void do_cpt_step_err(XDR *xd, const char *desc, int64_t *i, FILE *list)
 366 {
 367     char   buf[STEPSTRSIZE];
 368
 369     if (xdr_int64(xd, i) == 0)
 370     {
 371         cp_error();
 372     }
 373     if (list)
 374     {
 375         fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
 376     }
 377 }
 378
 379 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
 380 {
 381     if (xdr_double(xd, f) == 0)
 382     {
 383         cp_error();
 384     }
 385     if (list)
 386     {
 387         fprintf(list, "%s = %f\n", desc, *f);
 388     }
 389 }
 390
 391 static void do_cpt_real_err(XDR *xd, real *f)
 392 {
 393 #if GMX_DOUBLE
 394     bool_t res = xdr_double(xd, f);
 395 #else
 396     bool_t res = xdr_float(xd, f);
 397 #endif
 398     if (res == 0)
 399     {
 400         cp_error();
 401     }
 402 }
 403
 404 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
 405 {
 406     for (int i = 0; i < n; i++)
 407     {
 408         for (int j = 0; j < DIM; j++)
 409         {
 410             do_cpt_real_err(xd, &f[i][j]);
 411         }
 412     }
 413
 414     if (list)
 415     {
 416         pr_rvecs(list, 0, desc, f, n);
 417     }
 418 }
 419
 420 template <typename T>
 421 struct xdr_type
 422 {
 423 };
 424
 425 template <>
 426 struct xdr_type<int>
 427 {
 428     static const int value = xdr_datatype_int;
 429 };
 430
 431 template <>
 432 struct xdr_type<float>
 433 {
 434     static const int value = xdr_datatype_float;
 435 };
 436
 437 template <>
 438 struct xdr_type<double>
 439 {
 440     static const int value = xdr_datatype_double;
 441 };
 442
 443 //! \brief Returns size in byte of an xdr_datatype
 444 static inline unsigned int sizeOfXdrType(int xdrType)
 445 {
 446     switch (xdrType)
 447     {
 448         case xdr_datatype_int:
 449             return sizeof(int);
 450         case xdr_datatype_float:
 451             return sizeof(float);
 452         case xdr_datatype_double:
 453             return sizeof(double);
 454         default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
 455     }
 456
 457     return 0;
 458 }
 459
 460 //! \brief Returns the XDR process function for i/o of an XDR type
 461 static inline xdrproc_t xdrProc(int xdrType)
 462 {
 463     switch (xdrType)
 464     {
 465         case xdr_datatype_int:
 466             return reinterpret_cast<xdrproc_t>(xdr_int);
 467         case xdr_datatype_float:
 468             return reinterpret_cast<xdrproc_t>(xdr_float);
 469         case xdr_datatype_double:
 470             return reinterpret_cast<xdrproc_t>(xdr_double);
 471         default: GMX_RELEASE_ASSERT(false, "XDR data type not implemented");
 472     }
 473
 474     return nullptr;
 475 }
 476
 477 /*! \brief Lists or only reads an xdr vector from checkpoint file
 478  *
 479  * When list!=NULL reads and lists the \p nf vector elements of type \p xdrType.
 480  * The header for the print is set by \p part and \p ecpt.
 481  * The formatting of the printing is set by \p cptElementType.
 482  * When list==NULL only reads the elements.
 483  */
 484 static bool_t listXdrVector(XDR *xd, StatePart part, int ecpt, int nf, int xdrType,
 485                             FILE *list, CptElementType cptElementType)
 486 {
 487     bool_t             res = 0;
 488
 489     const unsigned int elemSize = sizeOfXdrType(xdrType);
 490     std::vector<char>  data(nf*elemSize);
 491     res = xdr_vector(xd, data.data(), nf, elemSize, xdrProc(xdrType));
 492
 493     if (list != nullptr)
 494     {
 495         switch (xdrType)
 496         {
 497             case xdr_datatype_int:
 498                 pr_ivec(list, 0, entryName(part, ecpt), reinterpret_cast<const int *>(data.data()), nf, TRUE);
 499                 break;
 500             case xdr_datatype_float:
 501 #if !GMX_DOUBLE
 502                 if (cptElementType == CptElementType::real3)
 503                 {
 504                     pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
 505                 }
 506                 else
 507 #endif
 508                 {
 509                     /* Note: With double precision code dumping a single precision rvec will produce float iso rvec print, but that's a minor annoyance */
 510                     pr_fvec(list, 0, entryName(part, ecpt), reinterpret_cast<const float *>(data.data()), nf, TRUE);
 511                 }
 512                 break;
 513             case xdr_datatype_double:
 514 #if GMX_DOUBLE
 515                 if (cptElementType == CptElementType::real3)
 516                 {
 517                     pr_rvecs(list, 0, entryName(part, ecpt), reinterpret_cast<const rvec *>(data.data()), nf/3);
 518                 }
 519                 else
 520 #endif
 521                 {
 522                     /* Note: With single precision code dumping a double precision rvec will produce float iso rvec print, but that's a minor annoyance */
 523                     pr_dvec(list, 0, entryName(part, ecpt), reinterpret_cast<const double *>(data.data()), nf, TRUE);
 524                 }
 525                 break;
 526             default: GMX_RELEASE_ASSERT(false, "Data type not implemented for listing");
 527         }
 528     }
 529
 530     return res;
 531 }
 532
 533 //! \brief Convert a double array, typed char*, to float
 534 gmx_unused static void convertArrayRealPrecision(const char *c, float *v, int n)
 535 {
 536     const double *d = reinterpret_cast<const double *>(c);
 537     for (int i = 0; i < n; i++)
 538     {
 539         v[i] = static_cast<float>(d[i]);
 540     }
 541 }
 542
 543 //! \brief Convert a float array, typed char*, to double
 544 static void convertArrayRealPrecision(const char *c, double *v, int n)
 545 {
 546     const float *f = reinterpret_cast<const float *>(c);
 547     for (int i = 0; i < n; i++)
 548     {
 549         v[i] = static_cast<double>(f[i]);
 550     }
 551 }
 552
 553 //! \brief Generate an error for trying to convert to integer
 554 static void convertArrayRealPrecision(const char gmx_unused *c, int gmx_unused *v, int gmx_unused n)
 555 {
 556     GMX_RELEASE_ASSERT(false, "We only expect type mismatches between float and double, not integer");
 557 }
 558
 559 /*! \brief Low-level routine for reading/writing a vector of reals from/to file.
 560  *
 561  * This is the only routine that does the actually i/o of real vector,
 562  * all other routines are intermediate level routines for specific real
 563  * data types, calling this routine.
 564  * Currently this routine is (too) complex, since it handles both real *
 565  * and std::vector<real>. Using real * is deprecated and this routine
 566  * will simplify a lot when only std::vector needs to be supported.
 567  *
 568  * The routine is generic to vectors with different allocators,
 569  * because as part of reading a checkpoint there are vectors whose
 570  * size is not known until reading has progressed far enough, so a
 571  * resize method must be called.
 572  *
 573  * When not listing, we use either v or vector, depending on which is !=NULL.
 574  * If nval >= 0, nval is used; on read this should match the passed value.
 575  * If nval n<0, *nptr (with v) or vector->size() is used. On read using v,
 576  * the value is stored in nptr
 577  */
 578 template<typename T, typename AllocatorType>
 579 static int doVectorLow(XDR *xd, StatePart part, int ecpt, int sflags,
 580                        int nval, int *nptr,
 581                        T **v, std::vector<T, AllocatorType> *vector,
 582                        FILE *list, CptElementType cptElementType)
 583 {
 584     GMX_RELEASE_ASSERT(list != nullptr || (v != nullptr && vector == nullptr) || (v == nullptr && vector != nullptr), "Without list, we should have exactly one of v and vector != NULL");
 585
 586     bool_t res = 0;
 587
 588     int    numElemInTheFile;
 589     if (list == nullptr)
 590     {
 591         if (nval >= 0)
 592         {
 593             GMX_RELEASE_ASSERT(nptr == nullptr, "With nval>=0 we should have nptr==NULL");
 594             numElemInTheFile = nval;
 595         }
 596         else
 597         {
 598             if (v != nullptr)
 599             {
 600                 GMX_RELEASE_ASSERT(nptr != nullptr, "With nval<0 we should have nptr!=NULL");
 601                 numElemInTheFile = *nptr;
 602             }
 603             else
 604             {
 605                 numElemInTheFile = vector->size();
 606             }
 607         }
 608     }
 609     /* Read/write the vector element count */
 610     res = xdr_int(xd, &numElemInTheFile);
 611     if (res == 0)
 612     {
 613         return -1;
 614     }
 615     /* Read/write the element data type */
 616     constexpr int xdrTypeInTheCode = xdr_type<T>::value;
 617     int           xdrTypeInTheFile = xdrTypeInTheCode;
 618     res = xdr_int(xd, &xdrTypeInTheFile);
 619     if (res == 0)
 620     {
 621         return -1;
 622     }
 623
 624     if (list == nullptr && (sflags & (1 << ecpt)))
 625     {
 626         if (nval >= 0)
 627         {
 628             if (numElemInTheFile != nval)
 629             {
 630                 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), nval, numElemInTheFile);
 631             }
 632         }
 633         else if (nptr != nullptr)
 634         {
 635             *nptr = numElemInTheFile;
 636         }
 637
 638         bool typesMatch = (xdrTypeInTheFile == xdrTypeInTheCode);
 639         if (!typesMatch)
 640         {
 641             char buf[STRLEN];
 642             sprintf(buf, "mismatch for state entry %s, code precision is %s, file precision is %s",
 643                     entryName(part, ecpt),
 644                     xdr_datatype_names[xdrTypeInTheCode],
 645                     xdr_datatype_names[xdrTypeInTheFile]);
 646
 647             /* Matching int and real should never occur, but check anyhow */
 648             if (xdrTypeInTheFile == xdr_datatype_int ||
 649                 xdrTypeInTheCode == xdr_datatype_int)
 650             {
 651                 gmx_fatal(FARGS, "Type %s: incompatible checkpoint formats or corrupted checkpoint file.", buf);
 652             }
 653         }
 654
 655         T *vp;
 656         if (v != nullptr)
 657         {
 658             if (*v == nullptr)
 659             {
 660                 snew(*v, numElemInTheFile);
 661             }
 662             vp = *v;
 663         }
 664         else
 665         {
 666             /* This conditional ensures that we don't resize on write.
 667              * In particular in the state where this code was written
 668              * vector has a size of numElemInThefile and we
 669              * don't want to lose that padding here.
 670              */
 671             if (vector->size() < static_cast<unsigned int>(numElemInTheFile))
 672             {
 673                 vector->resize(numElemInTheFile);
 674             }
 675             vp = vector->data();
 676         }
 677
 678         char *vChar;
 679         if (typesMatch)
 680         {
 681             vChar = reinterpret_cast<char *>(vp);
 682         }
 683         else
 684         {
 685             snew(vChar, numElemInTheFile*sizeOfXdrType(xdrTypeInTheFile));
 686         }
 687         res = xdr_vector(xd, vChar,
 688                          numElemInTheFile, sizeOfXdrType(xdrTypeInTheFile),
 689                          xdrProc(xdrTypeInTheFile));
 690         if (res == 0)
 691         {
 692             return -1;
 693         }
 694
 695         if (!typesMatch)
 696         {
 697             /* In the old code float-double conversion came for free.
 698              * In the new code we still support it, mainly because
 699              * the tip4p_continue regression test makes use of this.
 700              * It's an open question if we do or don't want to allow this.
 701              */
 702             convertArrayRealPrecision(vChar, vp, numElemInTheFile);
 703             sfree(vChar);
 704         }
 705     }
 706     else
 707     {
 708         res = listXdrVector(xd, part, ecpt, numElemInTheFile, xdrTypeInTheFile,
 709                             list, cptElementType);
 710     }
 711
 712     return 0;
 713 }
 714
 715 //! \brief Read/Write a std::vector, on read checks the number of elements matches \p numElements, if specified.
 716 template <typename T>
 717 static int doVector(XDR *xd, StatePart part, int ecpt, int sflags,
 718                     std::vector<T> *vector, FILE *list, int numElements = -1)
 719 {
 720     return doVectorLow<T>(xd, part, ecpt, sflags, numElements, nullptr, nullptr, vector, list, CptElementType::real);
 721 }
 722
 723 //! \brief Read/Write an ArrayRef<real>.
 724 static int doRealArrayRef(XDR *xd, StatePart part, int ecpt, int sflags,
 725                           gmx::ArrayRef<real> vector, FILE *list)
 726 {
 727     real *v_real = vector.data();
 728     return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, vector.size(), nullptr, &v_real, nullptr, list, CptElementType::real);
 729 }
 730
 731 //! Convert from view of RVec to view of real.
 732 static gmx::ArrayRef<real>
 733 realArrayRefFromRVecArrayRef(gmx::ArrayRef<gmx::RVec> ofRVecs)
 734 {
 735     return gmx::arrayRefFromArray<real>(reinterpret_cast<real *>(ofRVecs.data()), ofRVecs.size() * DIM);
 736 }
 737
 738 //! \brief Read/Write a PaddedVector whose value_type is RVec.
 739 template <typename PaddedVectorOfRVecType>
 740 static int doRvecVector(XDR *xd, StatePart part, int ecpt, int sflags,
 741                         PaddedVectorOfRVecType *v, int numAtoms, FILE *list)
 742 {
 743     const int numReals = numAtoms*DIM;
 744
 745     if (list == nullptr)
 746     {
 747         GMX_RELEASE_ASSERT(sflags & (1 << ecpt), "When not listing, the flag for the entry should be set when requesting i/o");
 748         GMX_RELEASE_ASSERT(v->size() == numAtoms, "v should have sufficient size for numAtoms");
 749
 750         return doRealArrayRef(xd, part, ecpt, sflags, realArrayRefFromRVecArrayRef(makeArrayRef(*v)), list);
 751     }
 752     else
 753     {
 754         // Use the rebind facility to change the value_type of the
 755         // allocator from RVec to real.
 756         using realAllocator = typename std::allocator_traits<typename PaddedVectorOfRVecType::allocator_type>::template rebind_alloc<real>;
 757         return doVectorLow<real, realAllocator>(xd, part, ecpt, sflags, numReals, nullptr, nullptr, nullptr, list, CptElementType::real);
 758     }
 759 }
 760
 761 /* This function stores n along with the reals for reading,
 762  * but on reading it assumes that n matches the value in the checkpoint file,
 763  * a fatal error is generated when this is not the case.
 764  */
 765 static int do_cpte_reals(XDR *xd, StatePart part, int ecpt, int sflags,
 766                          int n, real **v, FILE *list)
 767 {
 768     return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
 769 }
 770
 771 /* This function does the same as do_cpte_reals,
 772  * except that on reading it ignores the passed value of *n
 773  * and stores the value read from the checkpoint file in *n.
 774  */
 775 static int do_cpte_n_reals(XDR *xd, StatePart part, int ecpt, int sflags,
 776                            int *n, real **v, FILE *list)
 777 {
 778     return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, -1, n, v, nullptr, list, CptElementType::real);
 779 }
 780
 781 static int do_cpte_real(XDR *xd, StatePart part, int ecpt, int sflags,
 782                         real *r, FILE *list)
 783 {
 784     return doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, 1, nullptr, &r, nullptr, list, CptElementType::real);
 785 }
 786
 787 static int do_cpte_ints(XDR *xd, StatePart part, int ecpt, int sflags,
 788                         int n, int **v, FILE *list)
 789 {
 790     return doVectorLow < int, std::allocator < int>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::integer);
 791 }
 792
 793 static int do_cpte_int(XDR *xd, StatePart part, int ecpt, int sflags,
 794                        int *i, FILE *list)
 795 {
 796     return do_cpte_ints(xd, part, ecpt, sflags, 1, &i, list);
 797 }
 798
 799 static int do_cpte_bool(XDR *xd, StatePart part, int ecpt, int sflags,
 800                         bool *b, FILE *list)
 801 {
 802     int i   = static_cast<int>(*b);
 803     int ret = do_cpte_int(xd, part, ecpt, sflags, &i, list);
 804     *b = (i != 0);
 805     return ret;
 806 }
 807
 808 static int do_cpte_doubles(XDR *xd, StatePart part, int ecpt, int sflags,
 809                            int n, double **v, FILE *list)
 810 {
 811     return doVectorLow < double, std::allocator < double>>(xd, part, ecpt, sflags, n, nullptr, v, nullptr, list, CptElementType::real);
 812 }
 813
 814 static int do_cpte_double(XDR *xd, StatePart part, int ecpt, int sflags,
 815                           double *r, FILE *list)
 816 {
 817     return do_cpte_doubles(xd, part, ecpt, sflags, 1, &r, list);
 818 }
 819
 820 static int do_cpte_matrix(XDR *xd, StatePart part, int ecpt, int sflags,
 821                           matrix v, FILE *list)
 822 {
 823     real *vr;
 824     int   ret;
 825
 826     vr  = &(v[0][0]);
 827     ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
 828                                                       DIM*DIM, nullptr, &vr, nullptr, nullptr, CptElementType::matrix3x3);
 829
 830     if (list && ret == 0)
 831     {
 832         pr_rvecs(list, 0, entryName(part, ecpt), v, DIM);
 833     }
 834
 835     return ret;
 836 }
 837
 838
 839 static int do_cpte_nmatrix(XDR *xd, StatePart part, int ecpt, int sflags,
 840                            int n, real **v, FILE *list)
 841 {
 842     int   i;
 843     int   ret, reti;
 844     char  name[CPTSTRLEN];
 845
 846     ret = 0;
 847     if (v == nullptr)
 848     {
 849         snew(v, n);
 850     }
 851     for (i = 0; i < n; i++)
 852     {
 853         reti = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags, n, nullptr, &(v[i]), nullptr, nullptr, CptElementType::matrix3x3);
 854         if (list && reti == 0)
 855         {
 856             sprintf(name, "%s[%d]", entryName(part, ecpt), i);
 857             pr_reals(list, 0, name, v[i], n);
 858         }
 859         if (reti != 0)
 860         {
 861             ret = reti;
 862         }
 863     }
 864     return ret;
 865 }
 866
 867 static int do_cpte_matrices(XDR *xd, StatePart part, int ecpt, int sflags,
 868                             int n, matrix **v, FILE *list)
 869 {
 870     bool_t  res = 0;
 871     matrix *vp, *va = nullptr;
 872     real   *vr;
 873     int     nf, i, j, k;
 874     int     ret;
 875
 876     nf  = n;
 877     res = xdr_int(xd, &nf);
 878     if (res == 0)
 879     {
 880         return -1;
 881     }
 882     if (list == nullptr && nf != n)
 883     {
 884         gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", entryName(part, ecpt), n, nf);
 885     }
 886     if (list || !(sflags & (1<<ecpt)))
 887     {
 888         snew(va, nf);
 889         vp = va;
 890     }
 891     else
 892     {
 893         if (*v == nullptr)
 894         {
 895             snew(*v, nf);
 896         }
 897         vp = *v;
 898     }
 899     snew(vr, nf*DIM*DIM);
 900     for (i = 0; i < nf; i++)
 901     {
 902         for (j = 0; j < DIM; j++)
 903         {
 904             for (k = 0; k < DIM; k++)
 905             {
 906                 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
 907             }
 908         }
 909     }
 910     ret = doVectorLow < real, std::allocator < real>>(xd, part, ecpt, sflags,
 911                                                       nf*DIM*DIM, nullptr, &vr, nullptr, nullptr,
 912                                                       CptElementType::matrix3x3);
 913     for (i = 0; i < nf; i++)
 914     {
 915         for (j = 0; j < DIM; j++)
 916         {
 917             for (k = 0; k < DIM; k++)
 918             {
 919                 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
 920             }
 921         }
 922     }
 923     sfree(vr);
 924
 925     if (list && ret == 0)
 926     {
 927         for (i = 0; i < nf; i++)
 928         {
 929             pr_rvecs(list, 0, entryName(part, ecpt), vp[i], DIM);
 930         }
 931     }
 932     if (va)
 933     {
 934         sfree(va);
 935     }
 936
 937     return ret;
 938 }
 939
 940 static void do_cpt_header(XDR *xd, gmx_bool bRead, FILE *list,
 941                           CheckpointHeaderContents *contents)
 942 {
 943     bool_t res = 0;
 944     int    magic;
 945
 946     if (bRead)
 947     {
 948         magic = -1;
 949     }
 950     else
 951     {
 952         magic = CPT_MAGIC1;
 953     }
 954     res = xdr_int(xd, &magic);
 955     if (res == 0)
 956     {
 957         gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
 958     }
 959     if (magic != CPT_MAGIC1)
 960     {
 961         gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
 962                   "The checkpoint file is corrupted or not a checkpoint file",
 963                   magic, CPT_MAGIC1);
 964     }
 965     char fhost[255];
 966     if (!bRead)
 967     {
 968         gmx_gethostname(fhost, 255);
 969     }
 970     do_cpt_string_err(xd, "GROMACS version", contents->version, list);
 971     // The following fields are no longer ever written with meaningful
 972     // content, but because they precede the file version, there is no
 973     // good way for new code to read the old and new formats, nor a
 974     // good way for old code to avoid giving an error while reading a
 975     // new format. So we read and write a field that no longer has a
 976     // purpose.
 977     do_cpt_string_err(xd, "GROMACS build time UNUSED", contents->btime_UNUSED, list);
 978     do_cpt_string_err(xd, "GROMACS build user UNUSED", contents->buser_UNUSED, list);
 979     do_cpt_string_err(xd, "GROMACS build host UNUSED", contents->bhost_UNUSED, list);
 980     do_cpt_string_err(xd, "generating program", contents->fprog, list);
 981     do_cpt_string_err(xd, "generation time", contents->ftime, list);
 982     contents->file_version = cpt_version;
 983     do_cpt_int_err(xd, "checkpoint file version", &contents->file_version, list);
 984     if (contents->file_version > cpt_version)
 985     {
 986         gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", contents->file_version, cpt_version);
 987     }
 988     if (contents->file_version >= 13)
 989     {
 990         do_cpt_int_err(xd, "GROMACS double precision", &contents->double_prec, list);
 991     }
 992     else
 993     {
 994         contents->double_prec = -1;
 995     }
 996     if (contents->file_version >= 12)
 997     {
 998         do_cpt_string_err(xd, "generating host", fhost, list);
 999     }
1000     do_cpt_int_err(xd, "#atoms", &contents->natoms, list);
1001     do_cpt_int_err(xd, "#T-coupling groups", &contents->ngtc, list);
1002     if (contents->file_version >= 10)
1003     {
1004         do_cpt_int_err(xd, "#Nose-Hoover T-chains", &contents->nhchainlength, list);
1005     }
1006     else
1007     {
1008         contents->nhchainlength = 1;
1009     }
1010     if (contents->file_version >= 11)
1011     {
1012         do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", &contents->nnhpres, list);
1013     }
1014     else
1015     {
1016         contents->nnhpres = 0;
1017     }
1018     if (contents->file_version >= 14)
1019     {
1020         do_cpt_int_err(xd, "# of total lambda states ", &contents->nlambda, list);
1021     }
1022     else
1023     {
1024         contents->nlambda = 0;
1025     }
1026     do_cpt_int_err(xd, "integrator", &contents->eIntegrator, list);
1027     if (contents->file_version >= 3)
1028     {
1029         do_cpt_int_err(xd, "simulation part #", &contents->simulation_part, list);
1030     }
1031     else
1032     {
1033         contents->simulation_part = 1;
1034     }
1035     if (contents->file_version >= 5)
1036     {
1037         do_cpt_step_err(xd, "step", &contents->step, list);
1038     }
1039     else
1040     {
1041         int idum = 0;
1042         do_cpt_int_err(xd, "step", &idum, list);
1043         contents->step = static_cast<int64_t>(idum);
1044     }
1045     do_cpt_double_err(xd, "t", &contents->t, list);
1046     do_cpt_int_err(xd, "#PP-ranks", &contents->nnodes, list);
1047     do_cpt_int_err(xd, "dd_nc[x]", &contents->dd_nc[XX], list);
1048     do_cpt_int_err(xd, "dd_nc[y]", &contents->dd_nc[YY], list);
1049     do_cpt_int_err(xd, "dd_nc[z]", &contents->dd_nc[ZZ], list);
1050     do_cpt_int_err(xd, "#PME-only ranks", &contents->npme, list);
1051     do_cpt_int_err(xd, "state flags", &contents->flags_state, list);
1052     if (contents->file_version >= 4)
1053     {
1054         do_cpt_int_err(xd, "ekin data flags", &contents->flags_eks, list);
1055         do_cpt_int_err(xd, "energy history flags", &contents->flags_enh, list);
1056     }
1057     else
1058     {
1059         contents->flags_eks   = 0;
1060         contents->flags_enh   = (contents->flags_state >> (estORIRE_DTAV+1));
1061         contents->flags_state = (contents->flags_state & ~((1<<(estORIRE_DTAV+1)) |
1062                                                            (1<<(estORIRE_DTAV+2)) |
1063                                                            (1<<(estORIRE_DTAV+3))));
1064     }
1065     if (contents->file_version >= 14)
1066     {
1067         do_cpt_int_err(xd, "df history flags", &contents->flags_dfh, list);
1068     }
1069     else
1070     {
1071         contents->flags_dfh = 0;
1072     }
1073
1074     if (contents->file_version >= 15)
1075     {
1076         do_cpt_int_err(xd, "ED data sets", &contents->nED, list);
1077     }
1078     else
1079     {
1080         contents->nED = 0;
1081     }
1082
1083     if (contents->file_version >= 16)
1084     {
1085         do_cpt_int_err(xd, "swap", &contents->eSwapCoords, list);
1086     }
1087     else
1088     {
1089         contents->eSwapCoords = eswapNO;
1090     }
1091
1092     if (contents->file_version >= 17)
1093     {
1094         do_cpt_int_err(xd, "AWH history flags", &contents->flags_awhh, list);
1095     }
1096     else
1097     {
1098         contents->flags_awhh = 0;
1099     }
1100
1101     if (contents->file_version >= 18)
1102     {
1103         do_cpt_int_err(xd, "pull history flags", &contents->flagsPullHistory, list);
1104     }
1105     else
1106     {
1107         contents->flagsPullHistory = 0;
1108     }
1109 }
1110
1111 static int do_cpt_footer(XDR *xd, int file_version)
1112 {
1113     bool_t res = 0;
1114     int    magic;
1115
1116     if (file_version >= 2)
1117     {
1118         magic = CPT_MAGIC2;
1119         res   = xdr_int(xd, &magic);
1120         if (res == 0)
1121         {
1122             cp_error();
1123         }
1124         if (magic != CPT_MAGIC2)
1125         {
1126             return -1;
1127         }
1128     }
1129
1130     return 0;
1131 }
1132
1133 static int do_cpt_state(XDR *xd,
1134                         int fflags, t_state *state,
1135                         FILE *list)
1136 {
1137     int             ret    = 0;
1138     const StatePart part   = StatePart::microState;
1139     const int       sflags = state->flags;
1140     // If reading, state->natoms was probably just read, so
1141     // allocations need to be managed. If writing, this won't change
1142     // anything that matters.
1143     state_change_natoms(state, state->natoms);
1144     for (int i = 0; (i < estNR && ret == 0); i++)
1145     {
1146         if (fflags & (1<<i))
1147         {
1148             switch (i)
1149             {
1150                 case estLAMBDA:  ret      = doRealArrayRef(xd, part, i, sflags, gmx::arrayRefFromArray<real>(state->lambda.data(), state->lambda.size()), list); break;
1151                 case estFEPSTATE: ret     = do_cpte_int (xd, part, i, sflags, &state->fep_state, list); break;
1152                 case estBOX:     ret      = do_cpte_matrix(xd, part, i, sflags, state->box, list); break;
1153                 case estBOX_REL: ret      = do_cpte_matrix(xd, part, i, sflags, state->box_rel, list); break;
1154                 case estBOXV:    ret      = do_cpte_matrix(xd, part, i, sflags, state->boxv, list); break;
1155                 case estPRES_PREV: ret    = do_cpte_matrix(xd, part, i, sflags, state->pres_prev, list); break;
1156                 case estSVIR_PREV:  ret   = do_cpte_matrix(xd, part, i, sflags, state->svir_prev, list); break;
1157                 case estFVIR_PREV:  ret   = do_cpte_matrix(xd, part, i, sflags, state->fvir_prev, list); break;
1158                 case estNH_XI:   ret      = doVector<double>(xd, part, i, sflags, &state->nosehoover_xi, list); break;
1159                 case estNH_VXI:  ret      = doVector<double>(xd, part, i, sflags, &state->nosehoover_vxi, list); break;
1160                 case estNHPRES_XI:   ret  = doVector<double>(xd, part, i, sflags, &state->nhpres_xi, list); break;
1161                 case estNHPRES_VXI:  ret  = doVector<double>(xd, part, i, sflags, &state->nhpres_vxi, list); break;
1162                 case estTHERM_INT:   ret  = doVector<double>(xd, part, i, sflags, &state->therm_integral, list); break;
1163                 case estBAROS_INT:   ret  = do_cpte_double(xd, part, i, sflags, &state->baros_integral, list); break;
1164                 case estVETA:    ret      = do_cpte_real(xd, part, i, sflags, &state->veta, list); break;
1165                 case estVOL0:    ret      = do_cpte_real(xd, part, i, sflags, &state->vol0, list); break;
1166                 case estX:       ret      = doRvecVector(xd, part, i, sflags, &state->x, state->natoms, list); break;
1167                 case estV:       ret      = doRvecVector(xd, part, i, sflags, &state->v, state->natoms, list); break;
1168                 /* The RNG entries are no longer written,
1169                  * the next 4 lines are only for reading old files.
1170                  */
1171                 case estLD_RNG_NOTSUPPORTED:  ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1172                 case estLD_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1173                 case estMC_RNG_NOTSUPPORTED:  ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1174                 case estMC_RNGI_NOTSUPPORTED: ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list); break;
1175                 case estDISRE_INITF:  ret         = do_cpte_real (xd, part, i, sflags, &state->hist.disre_initf, list); break;
1176                 case estDISRE_RM3TAV: ret         = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1177                 case estORIRE_INITF:  ret         = do_cpte_real (xd, part, i, sflags, &state->hist.orire_initf, list); break;
1178                 case estORIRE_DTAV:   ret         = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1179                 case estPULLCOMPREVSTEP:      ret = doVector<double>(xd, part, i, sflags, &state->pull_com_prev_step, list); break;
1180                 default:
1181                     gmx_fatal(FARGS, "Unknown state entry %d\n"
1182                               "You are reading a checkpoint file written by different code, which is not supported", i);
1183             }
1184         }
1185     }
1186     return ret;
1187 }
1188
1189 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1190                             FILE *list)
1191 {
1192     int             ret  = 0;
1193
1194     const StatePart part = StatePart::kineticEnergy;
1195     for (int i = 0; (i < eeksNR && ret == 0); i++)
1196     {
1197         if (fflags & (1<<i))
1198         {
1199             switch (i)
1200             {
1201
1202                 case eeksEKIN_N:     ret = do_cpte_int(xd, part, i, fflags, &ekins->ekin_n, list); break;
1203                 case eeksEKINH:     ret  = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1204                 case eeksEKINF:      ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1205                 case eeksEKINO:      ret = do_cpte_matrices(xd, part, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1206                 case eeksEKINTOTAL:  ret = do_cpte_matrix(xd, part, i, fflags, ekins->ekin_total, list); break;
1207                 case eeksEKINSCALEF: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscalef_nhc, list); break;
1208                 case eeksVSCALE:     ret = doVector<double>(xd, part, i, fflags, &ekins->vscale_nhc, list); break;
1209                 case eeksEKINSCALEH: ret = doVector<double>(xd, part, i, fflags, &ekins->ekinscaleh_nhc, list); break;
1210                 case eeksDEKINDL:   ret  = do_cpte_real(xd, part, i, fflags, &ekins->dekindl, list); break;
1211                 case eeksMVCOS:      ret = do_cpte_real(xd, part, i, fflags, &ekins->mvcos, list); break;
1212                 default:
1213                     gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1214                               "You are probably reading a new checkpoint file with old code", i);
1215             }
1216         }
1217     }
1218
1219     return ret;
1220 }
1221
1222
1223 static int do_cpt_swapstate(XDR *xd, gmx_bool bRead,
1224                             int eSwapCoords, swaphistory_t *swapstate, FILE *list)
1225 {
1226     int swap_cpt_version = 2;
1227
1228     if (eSwapCoords == eswapNO)
1229     {
1230         return 0;
1231     }
1232
1233     swapstate->bFromCpt    = bRead;
1234     swapstate->eSwapCoords = eSwapCoords;
1235
1236     do_cpt_int_err(xd, "swap checkpoint version", &swap_cpt_version, list);
1237     if (bRead && swap_cpt_version < 2)
1238     {
1239         gmx_fatal(FARGS, "Cannot read checkpoint files that were written with old versions"
1240                   "of the ion/water position swapping protocol.\n");
1241     }
1242
1243     do_cpt_int_err(xd, "swap coupling steps", &swapstate->nAverage, list);
1244
1245     /* When reading, init_swapcoords has not been called yet,
1246      * so we have to allocate memory first. */
1247     do_cpt_int_err(xd, "number of ion types", &swapstate->nIonTypes, list);
1248     if (bRead)
1249     {
1250         snew(swapstate->ionType, swapstate->nIonTypes);
1251     }
1252
1253     for (int ic = 0; ic < eCompNR; ic++)
1254     {
1255         for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1256         {
1257             swapstateIons_t *gs = &swapstate->ionType[ii];
1258
1259             if (bRead)
1260             {
1261                 do_cpt_int_err(xd, "swap requested atoms", &gs->nMolReq[ic], list);
1262             }
1263             else
1264             {
1265                 do_cpt_int_err(xd, "swap requested atoms p", gs->nMolReq_p[ic], list);
1266             }
1267
1268             if (bRead)
1269             {
1270                 do_cpt_int_err(xd, "swap influx net", &gs->inflow_net[ic], list);
1271             }
1272             else
1273             {
1274                 do_cpt_int_err(xd, "swap influx net p", gs->inflow_net_p[ic], list);
1275             }
1276
1277             if (bRead && (nullptr == gs->nMolPast[ic]) )
1278             {
1279                 snew(gs->nMolPast[ic], swapstate->nAverage);
1280             }
1281
1282             for (int j = 0; j < swapstate->nAverage; j++)
1283             {
1284                 if (bRead)
1285                 {
1286                     do_cpt_int_err(xd, "swap past atom counts", &gs->nMolPast[ic][j], list);
1287                 }
1288                 else
1289                 {
1290                     do_cpt_int_err(xd, "swap past atom counts p", &gs->nMolPast_p[ic][j], list);
1291                 }
1292             }
1293         }
1294     }
1295
1296     /* Ion flux per channel */
1297     for (int ic = 0; ic < eChanNR; ic++)
1298     {
1299         for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1300         {
1301             swapstateIons_t *gs = &swapstate->ionType[ii];
1302
1303             if (bRead)
1304             {
1305                 do_cpt_int_err(xd, "channel flux A->B", &gs->fluxfromAtoB[ic], list);
1306             }
1307             else
1308             {
1309                 do_cpt_int_err(xd, "channel flux A->B p", gs->fluxfromAtoB_p[ic], list);
1310             }
1311         }
1312     }
1313
1314     /* Ion flux leakage */
1315     if (bRead)
1316     {
1317         do_cpt_int_err(xd, "flux leakage", &swapstate->fluxleak, list);
1318     }
1319     else
1320     {
1321         do_cpt_int_err(xd, "flux leakage", swapstate->fluxleak_p, list);
1322     }
1323
1324     /* Ion history */
1325     for (int ii = 0; ii < swapstate->nIonTypes; ii++)
1326     {
1327         swapstateIons_t *gs = &swapstate->ionType[ii];
1328
1329         do_cpt_int_err(xd, "number of ions", &gs->nMol, list);
1330
1331         if (bRead)
1332         {
1333             snew(gs->channel_label, gs->nMol);
1334             snew(gs->comp_from, gs->nMol);
1335         }
1336
1337         do_cpt_u_chars(xd, "channel history", gs->nMol, gs->channel_label, list);
1338         do_cpt_u_chars(xd, "domain history", gs->nMol, gs->comp_from, list);
1339     }
1340
1341     /* Save the last known whole positions to checkpoint
1342      * file to be able to also make multimeric channels whole in PBC */
1343     do_cpt_int_err(xd, "Ch0 atoms", &swapstate->nat[eChan0], list);
1344     do_cpt_int_err(xd, "Ch1 atoms", &swapstate->nat[eChan1], list);
1345     if (bRead)
1346     {
1347         snew(swapstate->xc_old_whole[eChan0], swapstate->nat[eChan0]);
1348         snew(swapstate->xc_old_whole[eChan1], swapstate->nat[eChan1]);
1349         do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], swapstate->xc_old_whole[eChan0], list);
1350         do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], swapstate->xc_old_whole[eChan1], list);
1351     }
1352     else
1353     {
1354         do_cpt_n_rvecs_err(xd, "Ch0 whole x", swapstate->nat[eChan0], *swapstate->xc_old_whole_p[eChan0], list);
1355         do_cpt_n_rvecs_err(xd, "Ch1 whole x", swapstate->nat[eChan1], *swapstate->xc_old_whole_p[eChan1], list);
1356     }
1357
1358     return 0;
1359 }
1360
1361
1362 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1363                            int fflags, energyhistory_t *enerhist,
1364                            FILE *list)
1365 {
1366     int ret = 0;
1367
1368     if (fflags == 0)
1369     {
1370         return ret;
1371     }
1372
1373     GMX_RELEASE_ASSERT(enerhist != nullptr, "With energy history, we need a valid enerhist pointer");
1374
1375     /* This is stored/read for backward compatibility */
1376     int  energyHistoryNumEnergies = 0;
1377     if (bRead)
1378     {
1379         enerhist->nsteps     = 0;
1380         enerhist->nsum       = 0;
1381         enerhist->nsteps_sim = 0;
1382         enerhist->nsum_sim   = 0;
1383     }
1384     else if (enerhist != nullptr)
1385     {
1386         energyHistoryNumEnergies = enerhist->ener_sum_sim.size();
1387     }
1388
1389     delta_h_history_t *deltaH = enerhist->deltaHForeignLambdas.get();
1390     const StatePart    part   = StatePart::energyHistory;
1391     for (int i = 0; (i < eenhNR && ret == 0); i++)
1392     {
1393         if (fflags & (1<<i))
1394         {
1395             switch (i)
1396             {
1397                 case eenhENERGY_N:     ret = do_cpte_int(xd, part, i, fflags, &energyHistoryNumEnergies, list); break;
1398                 case eenhENERGY_AVER:  ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_ave, list); break;
1399                 case eenhENERGY_SUM:   ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum, list); break;
1400                 case eenhENERGY_NSUM:  do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1401                 case eenhENERGY_SUM_SIM: ret = doVector<double>(xd, part, i, fflags, &enerhist->ener_sum_sim, list); break;
1402                 case eenhENERGY_NSUM_SIM:   do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1403                 case eenhENERGY_NSTEPS:     do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1404                 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1405                 case eenhENERGY_DELTA_H_NN:
1406                 {
1407                     int numDeltaH = 0;
1408                     if (!bRead && deltaH != nullptr)
1409                     {
1410                         numDeltaH = deltaH->dh.size();
1411                     }
1412                     do_cpt_int_err(xd, eenh_names[i], &numDeltaH, list);
1413                     if (bRead)
1414                     {
1415                         if (deltaH == nullptr)
1416                         {
1417                             enerhist->deltaHForeignLambdas = std::make_unique<delta_h_history_t>();
1418                             deltaH = enerhist->deltaHForeignLambdas.get();
1419                         }
1420                         deltaH->dh.resize(numDeltaH);
1421                         deltaH->start_lambda_set = FALSE;
1422                     }
1423                     break;
1424                 }
1425                 case eenhENERGY_DELTA_H_LIST:
1426                     for (auto dh : deltaH->dh)
1427                     {
1428                         ret = doVector<real>(xd, part, i, fflags, &dh, list);
1429                     }
1430                     break;
1431                 case eenhENERGY_DELTA_H_STARTTIME:
1432                     ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_time), list); break;
1433                 case eenhENERGY_DELTA_H_STARTLAMBDA:
1434                     ret = do_cpte_double(xd, part, i, fflags, &(deltaH->start_lambda), list); break;
1435                 default:
1436                     gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1437                               "You are probably reading a new checkpoint file with old code", i);
1438             }
1439         }
1440     }
1441
1442     if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1443     {
1444         /* Assume we have an old file format and copy sum to sum_sim */
1445         enerhist->ener_sum_sim = enerhist->ener_sum;
1446     }
1447
1448     if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1449          !(fflags & (1<<eenhENERGY_NSTEPS)))
1450     {
1451         /* Assume we have an old file format and copy nsum to nsteps */
1452         enerhist->nsteps = enerhist->nsum;
1453     }
1454     if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1455          !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1456     {
1457         /* Assume we have an old file format and copy nsum to nsteps */
1458         enerhist->nsteps_sim = enerhist->nsum_sim;
1459     }
1460
1461     return ret;
1462 }
1463
1464 static int doCptPullCoordHist(XDR *xd, PullCoordinateHistory *pullCoordHist,
1465                               const StatePart part, FILE *list)
1466 {
1467     int ret   = 0;
1468     int flags = 0;
1469
1470     flags |= ((1<<epullcoordh_VALUE_REF_SUM) | (1<<epullcoordh_VALUE_SUM) | (1<<epullcoordh_DR01_SUM) |
1471               (1<<epullcoordh_DR23_SUM) | (1<<epullcoordh_DR45_SUM) | (1<<epullcoordh_FSCAL_SUM));
1472
1473     for (int i = 0; i < epullcoordh_NR && ret == 0; i++)
1474     {
1475         switch (i)
1476         {
1477             case epullcoordh_VALUE_REF_SUM: ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->valueRef), list); break;
1478             case epullcoordh_VALUE_SUM:     ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->value), list); break;
1479             case epullcoordh_DR01_SUM:
1480                 for (int j = 0; j < DIM && ret == 0; j++)
1481                 {
1482                     ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr01[j]), list);
1483                 }
1484                 break;
1485             case epullcoordh_DR23_SUM:
1486                 for (int j = 0; j < DIM && ret == 0; j++)
1487                 {
1488                     ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr23[j]), list);
1489                 }
1490                 break;
1491             case epullcoordh_DR45_SUM:
1492                 for (int j = 0; j < DIM && ret == 0; j++)
1493                 {
1494                     ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dr45[j]), list);
1495                 }
1496                 break;
1497             case epullcoordh_FSCAL_SUM:     ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->scalarForce), list); break;
1498             case epullcoordh_DYNAX_SUM:
1499                 for (int j = 0; j < DIM && ret == 0; j++)
1500                 {
1501                     ret = do_cpte_double(xd, part, i, flags, &(pullCoordHist->dynaX[j]), list);
1502                 }
1503                 break;
1504         }
1505     }
1506
1507     return ret;
1508 }
1509
1510 static int doCptPullGroupHist(XDR *xd, PullGroupHistory *pullGroupHist,
1511                               const StatePart part, FILE *list)
1512 {
1513     int ret   = 0;
1514     int flags = 0;
1515
1516     flags |= ((1<<epullgrouph_X_SUM));
1517
1518     for (int i = 0; i < epullgrouph_NR; i++)
1519     {
1520         switch (i)
1521         {
1522             case epullgrouph_X_SUM:
1523                 for (int j = 0; j < DIM && ret == 0; j++)
1524                 {
1525                     ret = do_cpte_double(xd, part, i, flags, &(pullGroupHist->x[j]), list);
1526                 }
1527                 break;
1528         }
1529     }
1530
1531     return ret;
1532 }
1533
1534
1535 static int doCptPullHist(XDR *xd, gmx_bool bRead,
1536                          int fflags, PullHistory *pullHist,
1537                          const StatePart part,
1538                          FILE *list)
1539 {
1540     int ret                       = 0;
1541     int pullHistoryNumCoordinates = 0;
1542     int pullHistoryNumGroups      = 0;
1543
1544     /* Retain the number of terms in the sum and the number of coordinates (used for writing
1545      * average pull forces and coordinates) in the pull history, in temporary variables,
1546      * in case they cannot be read from the checkpoint, in order to have backward compatibility */
1547     if (bRead)
1548     {
1549         pullHist->numValuesInXSum = 0;
1550         pullHist->numValuesInFSum = 0;
1551     }
1552     else if (pullHist != nullptr)
1553     {
1554         pullHistoryNumCoordinates = pullHist->pullCoordinateSums.size();
1555         pullHistoryNumGroups      = pullHist->pullGroupSums.size();
1556     }
1557     else
1558     {
1559         GMX_RELEASE_ASSERT(fflags == 0, "Without pull history, all flags should be off");
1560     }
1561
1562     for (int i = 0; (i < epullhNR && ret == 0); i++)
1563     {
1564         if (fflags & (1<<i))
1565         {
1566             switch (i)
1567             {
1568                 case epullhPULL_NUMCOORDINATES:         ret = do_cpte_int(xd, part, i, fflags, &pullHistoryNumCoordinates, list); break;
1569                 case epullhPULL_NUMGROUPS:              do_cpt_int_err(xd, eenh_names[i], &pullHistoryNumGroups, list); break;
1570                 case epullhPULL_NUMVALUESINXSUM:        do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInXSum, list); break;
1571                 case epullhPULL_NUMVALUESINFSUM:        do_cpt_int_err(xd, eenh_names[i], &pullHist->numValuesInFSum, list); break;
1572                 default:
1573                     gmx_fatal(FARGS, "Unknown pull history entry %d\n"
1574                               "You are probably reading a new checkpoint file with old code", i);
1575             }
1576         }
1577     }
1578     if (bRead)
1579     {
1580         pullHist->pullCoordinateSums.resize(pullHistoryNumCoordinates);
1581         pullHist->pullGroupSums.resize(pullHistoryNumGroups);
1582     }
1583     if (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0)
1584     {
1585         for (size_t i = 0; i < pullHist->pullCoordinateSums.size() && ret == 0; i++)
1586         {
1587             ret = doCptPullCoordHist(xd, &(pullHist->pullCoordinateSums[i]), part, list);
1588         }
1589         for (size_t i = 0; i < pullHist->pullGroupSums.size() && ret == 0; i++)
1590         {
1591             ret = doCptPullGroupHist(xd, &(pullHist->pullGroupSums[i]), part, list);
1592         }
1593     }
1594
1595     return ret;
1596 }
1597
1598 static int do_cpt_df_hist(XDR *xd, int fflags, int nlambda, df_history_t **dfhistPtr, FILE *list)
1599 {
1600     int ret = 0;
1601
1602     if (fflags == 0)
1603     {
1604         return 0;
1605     }
1606
1607     if (*dfhistPtr == nullptr)
1608     {
1609         snew(*dfhistPtr, 1);
1610         (*dfhistPtr)->nlambda = nlambda;
1611         init_df_history(*dfhistPtr, nlambda);
1612     }
1613     df_history_t    *dfhist = *dfhistPtr;
1614
1615     const StatePart  part   = StatePart::freeEnergyHistory;
1616     for (int i = 0; (i < edfhNR && ret == 0); i++)
1617     {
1618         if (fflags & (1<<i))
1619         {
1620             switch (i)
1621             {
1622                 case edfhBEQUIL:       ret = do_cpte_bool(xd, part, i, fflags, &dfhist->bEquil, list); break;
1623                 case edfhNATLAMBDA:    ret = do_cpte_ints(xd, part, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1624                 case edfhWLHISTO:      ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1625                 case edfhWLDELTA:      ret = do_cpte_real(xd, part, i, fflags, &dfhist->wl_delta, list); break;
1626                 case edfhSUMWEIGHTS:   ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1627                 case edfhSUMDG:        ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1628                 case edfhSUMMINVAR:    ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1629                 case edfhSUMVAR:       ret = do_cpte_reals(xd, part, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1630                 case edfhACCUMP:       ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p, list); break;
1631                 case edfhACCUMM:       ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m, list); break;
1632                 case edfhACCUMP2:      ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_p2, list); break;
1633                 case edfhACCUMM2:      ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->accum_m2, list); break;
1634                 case edfhTIJ:          ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij, list); break;
1635                 case edfhTIJEMP:       ret = do_cpte_nmatrix(xd, part, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1636
1637                 default:
1638                     gmx_fatal(FARGS, "Unknown df history entry %d\n"
1639                               "You are probably reading a new checkpoint file with old code", i);
1640             }
1641         }
1642     }
1643
1644     return ret;
1645 }
1646
1647
1648 /* This function stores the last whole configuration of the reference and
1649  * average structure in the .cpt file
1650  */
1651 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1652                           int nED, edsamhistory_t *EDstate, FILE *list)
1653 {
1654     if (nED == 0)
1655     {
1656         return 0;
1657     }
1658
1659     EDstate->bFromCpt     = bRead;
1660     EDstate->nED          = nED;
1661
1662     /* When reading, init_edsam has not been called yet,
1663      * so we have to allocate memory first. */
1664     if (bRead)
1665     {
1666         snew(EDstate->nref, EDstate->nED);
1667         snew(EDstate->old_sref, EDstate->nED);
1668         snew(EDstate->nav, EDstate->nED);
1669         snew(EDstate->old_sav, EDstate->nED);
1670     }
1671
1672     /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1673     for (int i = 0; i < EDstate->nED; i++)
1674     {
1675         char buf[STRLEN];
1676
1677         /* Reference structure SREF */
1678         sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1679         do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1680         sprintf(buf, "ED%d x_ref", i+1);
1681         if (bRead)
1682         {
1683             snew(EDstate->old_sref[i], EDstate->nref[i]);
1684             do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1685         }
1686         else
1687         {
1688             do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1689         }
1690
1691         /* Average structure SAV */
1692         sprintf(buf, "ED%d # of atoms in average structure", i+1);
1693         do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1694         sprintf(buf, "ED%d x_av", i+1);
1695         if (bRead)
1696         {
1697             snew(EDstate->old_sav[i], EDstate->nav[i]);
1698             do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1699         }
1700         else
1701         {
1702             do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1703         }
1704     }
1705
1706     return 0;
1707 }
1708
1709 static int do_cpt_correlation_grid(XDR *xd, gmx_bool bRead, gmx_unused int fflags,
1710                                    gmx::CorrelationGridHistory *corrGrid,
1711                                    FILE *list, int eawhh)
1712 {
1713     int ret = 0;
1714
1715     do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->numCorrelationTensors), list);
1716     do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->tensorSize), list);
1717     do_cpt_int_err(xd, eawhh_names[eawhh], &(corrGrid->blockDataListSize), list);
1718
1719     if (bRead)
1720     {
1721         initCorrelationGridHistory(corrGrid, corrGrid->numCorrelationTensors, corrGrid->tensorSize, corrGrid->blockDataListSize);
1722     }
1723
1724     for (gmx::CorrelationBlockDataHistory &blockData : corrGrid->blockDataBuffer)
1725     {
1726         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeight), list);
1727         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumSquareWeight), list);
1728         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightX), list);
1729         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockSumWeightY), list);
1730         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksSquareBlockWeight), list);
1731         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockSquareWeight), list);
1732         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightX), list);
1733         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.sumOverBlocksBlockWeightBlockWeightY), list);
1734         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.blockLength), list);
1735         do_cpt_int_err(xd, eawhh_names[eawhh], &(blockData.previousBlockIndex), list);
1736         do_cpt_double_err(xd, eawhh_names[eawhh], &(blockData.correlationIntegral), list);
1737     }
1738
1739     return ret;
1740 }
1741
1742 static int do_cpt_awh_bias(XDR *xd, gmx_bool bRead,
1743                            int fflags, gmx::AwhBiasHistory *biasHistory,
1744                            FILE *list)
1745 {
1746     int                       ret   = 0;
1747
1748     gmx::AwhBiasStateHistory *state = &biasHistory->state;
1749     for (int i = 0; (i < eawhhNR && ret == 0); i++)
1750     {
1751         if (fflags & (1<<i))
1752         {
1753             switch (i)
1754             {
1755                 case eawhhIN_INITIAL:
1756                     do_cpt_bool_err(xd, eawhh_names[i], &state->in_initial, list); break;
1757                 case eawhhEQUILIBRATEHISTOGRAM:
1758                     do_cpt_bool_err(xd, eawhh_names[i], &state->equilibrateHistogram, list); break;
1759                 case eawhhHISTSIZE:
1760                     do_cpt_double_err(xd, eawhh_names[i], &state->histSize, list); break;
1761                 case eawhhNPOINTS:
1762                 {
1763                     int numPoints;
1764                     if (!bRead)
1765                     {
1766                         numPoints = biasHistory->pointState.size();
1767                     }
1768                     do_cpt_int_err(xd, eawhh_names[i], &numPoints, list);
1769                     if (bRead)
1770                     {
1771                         biasHistory->pointState.resize(numPoints);
1772                     }
1773                 }
1774                 break;
1775                 case eawhhCOORDPOINT:
1776                     for (auto &psh : biasHistory->pointState)
1777                     {
1778                         do_cpt_double_err(xd, eawhh_names[i], &psh.target, list);
1779                         do_cpt_double_err(xd, eawhh_names[i], &psh.free_energy, list);
1780                         do_cpt_double_err(xd, eawhh_names[i], &psh.bias, list);
1781                         do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_iteration, list);
1782                         do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_covering, list);
1783                         do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_tot, list);
1784                         do_cpt_double_err(xd, eawhh_names[i], &psh.weightsum_ref, list);
1785                         do_cpt_step_err(xd, eawhh_names[i], &psh.last_update_index, list);
1786                         do_cpt_double_err(xd, eawhh_names[i], &psh.log_pmfsum, list);
1787                         do_cpt_double_err(xd, eawhh_names[i], &psh.visits_iteration, list);
1788                         do_cpt_double_err(xd, eawhh_names[i], &psh.visits_tot, list);
1789                     }
1790                     break;
1791                 case eawhhUMBRELLAGRIDPOINT:
1792                     do_cpt_int_err(xd, eawhh_names[i], &(state->umbrellaGridpoint), list); break;
1793                 case eawhhUPDATELIST:
1794                     do_cpt_int_err(xd, eawhh_names[i], &(state->origin_index_updatelist), list);
1795                     do_cpt_int_err(xd, eawhh_names[i], &(state->end_index_updatelist), list);
1796                     break;
1797                 case eawhhLOGSCALEDSAMPLEWEIGHT:
1798                     do_cpt_double_err(xd, eawhh_names[i], &(state->logScaledSampleWeight), list);
1799                     do_cpt_double_err(xd, eawhh_names[i], &(state->maxLogScaledSampleWeight), list);
1800                     break;
1801                 case eawhhNUMUPDATES:
1802                     do_cpt_step_err(xd, eawhh_names[i], &(state->numUpdates), list);
1803                     break;
1804                 case eawhhFORCECORRELATIONGRID:
1805                     ret = do_cpt_correlation_grid(xd, bRead, fflags, &biasHistory->forceCorrelationGrid, list, i);
1806                     break;
1807                 default:
1808                     gmx_fatal(FARGS, "Unknown awh history entry %d\n", i);
1809             }
1810         }
1811     }
1812
1813     return ret;
1814 }
1815
1816 static int do_cpt_awh(XDR *xd, gmx_bool bRead,
1817                       int fflags, gmx::AwhHistory *awhHistory,
1818                       FILE *list)
1819 {
1820     int ret = 0;
1821
1822     if (fflags != 0)
1823     {
1824         std::shared_ptr<gmx::AwhHistory> awhHistoryLocal;
1825
1826         if (awhHistory == nullptr)
1827         {
1828             GMX_RELEASE_ASSERT(bRead, "do_cpt_awh should not be called for writing without an AwhHistory");
1829
1830             awhHistoryLocal = std::make_shared<gmx::AwhHistory>();
1831             awhHistory      = awhHistoryLocal.get();
1832         }
1833
1834         /* To be able to read and write the AWH data several parameters determining the layout of the AWH structs need to be known
1835            (nbias, npoints, etc.). The best thing (?) would be to have these passed to this function. When writing to a checkpoint
1836            these parameters are available in awh_history (after calling init_awh_history). When reading from a checkpoint though, there
1837            is no initialized awh_history (it is initialized and set in this function). The AWH parameters have not always been read
1838            at the time when this function is called for reading so I don't know how to pass them as input. Here, this is solved by
1839            when writing a checkpoint, also storing parameters needed for future reading of the checkpoint.
1840
1841            Another issue is that some variables that AWH checkpoints don't have a registered enum and string (e.g. nbias below).
1842            One difficulty is the multilevel structure of the data which would need to be represented somehow. */
1843
1844         int numBias;
1845         if (!bRead)
1846         {
1847             numBias = awhHistory->bias.size();
1848         }
1849         do_cpt_int_err(xd, "awh_nbias", &numBias, list);
1850
1851         if (bRead)
1852         {
1853             awhHistory->bias.resize(numBias);
1854         }
1855         for (auto &bias : awhHistory->bias)
1856         {
1857             ret = do_cpt_awh_bias(xd, bRead, fflags, &bias, list);
1858             if (ret)
1859             {
1860                 return ret;
1861             }
1862         }
1863         do_cpt_double_err(xd, "awh_potential_offset", &awhHistory->potentialOffset, list);
1864     }
1865     return ret;
1866 }
1867
1868 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1869                         std::vector<gmx_file_position_t> *outputfiles,
1870                         FILE *list, int file_version)
1871 {
1872     gmx_off_t                   offset;
1873     gmx_off_t                   mask = 0xFFFFFFFFL;
1874     int                         offset_high, offset_low;
1875     std::array<char, CPTSTRLEN> buf;
1876     GMX_RELEASE_ASSERT(outputfiles, "Must have valid outputfiles");
1877
1878     // Ensure that reading pre-allocates outputfiles, while writing
1879     // writes what is already there.
1880     int nfiles = outputfiles->size();
1881     if (do_cpt_int(xd, "number of output files", &nfiles, list) != 0)
1882     {
1883         return -1;
1884     }
1885     if (bRead)
1886     {
1887         outputfiles->resize(nfiles);
1888     }
1889
1890     for (auto &outputfile : *outputfiles)
1891     {
1892         /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1893         if (bRead)
1894         {
1895             do_cpt_string_err(xd, "output filename", buf, list);
1896             std::copy(std::begin(buf), std::end(buf), std::begin(outputfile.filename));
1897
1898             if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1899             {
1900                 return -1;
1901             }
1902             if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1903             {
1904                 return -1;
1905             }
1906             outputfile.offset = (static_cast<gmx_off_t>(offset_high) << 32 ) | ( static_cast<gmx_off_t>(offset_low) & mask );
1907         }
1908         else
1909         {
1910             do_cpt_string_err(xd, "output filename", outputfile.filename, list);
1911             /* writing */
1912             offset      = outputfile.offset;
1913             if (offset == -1)
1914             {
1915                 offset_low  = -1;
1916                 offset_high = -1;
1917             }
1918             else
1919             {
1920                 offset_low  = static_cast<int>(offset & mask);
1921                 offset_high = static_cast<int>((offset >> 32) & mask);
1922             }
1923             if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1924             {
1925                 return -1;
1926             }
1927             if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1928             {
1929                 return -1;
1930             }
1931         }
1932         if (file_version >= 8)
1933         {
1934             if (do_cpt_int(xd, "file_checksum_size", &outputfile.checksumSize,
1935                            list) != 0)
1936             {
1937                 return -1;
1938             }
1939             if (do_cpt_u_chars(xd, "file_checksum", outputfile.checksum.size(), outputfile.checksum.data(), list) != 0)
1940             {
1941                 return -1;
1942             }
1943         }
1944         else
1945         {
1946             outputfile.checksumSize = -1;
1947         }
1948     }
1949     return 0;
1950 }
1951
1952
1953 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1954                       FILE *fplog, const t_commrec *cr,
1955                       ivec domdecCells, int nppnodes,
1956                       int eIntegrator, int simulation_part,
1957                       gmx_bool bExpanded, int elamstats,
1958                       int64_t step, double t,
1959                       t_state *state, ObservablesHistory *observablesHistory)
1960 {
1961     t_fileio            *fp;
1962     char                *fntemp; /* the temporary checkpoint file name */
1963     int                  npmenodes;
1964     char                 buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1965     t_fileio            *ret;
1966
1967     if (DOMAINDECOMP(cr))
1968     {
1969         npmenodes = cr->npmenodes;
1970     }
1971     else
1972     {
1973         npmenodes = 0;
1974     }
1975
1976 #if !GMX_NO_RENAME
1977     /* make the new temporary filename */
1978     snew(fntemp, std::strlen(fn)+5+STEPSTRSIZE);
1979     std::strcpy(fntemp, fn);
1980     fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1981     sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1982     std::strcat(fntemp, suffix);
1983     std::strcat(fntemp, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
1984 #else
1985     /* if we can't rename, we just overwrite the cpt file.
1986      * dangerous if interrupted.
1987      */
1988     snew(fntemp, std::strlen(fn));
1989     std::strcpy(fntemp, fn);
1990 #endif
1991     std::string timebuf = gmx_format_current_time();
1992
1993     if (fplog)
1994     {
1995         fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1996                 gmx_step_str(step, buf), timebuf.c_str());
1997     }
1998
1999     /* Get offsets for open files */
2000     auto outputfiles = gmx_fio_get_output_file_positions();
2001
2002     fp = gmx_fio_open(fntemp, "w");
2003
2004     int flags_eks;
2005     if (state->ekinstate.bUpToDate)
2006     {
2007         flags_eks =
2008             ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
2009              (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
2010              (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
2011     }
2012     else
2013     {
2014         flags_eks = 0;
2015     }
2016
2017     energyhistory_t *enerhist  = observablesHistory->energyHistory.get();
2018     int              flags_enh = 0;
2019     if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
2020     {
2021         flags_enh |= (1<<eenhENERGY_N) | (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSTEPS_SIM);
2022         if (enerhist->nsum > 0)
2023         {
2024             flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
2025                           (1<<eenhENERGY_NSUM));
2026         }
2027         if (enerhist->nsum_sim > 0)
2028         {
2029             flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSUM_SIM));
2030         }
2031         if (enerhist->deltaHForeignLambdas != nullptr)
2032         {
2033             flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
2034                            (1<< eenhENERGY_DELTA_H_LIST) |
2035                            (1<< eenhENERGY_DELTA_H_STARTTIME) |
2036                            (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
2037         }
2038     }
2039
2040     PullHistory *pullHist         = observablesHistory->pullHistory.get();
2041     int          flagsPullHistory = 0;
2042     if (pullHist != nullptr && (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0))
2043     {
2044         flagsPullHistory |= (1<<epullhPULL_NUMCOORDINATES);
2045         flagsPullHistory |= ((1<<epullhPULL_NUMGROUPS) | (1<<epullhPULL_NUMVALUESINXSUM) | (1<<epullhPULL_NUMVALUESINFSUM));
2046     }
2047
2048     int flags_dfh;
2049     if (bExpanded)
2050     {
2051         flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) |  (1<<edfhSUMDG)  |
2052                      (1<<edfhTIJ) | (1<<edfhTIJEMP));
2053         if (EWL(elamstats))
2054         {
2055             flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
2056         }
2057         if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
2058         {
2059             flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
2060                           | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
2061         }
2062     }
2063     else
2064     {
2065         flags_dfh = 0;
2066     }
2067
2068     int flags_awhh = 0;
2069     if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
2070     {
2071         flags_awhh |= ( (1 << eawhhIN_INITIAL) |
2072                         (1 << eawhhEQUILIBRATEHISTOGRAM) |
2073                         (1 << eawhhHISTSIZE) |
2074                         (1 << eawhhNPOINTS) |
2075                         (1 << eawhhCOORDPOINT) |
2076                         (1 << eawhhUMBRELLAGRIDPOINT) |
2077                         (1 << eawhhUPDATELIST) |
2078                         (1 << eawhhLOGSCALEDSAMPLEWEIGHT) |
2079                         (1 << eawhhNUMUPDATES) |
2080                         (1 << eawhhFORCECORRELATIONGRID));
2081     }
2082
2083     /* We can check many more things now (CPU, acceleration, etc), but
2084      * it is highly unlikely to have two separate builds with exactly
2085      * the same version, user, time, and build host!
2086      */
2087
2088     int                      nlambda     = (state->dfhist ? state->dfhist->nlambda : 0);
2089
2090     edsamhistory_t          *edsamhist   = observablesHistory->edsamHistory.get();
2091     int                      nED         = (edsamhist ? edsamhist->nED : 0);
2092
2093     swaphistory_t           *swaphist    = observablesHistory->swapHistory.get();
2094     int                      eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
2095
2096     CheckpointHeaderContents headerContents =
2097     {
2098         0, {0}, {0}, {0}, {0}, GMX_DOUBLE, {0}, {0},
2099         eIntegrator, simulation_part, step, t, nppnodes,
2100         {0}, npmenodes,
2101         state->natoms, state->ngtc, state->nnhpres,
2102         state->nhchainlength, nlambda, state->flags, flags_eks, flags_enh,
2103         flagsPullHistory, flags_dfh, flags_awhh,
2104         nED, eSwapCoords
2105     };
2106     std::strcpy(headerContents.version, gmx_version());
2107     std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
2108     std::strcpy(headerContents.ftime, timebuf.c_str());
2109     if (DOMAINDECOMP(cr))
2110     {
2111         copy_ivec(domdecCells, headerContents.dd_nc);
2112     }
2113
2114     do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
2115
2116     if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0)        ||
2117         (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0) ||
2118         (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0)  ||
2119         (doCptPullHist(gmx_fio_getxdr(fp), FALSE, flagsPullHistory, pullHist, StatePart::pullHistory, nullptr) < 0)  ||
2120         (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0)  ||
2121         (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0)      ||
2122         (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0) ||
2123         (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0) ||
2124         (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr,
2125                       headerContents.file_version) < 0))
2126     {
2127         gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2128     }
2129
2130     do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2131
2132     /* we really, REALLY, want to make sure to physically write the checkpoint,
2133        and all the files it depends on, out to disk. Because we've
2134        opened the checkpoint with gmx_fio_open(), it's in our list
2135        of open files.  */
2136     ret = gmx_fio_all_output_fsync();
2137
2138     if (ret)
2139     {
2140         char buf[STRLEN];
2141         sprintf(buf,
2142                 "Cannot fsync '%s'; maybe you are out of disk space?",
2143                 gmx_fio_getname(ret));
2144
2145         if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
2146         {
2147             gmx_file(buf);
2148         }
2149         else
2150         {
2151             gmx_warning("%s", buf);
2152         }
2153     }
2154
2155     if (gmx_fio_close(fp) != 0)
2156     {
2157         gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2158     }
2159
2160     /* we don't move the checkpoint if the user specified they didn't want it,
2161        or if the fsyncs failed */
2162 #if !GMX_NO_RENAME
2163     if (!bNumberAndKeep && !ret)
2164     {
2165         if (gmx_fexist(fn))
2166         {
2167             /* Rename the previous checkpoint file */
2168             std::strcpy(buf, fn);
2169             buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
2170             std::strcat(buf, "_prev");
2171             std::strcat(buf, fn+std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
2172             if (!GMX_FAHCORE)
2173             {
2174                 /* we copy here so that if something goes wrong between now and
2175                  * the rename below, there's always a state.cpt.
2176                  * If renames are atomic (such as in POSIX systems),
2177                  * this copying should be unneccesary.
2178                  */
2179                 gmx_file_copy(fn, buf, FALSE);
2180                 /* We don't really care if this fails:
2181                  * there's already a new checkpoint.
2182                  */
2183             }
2184             else
2185             {
2186                 gmx_file_rename(fn, buf);
2187             }
2188         }
2189         if (gmx_file_rename(fntemp, fn) != 0)
2190         {
2191             gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
2192         }
2193     }
2194 #endif  /* GMX_NO_RENAME */
2195
2196     sfree(fntemp);
2197
2198 #if GMX_FAHCORE
2199     /*code for alternate checkpointing scheme.  moved from top of loop over
2200        steps */
2201     fcRequestCheckPoint();
2202     if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
2203     {
2204         gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
2205     }
2206 #endif /* end GMX_FAHCORE block */
2207 }
2208
2209 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
2210 {
2211     bool foundMismatch = (p != f);
2212     if (!foundMismatch)
2213     {
2214         return;
2215     }
2216     *mm = TRUE;
2217     if (fplog)
2218     {
2219         fprintf(fplog, "  %s mismatch,\n", type);
2220         fprintf(fplog, "    current program: %d\n", p);
2221         fprintf(fplog, "    checkpoint file: %d\n", f);
2222         fprintf(fplog, "\n");
2223     }
2224 }
2225
2226 static void check_string(FILE *fplog, const char *type, const char *p,
2227                          const char *f, gmx_bool *mm)
2228 {
2229     bool foundMismatch = (std::strcmp(p, f) != 0);
2230     if (!foundMismatch)
2231     {
2232         return;
2233     }
2234     *mm = TRUE;
2235     if (fplog)
2236     {
2237         fprintf(fplog, "  %s mismatch,\n", type);
2238         fprintf(fplog, "    current program: %s\n", p);
2239         fprintf(fplog, "    checkpoint file: %s\n", f);
2240         fprintf(fplog, "\n");
2241     }
2242 }
2243
2244 static void check_match(FILE *fplog, const t_commrec *cr, const ivec dd_nc,
2245                         const CheckpointHeaderContents &headerContents,
2246                         gmx_bool reproducibilityRequested)
2247 {
2248     /* Note that this check_string on the version will also print a message
2249      * when only the minor version differs. But we only print a warning
2250      * message further down with reproducibilityRequested=TRUE.
2251      */
2252     gmx_bool versionDiffers = FALSE;
2253     check_string(fplog, "Version", gmx_version(), headerContents.version, &versionDiffers);
2254
2255     gmx_bool precisionDiffers = FALSE;
2256     check_int   (fplog, "Double prec.", GMX_DOUBLE, headerContents.double_prec, &precisionDiffers);
2257     if (precisionDiffers)
2258     {
2259         const char msg_precision_difference[] =
2260             "You are continuing a simulation with a different precision. Not matching\n"
2261             "single/double precision will lead to precision or performance loss.\n";
2262         if (fplog)
2263         {
2264             fprintf(fplog, "%s\n", msg_precision_difference);
2265         }
2266     }
2267
2268     gmx_bool mm = (versionDiffers || precisionDiffers);
2269
2270     if (reproducibilityRequested)
2271     {
2272         check_string(fplog, "Program name", gmx::getProgramContext().fullBinaryPath(), headerContents.fprog, &mm);
2273
2274         check_int   (fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
2275     }
2276
2277     if (cr->nnodes > 1 && reproducibilityRequested)
2278     {
2279         check_int (fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
2280
2281         int npp = cr->nnodes;
2282         if (cr->npmenodes >= 0)
2283         {
2284             npp -= cr->npmenodes;
2285         }
2286         int npp_f = headerContents.nnodes;
2287         if (headerContents.npme >= 0)
2288         {
2289             npp_f -= headerContents.npme;
2290         }
2291         if (npp == npp_f)
2292         {
2293             check_int (fplog, "#DD-cells[x]", dd_nc[XX], headerContents.dd_nc[XX], &mm);
2294             check_int (fplog, "#DD-cells[y]", dd_nc[YY], headerContents.dd_nc[YY], &mm);
2295             check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], headerContents.dd_nc[ZZ], &mm);
2296         }
2297     }
2298
2299     if (mm)
2300     {
2301         /* Gromacs should be able to continue from checkpoints between
2302          * different patch level versions, but we do not guarantee
2303          * compatibility between different major/minor versions - check this.
2304          */
2305         int        gmx_major;
2306         int        cpt_major;
2307         sscanf(gmx_version(), "%5d", &gmx_major);
2308         int        ret                 = sscanf(headerContents.version, "%5d", &cpt_major);
2309         gmx_bool   majorVersionDiffers = (ret < 1 || gmx_major != cpt_major);
2310
2311         const char msg_major_version_difference[] =
2312             "The current GROMACS major version is not identical to the one that\n"
2313             "generated the checkpoint file. In principle GROMACS does not support\n"
2314             "continuation from checkpoints between different versions, so we advise\n"
2315             "against this. If you still want to try your luck we recommend that you use\n"
2316             "the -noappend flag to keep your output files from the two versions separate.\n"
2317             "This might also work around errors where the output fields in the energy\n"
2318             "file have changed between the different versions.\n";
2319
2320         const char msg_mismatch_notice[] =
2321             "GROMACS patchlevel, binary or parallel settings differ from previous run.\n"
2322             "Continuation is exact, but not guaranteed to be binary identical.\n";
2323
2324         if (majorVersionDiffers)
2325         {
2326             if (fplog)
2327             {
2328                 fprintf(fplog, "%s\n", msg_major_version_difference);
2329             }
2330         }
2331         else if (reproducibilityRequested)
2332         {
2333             /* Major & minor versions match at least, but something is different. */
2334             if (fplog)
2335             {
2336                 fprintf(fplog, "%s\n", msg_mismatch_notice);
2337             }
2338         }
2339     }
2340 }
2341
2342 static void read_checkpoint(const char *fn, t_fileio *logfio,
2343                             const t_commrec *cr,
2344                             const ivec dd_nc,
2345                             int eIntegrator,
2346                             int *init_fep_state,
2347                             CheckpointHeaderContents *headerContents,
2348                             t_state *state,
2349                             ObservablesHistory *observablesHistory,
2350                             gmx_bool reproducibilityRequested)
2351 {
2352     t_fileio            *fp;
2353     char                 buf[STEPSTRSIZE];
2354     int                  ret;
2355
2356     fp = gmx_fio_open(fn, "r");
2357     do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, headerContents);
2358
2359     // If we are appending, then we don't want write to the open log
2360     // file because we still need to compute a checksum for it. In
2361     // that case, the filehandle will be nullptr. Otherwise, we report
2362     // to the new log file about the checkpoint file that we are
2363     // reading from.
2364     FILE *fplog = gmx_fio_getfp(logfio);
2365     if (fplog)
2366     {
2367         fprintf(fplog, "\n");
2368         fprintf(fplog, "Reading checkpoint file %s\n", fn);
2369         fprintf(fplog, "  file generated by:     %s\n", headerContents->fprog);
2370         fprintf(fplog, "  file generated at:     %s\n", headerContents->ftime);
2371         fprintf(fplog, "  GROMACS double prec.:  %d\n", headerContents->double_prec);
2372         fprintf(fplog, "  simulation part #:     %d\n", headerContents->simulation_part);
2373         fprintf(fplog, "  step:                  %s\n", gmx_step_str(headerContents->step, buf));
2374         fprintf(fplog, "  time:                  %f\n", headerContents->t);
2375         fprintf(fplog, "\n");
2376     }
2377
2378     if (headerContents->natoms != state->natoms)
2379     {
2380         gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", headerContents->natoms, state->natoms);
2381     }
2382     if (headerContents->ngtc != state->ngtc)
2383     {
2384         gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", headerContents->ngtc, state->ngtc);
2385     }
2386     if (headerContents->nnhpres != state->nnhpres)
2387     {
2388         gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", headerContents->nnhpres, state->nnhpres);
2389     }
2390
2391     int nlambdaHistory = (state->dfhist ? state->dfhist->nlambda : 0);
2392     if (headerContents->nlambda != nlambdaHistory)
2393     {
2394         gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", headerContents->nlambda, nlambdaHistory);
2395     }
2396
2397     init_gtc_state(state, state->ngtc, state->nnhpres, headerContents->nhchainlength); /* need to keep this here to keep the tpr format working */
2398     /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
2399
2400     if (headerContents->eIntegrator != eIntegrator)
2401     {
2402         gmx_fatal(FARGS, "Cannot change integrator during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2403     }
2404
2405     if (headerContents->flags_state != state->flags)
2406     {
2407         gmx_fatal(FARGS, "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you should make a new .tpr with grompp -f new.mdp -t %s", fn);
2408     }
2409
2410     if (MASTER(cr))
2411     {
2412         check_match(fplog, cr, dd_nc, *headerContents,
2413                     reproducibilityRequested);
2414     }
2415
2416     ret             = do_cpt_state(gmx_fio_getxdr(fp), headerContents->flags_state, state, nullptr);
2417     *init_fep_state = state->fep_state;  /* there should be a better way to do this than setting it here.
2418                                             Investigate for 5.0. */
2419     if (ret)
2420     {
2421         cp_error();
2422     }
2423     ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents->flags_eks, &state->ekinstate, nullptr);
2424     if (ret)
2425     {
2426         cp_error();
2427     }
2428     state->ekinstate.hasReadEkinState = (((headerContents->flags_eks & (1<<eeksEKINH)) != 0) ||
2429                                          ((headerContents->flags_eks & (1<<eeksEKINF)) != 0) ||
2430                                          ((headerContents->flags_eks & (1<<eeksEKINO)) != 0) ||
2431                                          (((headerContents->flags_eks & (1<<eeksEKINSCALEF)) |
2432                                            (headerContents->flags_eks & (1<<eeksEKINSCALEH)) |
2433                                            (headerContents->flags_eks & (1<<eeksVSCALE))) != 0));
2434
2435     if (headerContents->flags_enh && observablesHistory->energyHistory == nullptr)
2436     {
2437         observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
2438     }
2439     ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2440                           headerContents->flags_enh, observablesHistory->energyHistory.get(), nullptr);
2441     if (ret)
2442     {
2443         cp_error();
2444     }
2445
2446     if (headerContents->flagsPullHistory)
2447     {
2448         if (observablesHistory->pullHistory == nullptr)
2449         {
2450             observablesHistory->pullHistory = std::make_unique<PullHistory>();
2451         }
2452         ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE,
2453                             headerContents->flagsPullHistory, observablesHistory->pullHistory.get(), StatePart::pullHistory, nullptr);
2454         if (ret)
2455         {
2456             cp_error();
2457         }
2458     }
2459
2460     if (headerContents->file_version < 6)
2461     {
2462         gmx_fatal(FARGS, "Continuing from checkpoint files written before GROMACS 4.5 is not supported");
2463     }
2464
2465     ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents->flags_dfh, headerContents->nlambda, &state->dfhist, nullptr);
2466     if (ret)
2467     {
2468         cp_error();
2469     }
2470
2471     if (headerContents->nED > 0 && observablesHistory->edsamHistory == nullptr)
2472     {
2473         observablesHistory->edsamHistory = std::make_unique<edsamhistory_t>(edsamhistory_t {});
2474     }
2475     ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents->nED, observablesHistory->edsamHistory.get(), nullptr);
2476     if (ret)
2477     {
2478         cp_error();
2479     }
2480
2481     if (headerContents->flags_awhh != 0 && state->awhHistory == nullptr)
2482     {
2483         state->awhHistory = std::make_shared<gmx::AwhHistory>();
2484     }
2485     ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2486                      headerContents->flags_awhh, state->awhHistory.get(), nullptr);
2487     if (ret)
2488     {
2489         cp_error();
2490     }
2491
2492     if (headerContents->eSwapCoords != eswapNO && observablesHistory->swapHistory == nullptr)
2493     {
2494         observablesHistory->swapHistory = std::make_unique<swaphistory_t>(swaphistory_t {});
2495     }
2496     ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents->eSwapCoords, observablesHistory->swapHistory.get(), nullptr);
2497     if (ret)
2498     {
2499         cp_error();
2500     }
2501
2502     std::vector<gmx_file_position_t> outputfiles;
2503     ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, nullptr, headerContents->file_version);
2504     if (ret)
2505     {
2506         cp_error();
2507     }
2508
2509     ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
2510     if (ret)
2511     {
2512         cp_error();
2513     }
2514     if (gmx_fio_close(fp) != 0)
2515     {
2516         gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2517     }
2518 }
2519
2520
2521 void load_checkpoint(const char *fn, t_fileio *logfio,
2522                      const t_commrec *cr, const ivec dd_nc,
2523                      t_inputrec *ir, t_state *state,
2524                      ObservablesHistory *observablesHistory,
2525                      gmx_bool reproducibilityRequested)
2526 {
2527     CheckpointHeaderContents headerContents;
2528     if (SIMMASTER(cr))
2529     {
2530         /* Read the state from the checkpoint file */
2531         read_checkpoint(fn, logfio,
2532                         cr, dd_nc,
2533                         ir->eI, &(ir->fepvals->init_fep_state),
2534                         &headerContents,
2535                         state, observablesHistory,
2536                         reproducibilityRequested);
2537     }
2538     if (PAR(cr))
2539     {
2540         gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
2541     }
2542     ir->bContinuation    = TRUE;
2543     // TODO Should the following condition be <=? Currently if you
2544     // pass a checkpoint written by an normal completion to a restart,
2545     // mdrun will read all input, does some work but no steps, and
2546     // write successful output. But perhaps that is not desirable.
2547     if ((ir->nsteps >= 0) && (ir->nsteps < headerContents.step))
2548     {
2549         // Note that we do not intend to support the use of mdrun
2550         // -nsteps to circumvent this condition.
2551         char nstepsString[STEPSTRSIZE], stepString[STEPSTRSIZE];
2552         gmx_step_str(ir->nsteps, nstepsString);
2553         gmx_step_str(headerContents.step, stepString);
2554         gmx_fatal(FARGS, "The input requested %s steps, however the checkpoint "
2555                   "file has already reached step %s. The simulation will not "
2556                   "proceed, because either your simulation is already complete, "
2557                   "or your combination of input files don't match.",
2558                   nstepsString, stepString);
2559     }
2560     if (ir->nsteps >= 0)
2561     {
2562         ir->nsteps          += ir->init_step - headerContents.step;
2563     }
2564     ir->init_step        = headerContents.step;
2565     ir->simulation_part  = headerContents.simulation_part + 1;
2566 }
2567
2568 void read_checkpoint_part_and_step(const char  *filename,
2569                                    int         *simulation_part,
2570                                    int64_t     *step)
2571 {
2572     t_fileio *fp;
2573
2574     if (filename == nullptr ||
2575         !gmx_fexist(filename) ||
2576         ((fp = gmx_fio_open(filename, "r")) == nullptr))
2577     {
2578         *simulation_part = 0;
2579         *step            = 0;
2580         return;
2581     }
2582
2583     CheckpointHeaderContents headerContents;
2584     do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2585     gmx_fio_close(fp);
2586     *simulation_part = headerContents.simulation_part;
2587     *step            = headerContents.step;
2588 }
2589
2590 static CheckpointHeaderContents
2591 read_checkpoint_data(t_fileio                         *fp,
2592                      t_state                          *state,
2593                      std::vector<gmx_file_position_t> *outputfiles)
2594 {
2595     CheckpointHeaderContents headerContents;
2596     do_cpt_header(gmx_fio_getxdr(fp), TRUE, nullptr, &headerContents);
2597     state->natoms        = headerContents.natoms;
2598     state->ngtc          = headerContents.ngtc;
2599     state->nnhpres       = headerContents.nnhpres;
2600     state->nhchainlength = headerContents.nhchainlength;
2601     state->flags         = headerContents.flags_state;
2602     int ret              =
2603         do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr);
2604     if (ret)
2605     {
2606         cp_error();
2607     }
2608     ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr);
2609     if (ret)
2610     {
2611         cp_error();
2612     }
2613
2614     energyhistory_t enerhist;
2615     ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2616                           headerContents.flags_enh, &enerhist, nullptr);
2617     if (ret)
2618     {
2619         cp_error();
2620     }
2621     PullHistory pullHist = {};
2622     ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE,
2623                         headerContents.flagsPullHistory, &pullHist, StatePart::pullHistory, nullptr);
2624     if (ret)
2625     {
2626         cp_error();
2627     }
2628
2629     ret = do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda, &state->dfhist, nullptr);
2630     if (ret)
2631     {
2632         cp_error();
2633     }
2634
2635     edsamhistory_t edsamhist = {};
2636     ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, nullptr);
2637     if (ret)
2638     {
2639         cp_error();
2640     }
2641
2642     ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2643                      headerContents.flags_awhh, state->awhHistory.get(), nullptr);
2644     if (ret)
2645     {
2646         cp_error();
2647     }
2648
2649     swaphistory_t swaphist = {};
2650     ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, nullptr);
2651     if (ret)
2652     {
2653         cp_error();
2654     }
2655
2656     ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2657                        outputfiles,
2658                        nullptr, headerContents.file_version);
2659
2660     if (ret)
2661     {
2662         cp_error();
2663     }
2664
2665     ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2666     if (ret)
2667     {
2668         cp_error();
2669     }
2670     return headerContents;
2671 }
2672
2673 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2674 {
2675     t_state                          state;
2676     std::vector<gmx_file_position_t> outputfiles;
2677     CheckpointHeaderContents         headerContents =
2678         read_checkpoint_data(fp, &state, &outputfiles);
2679
2680     fr->natoms  = state.natoms;
2681     fr->bStep   = TRUE;
2682     fr->step    = int64_to_int(headerContents.step,
2683                                "conversion of checkpoint to trajectory");
2684     fr->bTime      = TRUE;
2685     fr->time       = headerContents.t;
2686     fr->bLambda    = TRUE;
2687     fr->lambda     = state.lambda[efptFEP];
2688     fr->fep_state  = state.fep_state;
2689     fr->bAtoms     = FALSE;
2690     fr->bX         = ((state.flags & (1<<estX)) != 0);
2691     if (fr->bX)
2692     {
2693         fr->x   = makeRvecArray(state.x, state.natoms);
2694     }
2695     fr->bV      = ((state.flags & (1<<estV)) != 0);
2696     if (fr->bV)
2697     {
2698         fr->v   = makeRvecArray(state.v, state.natoms);
2699     }
2700     fr->bF      = FALSE;
2701     fr->bBox    = ((state.flags & (1<<estBOX)) != 0);
2702     if (fr->bBox)
2703     {
2704         copy_mat(state.box, fr->box);
2705     }
2706 }
2707
2708 void list_checkpoint(const char *fn, FILE *out)
2709 {
2710     t_fileio            *fp;
2711     int                  ret;
2712
2713     t_state              state;
2714
2715     fp = gmx_fio_open(fn, "r");
2716     CheckpointHeaderContents headerContents;
2717     do_cpt_header(gmx_fio_getxdr(fp), TRUE, out, &headerContents);
2718     state.natoms        = headerContents.natoms;
2719     state.ngtc          = headerContents.ngtc;
2720     state.nnhpres       = headerContents.nnhpres;
2721     state.nhchainlength = headerContents.nhchainlength;
2722     state.flags         = headerContents.flags_state;
2723     ret                 = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, out);
2724     if (ret)
2725     {
2726         cp_error();
2727     }
2728     ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state.ekinstate, out);
2729     if (ret)
2730     {
2731         cp_error();
2732     }
2733
2734     energyhistory_t enerhist;
2735     ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2736                           headerContents.flags_enh, &enerhist, out);
2737
2738     if (ret == 0)
2739     {
2740         PullHistory pullHist = {};
2741         ret = doCptPullHist(gmx_fio_getxdr(fp), TRUE,
2742                             headerContents.flagsPullHistory, &pullHist, StatePart::pullHistory, out);
2743     }
2744
2745     if (ret == 0)
2746     {
2747         ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2748                              headerContents.flags_dfh, headerContents.nlambda, &state.dfhist, out);
2749     }
2750
2751     if (ret == 0)
2752     {
2753         edsamhistory_t edsamhist = {};
2754         ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, headerContents.nED, &edsamhist, out);
2755     }
2756
2757     if (ret == 0)
2758     {
2759         ret = do_cpt_awh(gmx_fio_getxdr(fp), TRUE,
2760                          headerContents.flags_awhh, state.awhHistory.get(), out);
2761     }
2762
2763     if (ret == 0)
2764     {
2765         swaphistory_t swaphist = {};
2766         ret = do_cpt_swapstate(gmx_fio_getxdr(fp), TRUE, headerContents.eSwapCoords, &swaphist, out);
2767     }
2768
2769     if (ret == 0)
2770     {
2771         std::vector<gmx_file_position_t> outputfiles;
2772         ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, out, headerContents.file_version);
2773     }
2774
2775     if (ret == 0)
2776     {
2777         ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
2778     }
2779
2780     if (ret)
2781     {
2782         cp_warning(out);
2783     }
2784     if (gmx_fio_close(fp) != 0)
2785     {
2786         gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2787     }
2788 }
2789
2790 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2791 CheckpointHeaderContents
2792 read_checkpoint_simulation_part_and_filenames(t_fileio                         *fp,
2793                                               std::vector<gmx_file_position_t> *outputfiles)
2794 {
2795     t_state                  state;
2796     CheckpointHeaderContents headerContents =
2797         read_checkpoint_data(fp, &state, outputfiles);
2798     if (gmx_fio_close(fp) != 0)
2799     {
2800         gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2801     }
2802     return headerContents;
2803 }