1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
30 #ifdef HAVE_SYS_TIME_H
38 #ifdef GMX_NATIVE_WINDOWS
41 #include <sys/locking.h>
55 #include "gmx_random.h"
56 #include "checkpoint.h"
61 #include "buildinfo.h"
68 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
70 gmx_ctime_r(const time_t *clock, char *buf, int n);
73 #define CPT_MAGIC1 171817
74 #define CPT_MAGIC2 171819
75 #define CPTSTRLEN 1024
78 #define GMX_CPT_BUILD_DP 1
80 #define GMX_CPT_BUILD_DP 0
83 /* cpt_version should normally only be changed
84 * when the header of footer format changes.
85 * The state data format itself is backward and forward compatible.
86 * But old code can not read a new entry that is present in the file
87 * (but can read a new format when new entries are not present).
89 static const int cpt_version = 15;
92 const char *est_names[estNR] =
95 "box", "box-rel", "box-v", "pres_prev",
96 "nosehoover-xi", "thermostat-integral",
97 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
98 "disre_initf", "disre_rm3tav",
99 "orire_initf", "orire_Dtav",
100 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng", "MC-rng-i"
104 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
107 const char *eeks_names[eeksNR] =
109 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
110 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
114 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
115 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
116 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
117 eenhENERGY_DELTA_H_NN,
118 eenhENERGY_DELTA_H_LIST,
119 eenhENERGY_DELTA_H_STARTTIME,
120 eenhENERGY_DELTA_H_STARTLAMBDA,
124 const char *eenh_names[eenhNR] =
126 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
127 "energy_sum_sim", "energy_nsum_sim",
128 "energy_nsteps", "energy_nsteps_sim",
130 "energy_delta_h_list",
131 "energy_delta_h_start_time",
132 "energy_delta_h_start_lambda"
135 /* free energy history variables -- need to be preserved over checkpoint */
137 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
138 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
140 /* free energy history variable names */
141 const char *edfh_names[edfhNR] =
143 "bEquilibrated", "N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar", "variance",
144 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
147 #ifdef GMX_NATIVE_WINDOWS
149 gmx_wintruncate(const char *filename, __int64 size)
152 /*we do this elsewhere*/
158 fp = fopen(filename, "rb+");
165 return _chsize_s( fileno(fp), size);
172 ecprREAL, ecprRVEC, ecprMATRIX
176 cptpEST, cptpEEKS, cptpEENH, cptpEDFH
178 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
179 cptpEST - state variables.
180 cptpEEKS - Kinetic energy state variables.
181 cptpEENH - Energy history state variables.
182 cptpEDFH - free energy history variables.
186 static const char *st_names(int cptp, int ecpt)
190 case cptpEST: return est_names [ecpt]; break;
191 case cptpEEKS: return eeks_names[ecpt]; break;
192 case cptpEENH: return eenh_names[ecpt]; break;
193 case cptpEDFH: return edfh_names[ecpt]; break;
199 static void cp_warning(FILE *fp)
201 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
204 static void cp_error()
206 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
209 static void do_cpt_string_err(XDR *xd, gmx_bool bRead, const char *desc, char **s, FILE *list)
217 res = xdr_string(xd, s, CPTSTRLEN);
224 fprintf(list, "%s = %s\n", desc, *s);
229 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
233 res = xdr_int(xd, i);
240 fprintf(list, "%s = %d\n", desc, *i);
245 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
251 fprintf(list, "%s = ", desc);
253 for (j = 0; j < n && res; j++)
255 res &= xdr_u_char(xd, &i[j]);
258 fprintf(list, "%02x", i[j]);
273 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
275 if (do_cpt_int(xd, desc, i, list) < 0)
281 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_large_int_t *i, FILE *list)
284 char buf[STEPSTRSIZE];
286 res = xdr_gmx_large_int(xd, i);
293 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
297 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
301 res = xdr_double(xd, f);
308 fprintf(list, "%s = %f\n", desc, *f);
312 static void do_cpt_real_err(XDR *xd, real *f)
317 res = xdr_double(xd, f);
319 res = xdr_float(xd, f);
327 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
331 for (i = 0; i < n; i++)
333 for (j = 0; j < DIM; j++)
335 do_cpt_real_err(xd, &f[i][j]);
341 pr_rvecs(list, 0, desc, f, n);
345 /* If nval >= 0, nval is used; on read this should match the passed value.
346 * If nval n<0, *nptr is used; on read the value is stored in nptr
348 static int do_cpte_reals_low(XDR *xd, int cptp, int ecpt, int sflags,
349 int nval, int *nptr, real **v,
350 FILE *list, int erealtype)
354 int dtc = xdr_datatype_float;
356 int dtc = xdr_datatype_double;
358 real *vp, *va = NULL;
373 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
378 res = xdr_int(xd, &nf);
389 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), nval, nf);
398 res = xdr_int(xd, &dt);
405 fprintf(stderr, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
406 st_names(cptp, ecpt), xdr_datatype_names[dtc],
407 xdr_datatype_names[dt]);
409 if (list || !(sflags & (1<<ecpt)))
422 if (dt == xdr_datatype_float)
424 if (dtc == xdr_datatype_float)
432 res = xdr_vector(xd, (char *)vf, nf,
433 (unsigned int)sizeof(float), (xdrproc_t)xdr_float);
438 if (dtc != xdr_datatype_float)
440 for (i = 0; i < nf; i++)
449 if (dtc == xdr_datatype_double)
457 res = xdr_vector(xd, (char *)vd, nf,
458 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
463 if (dtc != xdr_datatype_double)
465 for (i = 0; i < nf; i++)
478 pr_reals(list, 0, st_names(cptp, ecpt), vp, nf);
481 pr_rvecs(list, 0, st_names(cptp, ecpt), (rvec *)vp, nf/3);
484 gmx_incons("Unknown checkpoint real type");
496 /* This function stores n along with the reals for reading,
497 * but on reading it assumes that n matches the value in the checkpoint file,
498 * a fatal error is generated when this is not the case.
500 static int do_cpte_reals(XDR *xd, int cptp, int ecpt, int sflags,
501 int n, real **v, FILE *list)
503 return do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, v, list, ecprREAL);
506 /* This function does the same as do_cpte_reals,
507 * except that on reading it ignores the passed value of *n
508 * and stored the value read from the checkpoint file in *n.
510 static int do_cpte_n_reals(XDR *xd, int cptp, int ecpt, int sflags,
511 int *n, real **v, FILE *list)
513 return do_cpte_reals_low(xd, cptp, ecpt, sflags, -1, n, v, list, ecprREAL);
516 static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
521 return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
524 static int do_cpte_ints(XDR *xd, int cptp, int ecpt, int sflags,
525 int n, int **v, FILE *list)
528 int dtc = xdr_datatype_int;
533 res = xdr_int(xd, &nf);
538 if (list == NULL && v != NULL && nf != n)
540 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
543 res = xdr_int(xd, &dt);
550 gmx_fatal(FARGS, "Type mismatch for state entry %s, code type is %s, file type is %s\n",
551 st_names(cptp, ecpt), xdr_datatype_names[dtc],
552 xdr_datatype_names[dt]);
554 if (list || !(sflags & (1<<ecpt)) || v == NULL)
567 res = xdr_vector(xd, (char *)vp, nf,
568 (unsigned int)sizeof(int), (xdrproc_t)xdr_int);
575 pr_ivec(list, 0, st_names(cptp, ecpt), vp, nf, TRUE);
585 static int do_cpte_int(XDR *xd, int cptp, int ecpt, int sflags,
588 return do_cpte_ints(xd, cptp, ecpt, sflags, 1, &i, list);
591 static int do_cpte_doubles(XDR *xd, int cptp, int ecpt, int sflags,
592 int n, double **v, FILE *list)
595 int dtc = xdr_datatype_double;
596 double *vp, *va = NULL;
600 res = xdr_int(xd, &nf);
605 if (list == NULL && nf != n)
607 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
610 res = xdr_int(xd, &dt);
617 gmx_fatal(FARGS, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
618 st_names(cptp, ecpt), xdr_datatype_names[dtc],
619 xdr_datatype_names[dt]);
621 if (list || !(sflags & (1<<ecpt)))
634 res = xdr_vector(xd, (char *)vp, nf,
635 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
642 pr_doubles(list, 0, st_names(cptp, ecpt), vp, nf);
652 static int do_cpte_double(XDR *xd, int cptp, int ecpt, int sflags,
653 double *r, FILE *list)
655 return do_cpte_doubles(xd, cptp, ecpt, sflags, 1, &r, list);
659 static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
660 int n, rvec **v, FILE *list)
664 return do_cpte_reals_low(xd, cptp, ecpt, sflags,
665 n*DIM, NULL, (real **)v, list, ecprRVEC);
668 static int do_cpte_matrix(XDR *xd, int cptp, int ecpt, int sflags,
669 matrix v, FILE *list)
674 vr = (real *)&(v[0][0]);
675 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
676 DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
678 if (list && ret == 0)
680 pr_rvecs(list, 0, st_names(cptp, ecpt), v, DIM);
687 static int do_cpte_nmatrix(XDR *xd, int cptp, int ecpt, int sflags,
688 int n, real **v, FILE *list)
693 char name[CPTSTRLEN];
700 for (i = 0; i < n; i++)
704 reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
705 if (list && reti == 0)
707 sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
708 pr_reals(list, 0, name, v[i], n);
718 static int do_cpte_matrices(XDR *xd, int cptp, int ecpt, int sflags,
719 int n, matrix **v, FILE *list)
722 matrix *vp, *va = NULL;
728 res = xdr_int(xd, &nf);
733 if (list == NULL && nf != n)
735 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
737 if (list || !(sflags & (1<<ecpt)))
750 snew(vr, nf*DIM*DIM);
751 for (i = 0; i < nf; i++)
753 for (j = 0; j < DIM; j++)
755 for (k = 0; k < DIM; k++)
757 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
761 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
762 nf*DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
763 for (i = 0; i < nf; i++)
765 for (j = 0; j < DIM; j++)
767 for (k = 0; k < DIM; k++)
769 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
775 if (list && ret == 0)
777 for (i = 0; i < nf; i++)
779 pr_rvecs(list, 0, st_names(cptp, ecpt), vp[i], DIM);
790 static void do_cpt_header(XDR *xd, gmx_bool bRead, int *file_version,
791 char **version, char **btime, char **buser, char **bhost,
793 char **fprog, char **ftime,
794 int *eIntegrator, int *simulation_part,
795 gmx_large_int_t *step, double *t,
796 int *nnodes, int *dd_nc, int *npme,
797 int *natoms, int *ngtc, int *nnhpres, int *nhchainlength,
798 int *nlambda, int *flags_state,
799 int *flags_eks, int *flags_enh, int *flags_dfh,
817 res = xdr_int(xd, &magic);
820 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
822 if (magic != CPT_MAGIC1)
824 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
825 "The checkpoint file is corrupted or not a checkpoint file",
832 if (gethostname(fhost, 255) != 0)
834 sprintf(fhost, "unknown");
837 sprintf(fhost, "unknown");
840 do_cpt_string_err(xd, bRead, "GROMACS version", version, list);
841 do_cpt_string_err(xd, bRead, "GROMACS build time", btime, list);
842 do_cpt_string_err(xd, bRead, "GROMACS build user", buser, list);
843 do_cpt_string_err(xd, bRead, "GROMACS build host", bhost, list);
844 do_cpt_string_err(xd, bRead, "generating program", fprog, list);
845 do_cpt_string_err(xd, bRead, "generation time", ftime, list);
846 *file_version = cpt_version;
847 do_cpt_int_err(xd, "checkpoint file version", file_version, list);
848 if (*file_version > cpt_version)
850 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", *file_version, cpt_version);
852 if (*file_version >= 13)
854 do_cpt_int_err(xd, "GROMACS double precision", double_prec, list);
860 if (*file_version >= 12)
862 do_cpt_string_err(xd, bRead, "generating host", &fhost, list);
868 do_cpt_int_err(xd, "#atoms", natoms, list);
869 do_cpt_int_err(xd, "#T-coupling groups", ngtc, list);
870 if (*file_version >= 10)
872 do_cpt_int_err(xd, "#Nose-Hoover T-chains", nhchainlength, list);
878 if (*file_version >= 11)
880 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", nnhpres, list);
886 if (*file_version >= 14)
888 do_cpt_int_err(xd, "# of total lambda states ", nlambda, list);
894 do_cpt_int_err(xd, "integrator", eIntegrator, list);
895 if (*file_version >= 3)
897 do_cpt_int_err(xd, "simulation part #", simulation_part, list);
901 *simulation_part = 1;
903 if (*file_version >= 5)
905 do_cpt_step_err(xd, "step", step, list);
909 do_cpt_int_err(xd, "step", &idum, list);
912 do_cpt_double_err(xd, "t", t, list);
913 do_cpt_int_err(xd, "#PP-nodes", nnodes, list);
915 do_cpt_int_err(xd, "dd_nc[x]", dd_nc ? &(dd_nc[0]) : &idum, list);
916 do_cpt_int_err(xd, "dd_nc[y]", dd_nc ? &(dd_nc[1]) : &idum, list);
917 do_cpt_int_err(xd, "dd_nc[z]", dd_nc ? &(dd_nc[2]) : &idum, list);
918 do_cpt_int_err(xd, "#PME-only nodes", npme, list);
919 do_cpt_int_err(xd, "state flags", flags_state, list);
920 if (*file_version >= 4)
922 do_cpt_int_err(xd, "ekin data flags", flags_eks, list);
923 do_cpt_int_err(xd, "energy history flags", flags_enh, list);
928 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
929 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
930 (1<<(estORIRE_DTAV+2)) |
931 (1<<(estORIRE_DTAV+3))));
933 if (*file_version >= 14)
935 do_cpt_int_err(xd, "df history flags", flags_dfh, list);
942 if (*file_version >= 15)
944 do_cpt_int_err(xd, "ED data sets", nED, list);
952 static int do_cpt_footer(XDR *xd, int file_version)
957 if (file_version >= 2)
960 res = xdr_int(xd, &magic);
965 if (magic != CPT_MAGIC2)
974 static int do_cpt_state(XDR *xd, int fflags, t_state *state,
975 gmx_bool bReadRNG, FILE *list)
978 int **rng_p, **rngi_p;
985 nnht = state->nhchainlength*state->ngtc;
986 nnhtp = state->nhchainlength*state->nnhpres;
990 rng_p = (int **)&state->ld_rng;
991 rngi_p = &state->ld_rngi;
995 /* Do not read the RNG data */
999 /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
1001 sflags = state->flags;
1002 for (i = 0; (i < estNR && ret == 0); i++)
1004 if (fflags & (1<<i))
1008 case estLAMBDA: ret = do_cpte_reals(xd, cptpEST, i, sflags, efptNR, &(state->lambda), list); break;
1009 case estFEPSTATE: ret = do_cpte_int (xd, cptpEST, i, sflags, &state->fep_state, list); break;
1010 case estBOX: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box, list); break;
1011 case estBOX_REL: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box_rel, list); break;
1012 case estBOXV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->boxv, list); break;
1013 case estPRES_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->pres_prev, list); break;
1014 case estSVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->svir_prev, list); break;
1015 case estFVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->fvir_prev, list); break;
1016 case estNH_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_xi, list); break;
1017 case estNH_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_vxi, list); break;
1018 case estNHPRES_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_xi, list); break;
1019 case estNHPRES_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_vxi, list); break;
1020 case estTC_INT: ret = do_cpte_doubles(xd, cptpEST, i, sflags, state->ngtc, &state->therm_integral, list); break;
1021 case estVETA: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->veta, list); break;
1022 case estVOL0: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->vol0, list); break;
1023 case estX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->x, list); break;
1024 case estV: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->v, list); break;
1025 case estSDX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->sd_X, list); break;
1026 case estLD_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrng, rng_p, list); break;
1027 case estLD_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrngi, rngi_p, list); break;
1028 case estMC_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nmcrng, (int **)&state->mc_rng, list); break;
1029 case estMC_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, 1, &state->mc_rngi, list); break;
1030 case estDISRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.disre_initf, list); break;
1031 case estDISRE_RM3TAV: ret = do_cpte_reals(xd, cptpEST, i, sflags, state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1032 case estORIRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.orire_initf, list); break;
1033 case estORIRE_DTAV: ret = do_cpte_reals(xd, cptpEST, i, sflags, state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1035 gmx_fatal(FARGS, "Unknown state entry %d\n"
1036 "You are probably reading a new checkpoint file with old code", i);
1044 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1052 for (i = 0; (i < eeksNR && ret == 0); i++)
1054 if (fflags & (1<<i))
1059 case eeksEKIN_N: ret = do_cpte_int(xd, cptpEEKS, i, fflags, &ekins->ekin_n, list); break;
1060 case eeksEKINH: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1061 case eeksEKINF: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1062 case eeksEKINO: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1063 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, cptpEEKS, i, fflags, ekins->ekin_total, list); break;
1064 case eeksEKINSCALEF: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscalef_nhc, list); break;
1065 case eeksVSCALE: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->vscale_nhc, list); break;
1066 case eeksEKINSCALEH: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->ekinscaleh_nhc, list); break;
1067 case eeksDEKINDL: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->dekindl, list); break;
1068 case eeksMVCOS: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->mvcos, list); break;
1070 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1071 "You are probably reading a new checkpoint file with old code", i);
1080 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1081 int fflags, energyhistory_t *enerhist,
1092 enerhist->nsteps = 0;
1094 enerhist->nsteps_sim = 0;
1095 enerhist->nsum_sim = 0;
1096 enerhist->dht = NULL;
1098 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1100 snew(enerhist->dht, 1);
1101 enerhist->dht->ndh = NULL;
1102 enerhist->dht->dh = NULL;
1103 enerhist->dht->start_lambda_set = FALSE;
1107 for (i = 0; (i < eenhNR && ret == 0); i++)
1109 if (fflags & (1<<i))
1113 case eenhENERGY_N: ret = do_cpte_int(xd, cptpEENH, i, fflags, &enerhist->nener, list); break;
1114 case eenhENERGY_AVER: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_ave, list); break;
1115 case eenhENERGY_SUM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum, list); break;
1116 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1117 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum_sim, list); break;
1118 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1119 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1120 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1121 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd, eenh_names[i], &(enerhist->dht->nndh), list);
1122 if (bRead) /* now allocate memory for it */
1124 snew(enerhist->dht->dh, enerhist->dht->nndh);
1125 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1126 for (j = 0; j < enerhist->dht->nndh; j++)
1128 enerhist->dht->ndh[j] = 0;
1129 enerhist->dht->dh[j] = NULL;
1133 case eenhENERGY_DELTA_H_LIST:
1134 for (j = 0; j < enerhist->dht->nndh; j++)
1136 ret = do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1139 case eenhENERGY_DELTA_H_STARTTIME:
1140 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1141 case eenhENERGY_DELTA_H_STARTLAMBDA:
1142 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1144 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1145 "You are probably reading a new checkpoint file with old code", i);
1150 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1152 /* Assume we have an old file format and copy sum to sum_sim */
1153 srenew(enerhist->ener_sum_sim, enerhist->nener);
1154 for (i = 0; i < enerhist->nener; i++)
1156 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1158 fflags |= (1<<eenhENERGY_SUM_SIM);
1161 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1162 !(fflags & (1<<eenhENERGY_NSTEPS)))
1164 /* Assume we have an old file format and copy nsum to nsteps */
1165 enerhist->nsteps = enerhist->nsum;
1166 fflags |= (1<<eenhENERGY_NSTEPS);
1168 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1169 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1171 /* Assume we have an old file format and copy nsum to nsteps */
1172 enerhist->nsteps_sim = enerhist->nsum_sim;
1173 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1179 static int do_cpt_df_hist(XDR *xd, int fflags, df_history_t *dfhist, FILE *list)
1184 nlambda = dfhist->nlambda;
1187 for (i = 0; (i < edfhNR && ret == 0); i++)
1189 if (fflags & (1<<i))
1193 case edfhBEQUIL: ret = do_cpte_int(xd, cptpEDFH, i, fflags, &dfhist->bEquil, list); break;
1194 case edfhNATLAMBDA: ret = do_cpte_ints(xd, cptpEDFH, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1195 case edfhWLHISTO: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1196 case edfhWLDELTA: ret = do_cpte_real(xd, cptpEDFH, i, fflags, &dfhist->wl_delta, list); break;
1197 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1198 case edfhSUMDG: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1199 case edfhSUMMINVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1200 case edfhSUMVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1201 case edfhACCUMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p, list); break;
1202 case edfhACCUMM: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m, list); break;
1203 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p2, list); break;
1204 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m2, list); break;
1205 case edfhTIJ: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij, list); break;
1206 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1209 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1210 "You are probably reading a new checkpoint file with old code", i);
1219 /* This function stores the last whole configuration of the reference and
1220 * average structure in the .cpt file
1222 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1223 edsamstate_t *EDstate, FILE *list)
1230 EDstate->bFromCpt = bRead;
1232 if (EDstate->nED <= 0)
1237 /* When reading, init_edsam has not been called yet,
1238 * so we have to allocate memory first. */
1241 snew(EDstate->nref, EDstate->nED);
1242 snew(EDstate->old_sref, EDstate->nED);
1243 snew(EDstate->nav, EDstate->nED);
1244 snew(EDstate->old_sav, EDstate->nED);
1247 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1248 for (i = 0; i < EDstate->nED; i++)
1250 /* Reference structure SREF */
1251 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1252 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1253 sprintf(buf, "ED%d x_ref", i+1);
1256 snew(EDstate->old_sref[i], EDstate->nref[i]);
1257 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1261 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1264 /* Average structure SAV */
1265 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1266 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1267 sprintf(buf, "ED%d x_av", i+1);
1270 snew(EDstate->old_sav[i], EDstate->nav[i]);
1271 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1275 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1283 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1284 gmx_file_position_t **p_outputfiles, int *nfiles,
1285 FILE *list, int file_version)
1289 gmx_off_t mask = 0xFFFFFFFFL;
1290 int offset_high, offset_low;
1292 gmx_file_position_t *outputfiles;
1294 if (do_cpt_int(xd, "number of output files", nfiles, list) != 0)
1301 snew(*p_outputfiles, *nfiles);
1304 outputfiles = *p_outputfiles;
1306 for (i = 0; i < *nfiles; i++)
1308 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1311 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1312 strncpy(outputfiles[i].filename, buf, CPTSTRLEN-1);
1318 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1322 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1326 #if (SIZEOF_GMX_OFF_T > 4)
1327 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1329 outputfiles[i].offset = offset_low;
1334 buf = outputfiles[i].filename;
1335 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1337 offset = outputfiles[i].offset;
1345 #if (SIZEOF_GMX_OFF_T > 4)
1346 offset_low = (int) (offset & mask);
1347 offset_high = (int) ((offset >> 32) & mask);
1349 offset_low = offset;
1353 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1357 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1362 if (file_version >= 8)
1364 if (do_cpt_int(xd, "file_checksum_size", &(outputfiles[i].chksum_size),
1369 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfiles[i].chksum, list) != 0)
1376 outputfiles[i].chksum_size = -1;
1383 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1384 FILE *fplog, t_commrec *cr,
1385 int eIntegrator, int simulation_part,
1386 gmx_bool bExpanded, int elamstats,
1387 gmx_large_int_t step, double t, t_state *state)
1397 char *fntemp; /* the temporary checkpoint file name */
1399 char timebuf[STRLEN];
1400 int nppnodes, npmenodes, flag_64bit;
1401 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1402 gmx_file_position_t *outputfiles;
1405 int flags_eks, flags_enh, flags_dfh, i;
1410 if (DOMAINDECOMP(cr))
1412 nppnodes = cr->dd->nnodes;
1413 npmenodes = cr->npmenodes;
1417 nppnodes = cr->nnodes;
1427 /* make the new temporary filename */
1428 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1430 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1431 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1432 strcat(fntemp, suffix);
1433 strcat(fntemp, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1436 gmx_ctime_r(&now, timebuf, STRLEN);
1440 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1441 gmx_step_str(step, buf), timebuf);
1444 /* Get offsets for open files */
1445 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1447 fp = gmx_fio_open(fntemp, "w");
1449 if (state->ekinstate.bUpToDate)
1452 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1453 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1454 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1462 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1464 flags_enh |= (1<<eenhENERGY_N);
1465 if (state->enerhist.nsum > 0)
1467 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1468 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1470 if (state->enerhist.nsum_sim > 0)
1472 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1473 (1<<eenhENERGY_NSUM_SIM));
1475 if (state->enerhist.dht)
1477 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1478 (1<< eenhENERGY_DELTA_H_LIST) |
1479 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1480 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1486 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1487 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1490 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1492 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1494 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1495 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1503 /* We can check many more things now (CPU, acceleration, etc), but
1504 * it is highly unlikely to have two separate builds with exactly
1505 * the same version, user, time, and build host!
1508 version = gmx_strdup(VERSION);
1509 btime = gmx_strdup(BUILD_TIME);
1510 buser = gmx_strdup(BUILD_USER);
1511 bhost = gmx_strdup(BUILD_HOST);
1513 double_prec = GMX_CPT_BUILD_DP;
1514 fprog = gmx_strdup(Program());
1516 ftime = &(timebuf[0]);
1518 do_cpt_header(gmx_fio_getxdr(fp), FALSE, &file_version,
1519 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1520 &eIntegrator, &simulation_part, &step, &t, &nppnodes,
1521 DOMAINDECOMP(cr) ? cr->dd->nc : NULL, &npmenodes,
1522 &state->natoms, &state->ngtc, &state->nnhpres,
1523 &state->nhchainlength, &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
1524 &state->edsamstate.nED,
1533 if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, TRUE, NULL) < 0) ||
1534 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL) < 0) ||
1535 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, &state->enerhist, NULL) < 0) ||
1536 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL) < 0) ||
1537 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, &state->edsamstate, NULL) < 0) ||
1538 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, &noutputfiles, NULL,
1541 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1544 do_cpt_footer(gmx_fio_getxdr(fp), file_version);
1546 /* we really, REALLY, want to make sure to physically write the checkpoint,
1547 and all the files it depends on, out to disk. Because we've
1548 opened the checkpoint with gmx_fio_open(), it's in our list
1550 ret = gmx_fio_all_output_fsync();
1556 "Cannot fsync '%s'; maybe you are out of disk space?",
1557 gmx_fio_getname(ret));
1559 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == NULL)
1569 if (gmx_fio_close(fp) != 0)
1571 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1574 /* we don't move the checkpoint if the user specified they didn't want it,
1575 or if the fsyncs failed */
1576 if (!bNumberAndKeep && !ret)
1580 /* Rename the previous checkpoint file */
1582 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1583 strcat(buf, "_prev");
1584 strcat(buf, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1586 /* we copy here so that if something goes wrong between now and
1587 * the rename below, there's always a state.cpt.
1588 * If renames are atomic (such as in POSIX systems),
1589 * this copying should be unneccesary.
1591 gmx_file_copy(fn, buf, FALSE);
1592 /* We don't really care if this fails:
1593 * there's already a new checkpoint.
1596 gmx_file_rename(fn, buf);
1599 if (gmx_file_rename(fntemp, fn) != 0)
1601 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1609 /*code for alternate checkpointing scheme. moved from top of loop over
1611 fcRequestCheckPoint();
1612 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
1614 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
1616 #endif /* end GMX_FAHCORE block */
1619 static void print_flag_mismatch(FILE *fplog, int sflags, int fflags)
1623 fprintf(fplog, "\nState entry mismatch between the simulation and the checkpoint file\n");
1624 fprintf(fplog, "Entries which are not present in the checkpoint file will not be updated\n");
1625 fprintf(fplog, " %24s %11s %11s\n", "", "simulation", "checkpoint");
1626 for (i = 0; i < estNR; i++)
1628 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1630 fprintf(fplog, " %24s %11s %11s\n",
1632 (sflags & (1<<i)) ? " present " : "not present",
1633 (fflags & (1<<i)) ? " present " : "not present");
1638 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
1640 FILE *fp = fplog ? fplog : stderr;
1644 fprintf(fp, " %s mismatch,\n", type);
1645 fprintf(fp, " current program: %d\n", p);
1646 fprintf(fp, " checkpoint file: %d\n", f);
1652 static void check_string(FILE *fplog, const char *type, const char *p,
1653 const char *f, gmx_bool *mm)
1655 FILE *fp = fplog ? fplog : stderr;
1657 if (strcmp(p, f) != 0)
1659 fprintf(fp, " %s mismatch,\n", type);
1660 fprintf(fp, " current program: %s\n", p);
1661 fprintf(fp, " checkpoint file: %s\n", f);
1667 static void check_match(FILE *fplog,
1669 char *btime, char *buser, char *bhost, int double_prec,
1671 t_commrec *cr, gmx_bool bPartDecomp, int npp_f, int npme_f,
1672 ivec dd_nc, ivec dd_nc_f)
1679 check_string(fplog, "Version", VERSION, version, &mm);
1680 check_string(fplog, "Build time", BUILD_TIME, btime, &mm);
1681 check_string(fplog, "Build user", BUILD_USER, buser, &mm);
1682 check_string(fplog, "Build host", BUILD_HOST, bhost, &mm);
1683 check_int (fplog, "Double prec.", GMX_CPT_BUILD_DP, double_prec, &mm);
1684 check_string(fplog, "Program name", Program(), fprog, &mm);
1686 check_int (fplog, "#nodes", cr->nnodes, npp_f+npme_f, &mm);
1695 check_int (fplog, "#PME-nodes", cr->npmenodes, npme_f, &mm);
1698 if (cr->npmenodes >= 0)
1700 npp -= cr->npmenodes;
1704 check_int (fplog, "#DD-cells[x]", dd_nc[XX], dd_nc_f[XX], &mm);
1705 check_int (fplog, "#DD-cells[y]", dd_nc[YY], dd_nc_f[YY], &mm);
1706 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], dd_nc_f[ZZ], &mm);
1713 "Gromacs binary or parallel settings not identical to previous run.\n"
1714 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1715 fplog ? ",\n see the log file for details" : "");
1720 "Gromacs binary or parallel settings not identical to previous run.\n"
1721 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1726 static void read_checkpoint(const char *fn, FILE **pfplog,
1727 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
1728 int eIntegrator, int *init_fep_state, gmx_large_int_t *step, double *t,
1729 t_state *state, gmx_bool *bReadRNG, gmx_bool *bReadEkin,
1730 int *simulation_part,
1731 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend)
1736 char *version, *btime, *buser, *bhost, *fprog, *ftime;
1738 char filename[STRLEN], buf[STEPSTRSIZE];
1739 int nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
1741 int natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
1744 gmx_file_position_t *outputfiles;
1746 t_fileio *chksum_file;
1747 FILE * fplog = *pfplog;
1748 unsigned char digest[16];
1749 #ifndef GMX_NATIVE_WINDOWS
1750 struct flock fl; /* don't initialize here: the struct order is OS
1754 const char *int_warn =
1755 "WARNING: The checkpoint file was generated with integrator %s,\n"
1756 " while the simulation uses integrator %s\n\n";
1757 const char *sd_note =
1758 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1759 " while the simulation uses %d SD or BD nodes,\n"
1760 " continuation will be exact, except for the random state\n\n";
1762 #ifndef GMX_NATIVE_WINDOWS
1763 fl.l_type = F_WRLCK;
1764 fl.l_whence = SEEK_SET;
1773 "read_checkpoint not (yet) supported with particle decomposition");
1776 fp = gmx_fio_open(fn, "r");
1777 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
1778 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1779 &eIntegrator_f, simulation_part, step, t,
1780 &nppnodes_f, dd_nc_f, &npmenodes_f,
1781 &natoms, &ngtc, &nnhpres, &nhchainlength, &nlambda,
1782 &fflags, &flags_eks, &flags_enh, &flags_dfh,
1783 &state->edsamstate.nED, NULL);
1785 if (bAppendOutputFiles &&
1786 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1788 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1791 if (cr == NULL || MASTER(cr))
1793 fprintf(stderr, "\nReading checkpoint file %s generated: %s\n\n",
1797 /* This will not be written if we do appending, since fplog is still NULL then */
1800 fprintf(fplog, "\n");
1801 fprintf(fplog, "Reading checkpoint file %s\n", fn);
1802 fprintf(fplog, " file generated by: %s\n", fprog);
1803 fprintf(fplog, " file generated at: %s\n", ftime);
1804 fprintf(fplog, " GROMACS build time: %s\n", btime);
1805 fprintf(fplog, " GROMACS build user: %s\n", buser);
1806 fprintf(fplog, " GROMACS build host: %s\n", bhost);
1807 fprintf(fplog, " GROMACS double prec.: %d\n", double_prec);
1808 fprintf(fplog, " simulation part #: %d\n", *simulation_part);
1809 fprintf(fplog, " step: %s\n", gmx_step_str(*step, buf));
1810 fprintf(fplog, " time: %f\n", *t);
1811 fprintf(fplog, "\n");
1814 if (natoms != state->natoms)
1816 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", natoms, state->natoms);
1818 if (ngtc != state->ngtc)
1820 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", ngtc, state->ngtc);
1822 if (nnhpres != state->nnhpres)
1824 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", nnhpres, state->nnhpres);
1827 if (nlambda != state->dfhist.nlambda)
1829 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", nlambda, state->dfhist.nlambda);
1832 init_gtc_state(state, state->ngtc, state->nnhpres, nhchainlength); /* need to keep this here to keep the tpr format working */
1833 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1835 if (eIntegrator_f != eIntegrator)
1839 fprintf(stderr, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1841 if (bAppendOutputFiles)
1844 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1845 "Stopping the run to prevent you from ruining all your data...\n"
1846 "If you _really_ know what you are doing, try with the -noappend option.\n");
1850 fprintf(fplog, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1859 else if (bPartDecomp)
1861 nppnodes = cr->nnodes;
1864 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1866 if (cr->npmenodes < 0)
1868 cr->npmenodes = npmenodes_f;
1870 nppnodes = cr->nnodes - cr->npmenodes;
1871 if (nppnodes == nppnodes_f)
1873 for (d = 0; d < DIM; d++)
1877 dd_nc[d] = dd_nc_f[d];
1884 /* The number of PP nodes has not been set yet */
1888 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1890 /* Correct the RNG state size for the number of PP nodes.
1891 * Such assignments should all be moved to one central function.
1893 state->nrng = nppnodes*gmx_rng_n();
1894 state->nrngi = nppnodes;
1898 if (fflags != state->flags)
1903 if (bAppendOutputFiles)
1906 "Output file appending requested, but input and checkpoint states are not identical.\n"
1907 "Stopping the run to prevent you from ruining all your data...\n"
1908 "You can try with the -noappend option, and get more info in the log file.\n");
1911 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1913 gmx_fatal(FARGS, "You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1918 "WARNING: The checkpoint state entries do not match the simulation,\n"
1919 " see the log file for details\n\n");
1925 print_flag_mismatch(fplog, state->flags, fflags);
1930 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1931 nppnodes != nppnodes_f)
1936 fprintf(stderr, sd_note, nppnodes_f, nppnodes);
1940 fprintf(fplog, sd_note, nppnodes_f, nppnodes);
1945 check_match(fplog, version, btime, buser, bhost, double_prec, fprog,
1946 cr, bPartDecomp, nppnodes_f, npmenodes_f, dd_nc, dd_nc_f);
1949 ret = do_cpt_state(gmx_fio_getxdr(fp), fflags, state, *bReadRNG, NULL);
1950 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1951 Investigate for 5.0. */
1956 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
1961 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1962 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1964 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
1965 flags_enh, &state->enerhist, NULL);
1971 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
1977 if (file_version < 6)
1979 const char *warn = "Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1981 fprintf(stderr, "\nWARNING: %s\n\n", warn);
1984 fprintf(fplog, "\nWARNING: %s\n\n", warn);
1986 state->enerhist.nsum = *step;
1987 state->enerhist.nsum_sim = *step;
1990 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
1996 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, NULL, file_version);
2002 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2007 if (gmx_fio_close(fp) != 0)
2009 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2018 /* If the user wants to append to output files,
2019 * we use the file pointer positions of the output files stored
2020 * in the checkpoint file and truncate the files such that any frames
2021 * written after the checkpoint time are removed.
2022 * All files are md5sum checked such that we can be sure that
2023 * we do not truncate other (maybe imprortant) files.
2025 if (bAppendOutputFiles)
2027 if (fn2ftp(outputfiles[0].filename) != efLOG)
2029 /* make sure first file is log file so that it is OK to use it for
2032 gmx_fatal(FARGS, "The first output file should always be the log "
2033 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2035 for (i = 0; i < nfiles; i++)
2037 if (outputfiles[i].offset < 0)
2039 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2040 "is larger than 2 GB, but mdrun did not support large file"
2041 " offsets. Can not append. Run mdrun with -noappend",
2042 outputfiles[i].filename);
2045 chksum_file = gmx_fio_open(outputfiles[i].filename, "a");
2048 chksum_file = gmx_fio_open(outputfiles[i].filename, "r+");
2053 /* Note that there are systems where the lock operation
2054 * will succeed, but a second process can also lock the file.
2055 * We should probably try to detect this.
2057 #ifndef GMX_NATIVE_WINDOWS
2058 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
2061 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2064 if (errno == ENOSYS)
2068 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2072 fprintf(stderr, "\nNOTE: File locking is not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2075 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2079 else if (errno == EACCES || errno == EAGAIN)
2081 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2082 "simulation?", outputfiles[i].filename);
2086 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2087 outputfiles[i].filename, strerror(errno));
2092 /* compute md5 chksum */
2093 if (outputfiles[i].chksum_size != -1)
2095 if (gmx_fio_get_file_md5(chksum_file, outputfiles[i].offset,
2096 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2098 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2099 outputfiles[i].chksum_size,
2100 outputfiles[i].filename);
2103 if (i == 0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2105 if (gmx_fio_seek(chksum_file, outputfiles[i].offset))
2107 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", strerror(errno));
2112 if (i == 0) /*open log file here - so that lock is never lifted
2113 after chksum is calculated */
2115 *pfplog = gmx_fio_getfp(chksum_file);
2119 gmx_fio_close(chksum_file);
2122 /* compare md5 chksum */
2123 if (outputfiles[i].chksum_size != -1 &&
2124 memcmp(digest, outputfiles[i].chksum, 16) != 0)
2128 fprintf(debug, "chksum for %s: ", outputfiles[i].filename);
2129 for (j = 0; j < 16; j++)
2131 fprintf(debug, "%02x", digest[j]);
2133 fprintf(debug, "\n");
2135 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2136 outputfiles[i].filename);
2141 if (i != 0) /*log file is already seeked to correct position */
2143 #ifdef GMX_NATIVE_WINDOWS
2144 rc = gmx_wintruncate(outputfiles[i].filename, outputfiles[i].offset);
2146 rc = truncate(outputfiles[i].filename, outputfiles[i].offset);
2150 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfiles[i].filename);
2160 void load_checkpoint(const char *fn, FILE **fplog,
2161 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
2162 t_inputrec *ir, t_state *state,
2163 gmx_bool *bReadRNG, gmx_bool *bReadEkin,
2164 gmx_bool bAppend, gmx_bool bForceAppend)
2166 gmx_large_int_t step;
2171 /* Read the state from the checkpoint file */
2172 read_checkpoint(fn, fplog,
2173 cr, bPartDecomp, dd_nc,
2174 ir->eI, &(ir->fepvals->init_fep_state), &step, &t, state, bReadRNG, bReadEkin,
2175 &ir->simulation_part, bAppend, bForceAppend);
2179 gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
2180 gmx_bcast(DIM*sizeof(dd_nc[0]), dd_nc, cr);
2181 gmx_bcast(sizeof(step), &step, cr);
2182 gmx_bcast(sizeof(*bReadRNG), bReadRNG, cr);
2183 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2185 ir->bContinuation = TRUE;
2186 if (ir->nsteps >= 0)
2188 ir->nsteps += ir->init_step - step;
2190 ir->init_step = step;
2191 ir->simulation_part += 1;
2194 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2195 gmx_large_int_t *step, double *t, t_state *state,
2197 int *nfiles, gmx_file_position_t **outputfiles)
2200 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2205 int flags_eks, flags_enh, flags_dfh;
2207 gmx_file_position_t *files_loc = NULL;
2210 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2211 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2212 &eIntegrator, simulation_part, step, t, &nppnodes, dd_nc, &npme,
2213 &state->natoms, &state->ngtc, &state->nnhpres, &state->nhchainlength,
2214 &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
2215 &state->edsamstate.nED, NULL);
2217 do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, bReadRNG, NULL);
2222 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
2227 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2228 flags_enh, &state->enerhist, NULL);
2233 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
2239 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
2245 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2246 outputfiles != NULL ? outputfiles : &files_loc,
2247 outputfiles != NULL ? nfiles : &nfiles_loc,
2248 NULL, file_version);
2249 if (files_loc != NULL)
2259 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2273 read_checkpoint_state(const char *fn, int *simulation_part,
2274 gmx_large_int_t *step, double *t, t_state *state)
2278 fp = gmx_fio_open(fn, "r");
2279 read_checkpoint_data(fp, simulation_part, step, t, state, FALSE, NULL, NULL);
2280 if (gmx_fio_close(fp) != 0)
2282 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2286 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2289 int simulation_part;
2290 gmx_large_int_t step;
2293 init_state(&state, 0, 0, 0, 0, 0);
2295 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, FALSE, NULL, NULL);
2297 fr->natoms = state.natoms;
2300 fr->step = gmx_large_int_to_int(step,
2301 "conversion of checkpoint to trajectory");
2305 fr->lambda = state.lambda[efptFEP];
2306 fr->fep_state = state.fep_state;
2308 fr->bX = (state.flags & (1<<estX));
2314 fr->bV = (state.flags & (1<<estV));
2321 fr->bBox = (state.flags & (1<<estBOX));
2324 copy_mat(state.box, fr->box);
2329 void list_checkpoint(const char *fn, FILE *out)
2333 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2335 int eIntegrator, simulation_part, nppnodes, npme;
2336 gmx_large_int_t step;
2340 int flags_eks, flags_enh, flags_dfh;
2344 gmx_file_position_t *outputfiles;
2347 init_state(&state, -1, -1, -1, -1, 0);
2349 fp = gmx_fio_open(fn, "r");
2350 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2351 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2352 &eIntegrator, &simulation_part, &step, &t, &nppnodes, dd_nc, &npme,
2353 &state.natoms, &state.ngtc, &state.nnhpres, &state.nhchainlength,
2354 &(state.dfhist.nlambda), &state.flags,
2355 &flags_eks, &flags_enh, &flags_dfh, &state.edsamstate.nED, out);
2356 ret = do_cpt_state(gmx_fio_getxdr(fp), state.flags, &state, TRUE, out);
2361 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state.ekinstate, out);
2366 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2367 flags_enh, &state.enerhist, out);
2371 init_df_history(&state.dfhist, state.dfhist.nlambda, 0); /* reinitialize state with correct sizes */
2372 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state.dfhist, out);
2377 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state.edsamstate, out);
2382 do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, out, file_version);
2387 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2394 if (gmx_fio_close(fp) != 0)
2396 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2403 static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
2407 /* Check if the output file name stored in the checkpoint file
2408 * is one of the output file names of mdrun.
2412 !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
2417 return (i < nfile && gmx_fexist(fnm_cp));
2420 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2421 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2422 gmx_large_int_t *cpt_step, t_commrec *cr,
2423 gmx_bool bAppendReq,
2424 int nfile, const t_filenm fnm[],
2425 const char *part_suffix, gmx_bool *bAddPart)
2428 gmx_large_int_t step = 0;
2432 gmx_file_position_t *outputfiles;
2435 char *fn, suf_up[STRLEN];
2441 if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
2443 *simulation_part = 0;
2447 init_state(&state, 0, 0, 0, 0, 0);
2449 read_checkpoint_data(fp, simulation_part, &step, &t, &state, FALSE,
2450 &nfiles, &outputfiles);
2451 if (gmx_fio_close(fp) != 0)
2453 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2460 for (f = 0; f < nfiles; f++)
2462 if (exist_output_file(outputfiles[f].filename, nfile, fnm))
2467 if (nexist == nfiles)
2469 bAppend = bAppendReq;
2471 else if (nexist > 0)
2474 "Output file appending has been requested,\n"
2475 "but some output files listed in the checkpoint file %s\n"
2476 "are not present or are named differently by the current program:\n",
2478 fprintf(stderr, "output files present:");
2479 for (f = 0; f < nfiles; f++)
2481 if (exist_output_file(outputfiles[f].filename,
2484 fprintf(stderr, " %s", outputfiles[f].filename);
2487 fprintf(stderr, "\n");
2488 fprintf(stderr, "output files not present or named differently:");
2489 for (f = 0; f < nfiles; f++)
2491 if (!exist_output_file(outputfiles[f].filename,
2494 fprintf(stderr, " %s", outputfiles[f].filename);
2497 fprintf(stderr, "\n");
2499 gmx_fatal(FARGS, "File appending requested, but only %d of the %d output files are present", nexist, nfiles);
2507 gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
2509 fn = outputfiles[0].filename;
2510 if (strlen(fn) < 4 ||
2511 gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
2513 gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
2515 /* Set bAddPart to whether the suffix string '.part' is present
2516 * in the log file name.
2518 strcpy(suf_up, part_suffix);
2520 *bAddPart = (strstr(fn, part_suffix) != NULL ||
2521 strstr(fn, suf_up) != NULL);
2529 gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
2531 if (*simulation_part > 0 && bAppendReq)
2533 gmx_bcast(sizeof(bAppend), &bAppend, cr);
2534 gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
2537 if (NULL != cpt_step)