1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
30 #ifdef HAVE_SYS_TIME_H
38 #ifdef GMX_NATIVE_WINDOWS
41 #include <sys/locking.h>
51 #include "gmx_random.h"
52 #include "checkpoint.h"
56 #include "gromacs/fileio/filenm.h"
57 #include "gromacs/fileio/futil.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/xdrf.h"
60 #include "gromacs/fileio/xdr_datatype.h"
62 #include "buildinfo.h"
68 #define CPT_MAGIC1 171817
69 #define CPT_MAGIC2 171819
70 #define CPTSTRLEN 1024
73 #define GMX_CPT_BUILD_DP 1
75 #define GMX_CPT_BUILD_DP 0
78 /* cpt_version should normally only be changed
79 * when the header of footer format changes.
80 * The state data format itself is backward and forward compatible.
81 * But old code can not read a new entry that is present in the file
82 * (but can read a new format when new entries are not present).
84 static const int cpt_version = 15;
87 const char *est_names[estNR] =
90 "box", "box-rel", "box-v", "pres_prev",
91 "nosehoover-xi", "thermostat-integral",
92 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
93 "disre_initf", "disre_rm3tav",
94 "orire_initf", "orire_Dtav",
95 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng", "MC-rng-i"
99 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
102 const char *eeks_names[eeksNR] =
104 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
105 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
109 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
110 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
111 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
112 eenhENERGY_DELTA_H_NN,
113 eenhENERGY_DELTA_H_LIST,
114 eenhENERGY_DELTA_H_STARTTIME,
115 eenhENERGY_DELTA_H_STARTLAMBDA,
119 const char *eenh_names[eenhNR] =
121 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
122 "energy_sum_sim", "energy_nsum_sim",
123 "energy_nsteps", "energy_nsteps_sim",
125 "energy_delta_h_list",
126 "energy_delta_h_start_time",
127 "energy_delta_h_start_lambda"
130 /* free energy history variables -- need to be preserved over checkpoint */
132 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
133 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
135 /* free energy history variable names */
136 const char *edfh_names[edfhNR] =
138 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
139 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
142 #ifdef GMX_NATIVE_WINDOWS
144 gmx_wintruncate(const char *filename, __int64 size)
147 /*we do this elsewhere*/
153 fp = fopen(filename, "rb+");
160 return _chsize_s( fileno(fp), size);
167 ecprREAL, ecprRVEC, ecprMATRIX
171 cptpEST, cptpEEKS, cptpEENH, cptpEDFH
173 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
174 cptpEST - state variables.
175 cptpEEKS - Kinetic energy state variables.
176 cptpEENH - Energy history state variables.
177 cptpEDFH - free energy history variables.
181 static const char *st_names(int cptp, int ecpt)
185 case cptpEST: return est_names [ecpt]; break;
186 case cptpEEKS: return eeks_names[ecpt]; break;
187 case cptpEENH: return eenh_names[ecpt]; break;
188 case cptpEDFH: return edfh_names[ecpt]; break;
194 static void cp_warning(FILE *fp)
196 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
199 static void cp_error()
201 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
204 static void do_cpt_string_err(XDR *xd, gmx_bool bRead, const char *desc, char **s, FILE *list)
212 res = xdr_string(xd, s, CPTSTRLEN);
219 fprintf(list, "%s = %s\n", desc, *s);
224 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
228 res = xdr_int(xd, i);
235 fprintf(list, "%s = %d\n", desc, *i);
240 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
246 fprintf(list, "%s = ", desc);
248 for (j = 0; j < n && res; j++)
250 res &= xdr_u_char(xd, &i[j]);
253 fprintf(list, "%02x", i[j]);
268 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
270 if (do_cpt_int(xd, desc, i, list) < 0)
276 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_large_int_t *i, FILE *list)
279 char buf[STEPSTRSIZE];
281 res = xdr_gmx_large_int(xd, i);
288 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
292 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
296 res = xdr_double(xd, f);
303 fprintf(list, "%s = %f\n", desc, *f);
307 static void do_cpt_real_err(XDR *xd, real *f)
312 res = xdr_double(xd, f);
314 res = xdr_float(xd, f);
322 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
326 for (i = 0; i < n; i++)
328 for (j = 0; j < DIM; j++)
330 do_cpt_real_err(xd, &f[i][j]);
336 pr_rvecs(list, 0, desc, f, n);
340 /* If nval >= 0, nval is used; on read this should match the passed value.
341 * If nval n<0, *nptr is used; on read the value is stored in nptr
343 static int do_cpte_reals_low(XDR *xd, int cptp, int ecpt, int sflags,
344 int nval, int *nptr, real **v,
345 FILE *list, int erealtype)
349 int dtc = xdr_datatype_float;
351 int dtc = xdr_datatype_double;
353 real *vp, *va = NULL;
368 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
373 res = xdr_int(xd, &nf);
384 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), nval, nf);
393 res = xdr_int(xd, &dt);
400 fprintf(stderr, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
401 st_names(cptp, ecpt), xdr_datatype_names[dtc],
402 xdr_datatype_names[dt]);
404 if (list || !(sflags & (1<<ecpt)))
417 if (dt == xdr_datatype_float)
419 if (dtc == xdr_datatype_float)
427 res = xdr_vector(xd, (char *)vf, nf,
428 (unsigned int)sizeof(float), (xdrproc_t)xdr_float);
433 if (dtc != xdr_datatype_float)
435 for (i = 0; i < nf; i++)
444 if (dtc == xdr_datatype_double)
452 res = xdr_vector(xd, (char *)vd, nf,
453 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
458 if (dtc != xdr_datatype_double)
460 for (i = 0; i < nf; i++)
473 pr_reals(list, 0, st_names(cptp, ecpt), vp, nf);
476 pr_rvecs(list, 0, st_names(cptp, ecpt), (rvec *)vp, nf/3);
479 gmx_incons("Unknown checkpoint real type");
491 /* This function stores n along with the reals for reading,
492 * but on reading it assumes that n matches the value in the checkpoint file,
493 * a fatal error is generated when this is not the case.
495 static int do_cpte_reals(XDR *xd, int cptp, int ecpt, int sflags,
496 int n, real **v, FILE *list)
498 return do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, v, list, ecprREAL);
501 /* This function does the same as do_cpte_reals,
502 * except that on reading it ignores the passed value of *n
503 * and stored the value read from the checkpoint file in *n.
505 static int do_cpte_n_reals(XDR *xd, int cptp, int ecpt, int sflags,
506 int *n, real **v, FILE *list)
508 return do_cpte_reals_low(xd, cptp, ecpt, sflags, -1, n, v, list, ecprREAL);
511 static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
516 return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
519 static int do_cpte_ints(XDR *xd, int cptp, int ecpt, int sflags,
520 int n, int **v, FILE *list)
523 int dtc = xdr_datatype_int;
528 res = xdr_int(xd, &nf);
533 if (list == NULL && v != NULL && nf != n)
535 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
538 res = xdr_int(xd, &dt);
545 gmx_fatal(FARGS, "Type mismatch for state entry %s, code type is %s, file type is %s\n",
546 st_names(cptp, ecpt), xdr_datatype_names[dtc],
547 xdr_datatype_names[dt]);
549 if (list || !(sflags & (1<<ecpt)) || v == NULL)
562 res = xdr_vector(xd, (char *)vp, nf,
563 (unsigned int)sizeof(int), (xdrproc_t)xdr_int);
570 pr_ivec(list, 0, st_names(cptp, ecpt), vp, nf, TRUE);
580 static int do_cpte_int(XDR *xd, int cptp, int ecpt, int sflags,
583 return do_cpte_ints(xd, cptp, ecpt, sflags, 1, &i, list);
586 static int do_cpte_doubles(XDR *xd, int cptp, int ecpt, int sflags,
587 int n, double **v, FILE *list)
590 int dtc = xdr_datatype_double;
591 double *vp, *va = NULL;
595 res = xdr_int(xd, &nf);
600 if (list == NULL && nf != n)
602 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
605 res = xdr_int(xd, &dt);
612 gmx_fatal(FARGS, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
613 st_names(cptp, ecpt), xdr_datatype_names[dtc],
614 xdr_datatype_names[dt]);
616 if (list || !(sflags & (1<<ecpt)))
629 res = xdr_vector(xd, (char *)vp, nf,
630 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
637 pr_doubles(list, 0, st_names(cptp, ecpt), vp, nf);
647 static int do_cpte_double(XDR *xd, int cptp, int ecpt, int sflags,
648 double *r, FILE *list)
650 return do_cpte_doubles(xd, cptp, ecpt, sflags, 1, &r, list);
654 static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
655 int n, rvec **v, FILE *list)
659 return do_cpte_reals_low(xd, cptp, ecpt, sflags,
660 n*DIM, NULL, (real **)v, list, ecprRVEC);
663 static int do_cpte_matrix(XDR *xd, int cptp, int ecpt, int sflags,
664 matrix v, FILE *list)
669 vr = (real *)&(v[0][0]);
670 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
671 DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
673 if (list && ret == 0)
675 pr_rvecs(list, 0, st_names(cptp, ecpt), v, DIM);
682 static int do_cpte_nmatrix(XDR *xd, int cptp, int ecpt, int sflags,
683 int n, real **v, FILE *list)
688 char name[CPTSTRLEN];
695 for (i = 0; i < n; i++)
699 reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
700 if (list && reti == 0)
702 sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
703 pr_reals(list, 0, name, v[i], n);
713 static int do_cpte_matrices(XDR *xd, int cptp, int ecpt, int sflags,
714 int n, matrix **v, FILE *list)
717 matrix *vp, *va = NULL;
723 res = xdr_int(xd, &nf);
728 if (list == NULL && nf != n)
730 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
732 if (list || !(sflags & (1<<ecpt)))
745 snew(vr, nf*DIM*DIM);
746 for (i = 0; i < nf; i++)
748 for (j = 0; j < DIM; j++)
750 for (k = 0; k < DIM; k++)
752 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
756 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
757 nf*DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
758 for (i = 0; i < nf; i++)
760 for (j = 0; j < DIM; j++)
762 for (k = 0; k < DIM; k++)
764 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
770 if (list && ret == 0)
772 for (i = 0; i < nf; i++)
774 pr_rvecs(list, 0, st_names(cptp, ecpt), vp[i], DIM);
785 static void do_cpt_header(XDR *xd, gmx_bool bRead, int *file_version,
786 char **version, char **btime, char **buser, char **bhost,
788 char **fprog, char **ftime,
789 int *eIntegrator, int *simulation_part,
790 gmx_large_int_t *step, double *t,
791 int *nnodes, int *dd_nc, int *npme,
792 int *natoms, int *ngtc, int *nnhpres, int *nhchainlength,
793 int *nlambda, int *flags_state,
794 int *flags_eks, int *flags_enh, int *flags_dfh,
812 res = xdr_int(xd, &magic);
815 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
817 if (magic != CPT_MAGIC1)
819 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
820 "The checkpoint file is corrupted or not a checkpoint file",
827 if (gethostname(fhost, 255) != 0)
829 sprintf(fhost, "unknown");
832 sprintf(fhost, "unknown");
835 do_cpt_string_err(xd, bRead, "GROMACS version", version, list);
836 do_cpt_string_err(xd, bRead, "GROMACS build time", btime, list);
837 do_cpt_string_err(xd, bRead, "GROMACS build user", buser, list);
838 do_cpt_string_err(xd, bRead, "GROMACS build host", bhost, list);
839 do_cpt_string_err(xd, bRead, "generating program", fprog, list);
840 do_cpt_string_err(xd, bRead, "generation time", ftime, list);
841 *file_version = cpt_version;
842 do_cpt_int_err(xd, "checkpoint file version", file_version, list);
843 if (*file_version > cpt_version)
845 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", *file_version, cpt_version);
847 if (*file_version >= 13)
849 do_cpt_int_err(xd, "GROMACS double precision", double_prec, list);
855 if (*file_version >= 12)
857 do_cpt_string_err(xd, bRead, "generating host", &fhost, list);
863 do_cpt_int_err(xd, "#atoms", natoms, list);
864 do_cpt_int_err(xd, "#T-coupling groups", ngtc, list);
865 if (*file_version >= 10)
867 do_cpt_int_err(xd, "#Nose-Hoover T-chains", nhchainlength, list);
873 if (*file_version >= 11)
875 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", nnhpres, list);
881 if (*file_version >= 14)
883 do_cpt_int_err(xd, "# of total lambda states ", nlambda, list);
889 do_cpt_int_err(xd, "integrator", eIntegrator, list);
890 if (*file_version >= 3)
892 do_cpt_int_err(xd, "simulation part #", simulation_part, list);
896 *simulation_part = 1;
898 if (*file_version >= 5)
900 do_cpt_step_err(xd, "step", step, list);
904 do_cpt_int_err(xd, "step", &idum, list);
907 do_cpt_double_err(xd, "t", t, list);
908 do_cpt_int_err(xd, "#PP-nodes", nnodes, list);
910 do_cpt_int_err(xd, "dd_nc[x]", dd_nc ? &(dd_nc[0]) : &idum, list);
911 do_cpt_int_err(xd, "dd_nc[y]", dd_nc ? &(dd_nc[1]) : &idum, list);
912 do_cpt_int_err(xd, "dd_nc[z]", dd_nc ? &(dd_nc[2]) : &idum, list);
913 do_cpt_int_err(xd, "#PME-only nodes", npme, list);
914 do_cpt_int_err(xd, "state flags", flags_state, list);
915 if (*file_version >= 4)
917 do_cpt_int_err(xd, "ekin data flags", flags_eks, list);
918 do_cpt_int_err(xd, "energy history flags", flags_enh, list);
923 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
924 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
925 (1<<(estORIRE_DTAV+2)) |
926 (1<<(estORIRE_DTAV+3))));
928 if (*file_version >= 14)
930 do_cpt_int_err(xd, "df history flags", flags_dfh, list);
937 if (*file_version >= 15)
939 do_cpt_int_err(xd, "ED data sets", nED, list);
947 static int do_cpt_footer(XDR *xd, int file_version)
952 if (file_version >= 2)
955 res = xdr_int(xd, &magic);
960 if (magic != CPT_MAGIC2)
969 static int do_cpt_state(XDR *xd, gmx_bool bRead,
970 int fflags, t_state *state,
971 gmx_bool bReadRNG, FILE *list)
974 int **rng_p, **rngi_p;
981 nnht = state->nhchainlength*state->ngtc;
982 nnhtp = state->nhchainlength*state->nnhpres;
986 rng_p = (int **)&state->ld_rng;
987 rngi_p = &state->ld_rngi;
991 /* Do not read the RNG data */
996 if (bRead) /* we need to allocate space for dfhist if we are reading */
998 init_df_history(&state->dfhist,state->dfhist.nlambda);
1001 /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
1003 sflags = state->flags;
1004 for (i = 0; (i < estNR && ret == 0); i++)
1006 if (fflags & (1<<i))
1010 case estLAMBDA: ret = do_cpte_reals(xd, cptpEST, i, sflags, efptNR, &(state->lambda), list); break;
1011 case estFEPSTATE: ret = do_cpte_int (xd, cptpEST, i, sflags, &state->fep_state, list); break;
1012 case estBOX: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box, list); break;
1013 case estBOX_REL: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box_rel, list); break;
1014 case estBOXV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->boxv, list); break;
1015 case estPRES_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->pres_prev, list); break;
1016 case estSVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->svir_prev, list); break;
1017 case estFVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->fvir_prev, list); break;
1018 case estNH_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_xi, list); break;
1019 case estNH_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_vxi, list); break;
1020 case estNHPRES_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_xi, list); break;
1021 case estNHPRES_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_vxi, list); break;
1022 case estTC_INT: ret = do_cpte_doubles(xd, cptpEST, i, sflags, state->ngtc, &state->therm_integral, list); break;
1023 case estVETA: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->veta, list); break;
1024 case estVOL0: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->vol0, list); break;
1025 case estX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->x, list); break;
1026 case estV: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->v, list); break;
1027 case estSDX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->sd_X, list); break;
1028 case estLD_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrng, rng_p, list); break;
1029 case estLD_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrngi, rngi_p, list); break;
1030 case estMC_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nmcrng, (int **)&state->mc_rng, list); break;
1031 case estMC_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, 1, &state->mc_rngi, list); break;
1032 case estDISRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.disre_initf, list); break;
1033 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1034 case estORIRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.orire_initf, list); break;
1035 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1037 gmx_fatal(FARGS, "Unknown state entry %d\n"
1038 "You are probably reading a new checkpoint file with old code", i);
1046 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1054 for (i = 0; (i < eeksNR && ret == 0); i++)
1056 if (fflags & (1<<i))
1061 case eeksEKIN_N: ret = do_cpte_int(xd, cptpEEKS, i, fflags, &ekins->ekin_n, list); break;
1062 case eeksEKINH: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1063 case eeksEKINF: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1064 case eeksEKINO: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1065 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, cptpEEKS, i, fflags, ekins->ekin_total, list); break;
1066 case eeksEKINSCALEF: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscalef_nhc, list); break;
1067 case eeksVSCALE: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->vscale_nhc, list); break;
1068 case eeksEKINSCALEH: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->ekinscaleh_nhc, list); break;
1069 case eeksDEKINDL: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->dekindl, list); break;
1070 case eeksMVCOS: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->mvcos, list); break;
1072 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1073 "You are probably reading a new checkpoint file with old code", i);
1082 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1083 int fflags, energyhistory_t *enerhist,
1094 enerhist->nsteps = 0;
1096 enerhist->nsteps_sim = 0;
1097 enerhist->nsum_sim = 0;
1098 enerhist->dht = NULL;
1100 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1102 snew(enerhist->dht, 1);
1103 enerhist->dht->ndh = NULL;
1104 enerhist->dht->dh = NULL;
1105 enerhist->dht->start_lambda_set = FALSE;
1109 for (i = 0; (i < eenhNR && ret == 0); i++)
1111 if (fflags & (1<<i))
1115 case eenhENERGY_N: ret = do_cpte_int(xd, cptpEENH, i, fflags, &enerhist->nener, list); break;
1116 case eenhENERGY_AVER: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_ave, list); break;
1117 case eenhENERGY_SUM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum, list); break;
1118 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1119 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum_sim, list); break;
1120 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1121 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1122 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1123 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd, eenh_names[i], &(enerhist->dht->nndh), list);
1124 if (bRead) /* now allocate memory for it */
1126 snew(enerhist->dht->dh, enerhist->dht->nndh);
1127 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1128 for (j = 0; j < enerhist->dht->nndh; j++)
1130 enerhist->dht->ndh[j] = 0;
1131 enerhist->dht->dh[j] = NULL;
1135 case eenhENERGY_DELTA_H_LIST:
1136 for (j = 0; j < enerhist->dht->nndh; j++)
1138 ret = do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1141 case eenhENERGY_DELTA_H_STARTTIME:
1142 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1143 case eenhENERGY_DELTA_H_STARTLAMBDA:
1144 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1146 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1147 "You are probably reading a new checkpoint file with old code", i);
1152 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1154 /* Assume we have an old file format and copy sum to sum_sim */
1155 srenew(enerhist->ener_sum_sim, enerhist->nener);
1156 for (i = 0; i < enerhist->nener; i++)
1158 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1160 fflags |= (1<<eenhENERGY_SUM_SIM);
1163 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1164 !(fflags & (1<<eenhENERGY_NSTEPS)))
1166 /* Assume we have an old file format and copy nsum to nsteps */
1167 enerhist->nsteps = enerhist->nsum;
1168 fflags |= (1<<eenhENERGY_NSTEPS);
1170 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1171 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1173 /* Assume we have an old file format and copy nsum to nsteps */
1174 enerhist->nsteps_sim = enerhist->nsum_sim;
1175 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1181 static int do_cpt_df_hist(XDR *xd, int fflags, df_history_t *dfhist, FILE *list)
1186 nlambda = dfhist->nlambda;
1189 for (i = 0; (i < edfhNR && ret == 0); i++)
1191 if (fflags & (1<<i))
1195 case edfhBEQUIL: ret = do_cpte_int(xd, cptpEDFH, i, fflags, &dfhist->bEquil, list); break;
1196 case edfhNATLAMBDA: ret = do_cpte_ints(xd, cptpEDFH, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1197 case edfhWLHISTO: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1198 case edfhWLDELTA: ret = do_cpte_real(xd, cptpEDFH, i, fflags, &dfhist->wl_delta, list); break;
1199 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1200 case edfhSUMDG: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1201 case edfhSUMMINVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1202 case edfhSUMVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1203 case edfhACCUMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p, list); break;
1204 case edfhACCUMM: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m, list); break;
1205 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p2, list); break;
1206 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m2, list); break;
1207 case edfhTIJ: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij, list); break;
1208 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1211 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1212 "You are probably reading a new checkpoint file with old code", i);
1221 /* This function stores the last whole configuration of the reference and
1222 * average structure in the .cpt file
1224 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1225 edsamstate_t *EDstate, FILE *list)
1232 EDstate->bFromCpt = bRead;
1234 if (EDstate->nED <= 0)
1239 /* When reading, init_edsam has not been called yet,
1240 * so we have to allocate memory first. */
1243 snew(EDstate->nref, EDstate->nED);
1244 snew(EDstate->old_sref, EDstate->nED);
1245 snew(EDstate->nav, EDstate->nED);
1246 snew(EDstate->old_sav, EDstate->nED);
1249 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1250 for (i = 0; i < EDstate->nED; i++)
1252 /* Reference structure SREF */
1253 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1254 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1255 sprintf(buf, "ED%d x_ref", i+1);
1258 snew(EDstate->old_sref[i], EDstate->nref[i]);
1259 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1263 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1266 /* Average structure SAV */
1267 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1268 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1269 sprintf(buf, "ED%d x_av", i+1);
1272 snew(EDstate->old_sav[i], EDstate->nav[i]);
1273 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1277 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1285 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1286 gmx_file_position_t **p_outputfiles, int *nfiles,
1287 FILE *list, int file_version)
1291 gmx_off_t mask = 0xFFFFFFFFL;
1292 int offset_high, offset_low;
1294 gmx_file_position_t *outputfiles;
1296 if (do_cpt_int(xd, "number of output files", nfiles, list) != 0)
1303 snew(*p_outputfiles, *nfiles);
1306 outputfiles = *p_outputfiles;
1308 for (i = 0; i < *nfiles; i++)
1310 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1313 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1314 strncpy(outputfiles[i].filename, buf, CPTSTRLEN-1);
1320 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1324 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1328 #if (SIZEOF_GMX_OFF_T > 4)
1329 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1331 outputfiles[i].offset = offset_low;
1336 buf = outputfiles[i].filename;
1337 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1339 offset = outputfiles[i].offset;
1347 #if (SIZEOF_GMX_OFF_T > 4)
1348 offset_low = (int) (offset & mask);
1349 offset_high = (int) ((offset >> 32) & mask);
1351 offset_low = offset;
1355 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1359 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1364 if (file_version >= 8)
1366 if (do_cpt_int(xd, "file_checksum_size", &(outputfiles[i].chksum_size),
1371 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfiles[i].chksum, list) != 0)
1378 outputfiles[i].chksum_size = -1;
1385 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1386 FILE *fplog, t_commrec *cr,
1387 int eIntegrator, int simulation_part,
1388 gmx_bool bExpanded, int elamstats,
1389 gmx_large_int_t step, double t, t_state *state)
1399 char *fntemp; /* the temporary checkpoint file name */
1401 char timebuf[STRLEN];
1402 int nppnodes, npmenodes, flag_64bit;
1403 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1404 gmx_file_position_t *outputfiles;
1407 int flags_eks, flags_enh, flags_dfh, i;
1412 if (DOMAINDECOMP(cr))
1414 nppnodes = cr->dd->nnodes;
1415 npmenodes = cr->npmenodes;
1419 nppnodes = cr->nnodes;
1429 /* make the new temporary filename */
1430 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1432 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1433 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1434 strcat(fntemp, suffix);
1435 strcat(fntemp, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1438 gmx_ctime_r(&now, timebuf, STRLEN);
1442 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1443 gmx_step_str(step, buf), timebuf);
1446 /* Get offsets for open files */
1447 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1449 fp = gmx_fio_open(fntemp, "w");
1451 if (state->ekinstate.bUpToDate)
1454 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1455 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1456 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1464 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1466 flags_enh |= (1<<eenhENERGY_N);
1467 if (state->enerhist.nsum > 0)
1469 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1470 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1472 if (state->enerhist.nsum_sim > 0)
1474 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1475 (1<<eenhENERGY_NSUM_SIM));
1477 if (state->enerhist.dht)
1479 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1480 (1<< eenhENERGY_DELTA_H_LIST) |
1481 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1482 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1488 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1489 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1492 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1494 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1496 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1497 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1505 /* We can check many more things now (CPU, acceleration, etc), but
1506 * it is highly unlikely to have two separate builds with exactly
1507 * the same version, user, time, and build host!
1510 version = gmx_strdup(VERSION);
1511 btime = gmx_strdup(BUILD_TIME);
1512 buser = gmx_strdup(BUILD_USER);
1513 bhost = gmx_strdup(BUILD_HOST);
1515 double_prec = GMX_CPT_BUILD_DP;
1516 fprog = gmx_strdup(Program());
1518 ftime = &(timebuf[0]);
1520 do_cpt_header(gmx_fio_getxdr(fp), FALSE, &file_version,
1521 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1522 &eIntegrator, &simulation_part, &step, &t, &nppnodes,
1523 DOMAINDECOMP(cr) ? cr->dd->nc : NULL, &npmenodes,
1524 &state->natoms, &state->ngtc, &state->nnhpres,
1525 &state->nhchainlength, &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
1526 &state->edsamstate.nED,
1535 if ((do_cpt_state(gmx_fio_getxdr(fp), FALSE, state->flags, state, TRUE, NULL) < 0) ||
1536 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL) < 0) ||
1537 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, &state->enerhist, NULL) < 0) ||
1538 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL) < 0) ||
1539 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, &state->edsamstate, NULL) < 0) ||
1540 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, &noutputfiles, NULL,
1543 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1546 do_cpt_footer(gmx_fio_getxdr(fp), file_version);
1548 /* we really, REALLY, want to make sure to physically write the checkpoint,
1549 and all the files it depends on, out to disk. Because we've
1550 opened the checkpoint with gmx_fio_open(), it's in our list
1552 ret = gmx_fio_all_output_fsync();
1558 "Cannot fsync '%s'; maybe you are out of disk space?",
1559 gmx_fio_getname(ret));
1561 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == NULL)
1571 if (gmx_fio_close(fp) != 0)
1573 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1576 /* we don't move the checkpoint if the user specified they didn't want it,
1577 or if the fsyncs failed */
1578 if (!bNumberAndKeep && !ret)
1582 /* Rename the previous checkpoint file */
1584 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1585 strcat(buf, "_prev");
1586 strcat(buf, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1588 /* we copy here so that if something goes wrong between now and
1589 * the rename below, there's always a state.cpt.
1590 * If renames are atomic (such as in POSIX systems),
1591 * this copying should be unneccesary.
1593 gmx_file_copy(fn, buf, FALSE);
1594 /* We don't really care if this fails:
1595 * there's already a new checkpoint.
1598 gmx_file_rename(fn, buf);
1601 if (gmx_file_rename(fntemp, fn) != 0)
1603 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1611 /*code for alternate checkpointing scheme. moved from top of loop over
1613 fcRequestCheckPoint();
1614 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
1616 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
1618 #endif /* end GMX_FAHCORE block */
1621 static void print_flag_mismatch(FILE *fplog, int sflags, int fflags)
1625 fprintf(fplog, "\nState entry mismatch between the simulation and the checkpoint file\n");
1626 fprintf(fplog, "Entries which are not present in the checkpoint file will not be updated\n");
1627 fprintf(fplog, " %24s %11s %11s\n", "", "simulation", "checkpoint");
1628 for (i = 0; i < estNR; i++)
1630 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1632 fprintf(fplog, " %24s %11s %11s\n",
1634 (sflags & (1<<i)) ? " present " : "not present",
1635 (fflags & (1<<i)) ? " present " : "not present");
1640 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
1642 FILE *fp = fplog ? fplog : stderr;
1646 fprintf(fp, " %s mismatch,\n", type);
1647 fprintf(fp, " current program: %d\n", p);
1648 fprintf(fp, " checkpoint file: %d\n", f);
1654 static void check_string(FILE *fplog, const char *type, const char *p,
1655 const char *f, gmx_bool *mm)
1657 FILE *fp = fplog ? fplog : stderr;
1659 if (strcmp(p, f) != 0)
1661 fprintf(fp, " %s mismatch,\n", type);
1662 fprintf(fp, " current program: %s\n", p);
1663 fprintf(fp, " checkpoint file: %s\n", f);
1669 static void check_match(FILE *fplog,
1671 char *btime, char *buser, char *bhost, int double_prec,
1673 t_commrec *cr, gmx_bool bPartDecomp, int npp_f, int npme_f,
1674 ivec dd_nc, ivec dd_nc_f)
1681 check_string(fplog, "Version", VERSION, version, &mm);
1682 check_string(fplog, "Build time", BUILD_TIME, btime, &mm);
1683 check_string(fplog, "Build user", BUILD_USER, buser, &mm);
1684 check_string(fplog, "Build host", BUILD_HOST, bhost, &mm);
1685 check_int (fplog, "Double prec.", GMX_CPT_BUILD_DP, double_prec, &mm);
1686 check_string(fplog, "Program name", Program(), fprog, &mm);
1688 check_int (fplog, "#nodes", cr->nnodes, npp_f+npme_f, &mm);
1697 check_int (fplog, "#PME-nodes", cr->npmenodes, npme_f, &mm);
1700 if (cr->npmenodes >= 0)
1702 npp -= cr->npmenodes;
1706 check_int (fplog, "#DD-cells[x]", dd_nc[XX], dd_nc_f[XX], &mm);
1707 check_int (fplog, "#DD-cells[y]", dd_nc[YY], dd_nc_f[YY], &mm);
1708 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], dd_nc_f[ZZ], &mm);
1715 "Gromacs binary or parallel settings not identical to previous run.\n"
1716 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1717 fplog ? ",\n see the log file for details" : "");
1722 "Gromacs binary or parallel settings not identical to previous run.\n"
1723 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1728 static void read_checkpoint(const char *fn, FILE **pfplog,
1729 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
1730 int eIntegrator, int *init_fep_state, gmx_large_int_t *step, double *t,
1731 t_state *state, gmx_bool *bReadRNG, gmx_bool *bReadEkin,
1732 int *simulation_part,
1733 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend)
1738 char *version, *btime, *buser, *bhost, *fprog, *ftime;
1740 char filename[STRLEN], buf[STEPSTRSIZE];
1741 int nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
1743 int natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
1746 gmx_file_position_t *outputfiles;
1748 t_fileio *chksum_file;
1749 FILE * fplog = *pfplog;
1750 unsigned char digest[16];
1751 #ifndef GMX_NATIVE_WINDOWS
1752 struct flock fl; /* don't initialize here: the struct order is OS
1756 const char *int_warn =
1757 "WARNING: The checkpoint file was generated with integrator %s,\n"
1758 " while the simulation uses integrator %s\n\n";
1759 const char *sd_note =
1760 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1761 " while the simulation uses %d SD or BD nodes,\n"
1762 " continuation will be exact, except for the random state\n\n";
1764 #ifndef GMX_NATIVE_WINDOWS
1765 fl.l_type = F_WRLCK;
1766 fl.l_whence = SEEK_SET;
1775 "read_checkpoint not (yet) supported with particle decomposition");
1778 fp = gmx_fio_open(fn, "r");
1779 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
1780 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1781 &eIntegrator_f, simulation_part, step, t,
1782 &nppnodes_f, dd_nc_f, &npmenodes_f,
1783 &natoms, &ngtc, &nnhpres, &nhchainlength, &nlambda,
1784 &fflags, &flags_eks, &flags_enh, &flags_dfh,
1785 &state->edsamstate.nED, NULL);
1787 if (bAppendOutputFiles &&
1788 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1790 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1793 if (cr == NULL || MASTER(cr))
1795 fprintf(stderr, "\nReading checkpoint file %s generated: %s\n\n",
1799 /* This will not be written if we do appending, since fplog is still NULL then */
1802 fprintf(fplog, "\n");
1803 fprintf(fplog, "Reading checkpoint file %s\n", fn);
1804 fprintf(fplog, " file generated by: %s\n", fprog);
1805 fprintf(fplog, " file generated at: %s\n", ftime);
1806 fprintf(fplog, " GROMACS build time: %s\n", btime);
1807 fprintf(fplog, " GROMACS build user: %s\n", buser);
1808 fprintf(fplog, " GROMACS build host: %s\n", bhost);
1809 fprintf(fplog, " GROMACS double prec.: %d\n", double_prec);
1810 fprintf(fplog, " simulation part #: %d\n", *simulation_part);
1811 fprintf(fplog, " step: %s\n", gmx_step_str(*step, buf));
1812 fprintf(fplog, " time: %f\n", *t);
1813 fprintf(fplog, "\n");
1816 if (natoms != state->natoms)
1818 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", natoms, state->natoms);
1820 if (ngtc != state->ngtc)
1822 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", ngtc, state->ngtc);
1824 if (nnhpres != state->nnhpres)
1826 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", nnhpres, state->nnhpres);
1829 if (nlambda != state->dfhist.nlambda)
1831 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", nlambda, state->dfhist.nlambda);
1834 init_gtc_state(state, state->ngtc, state->nnhpres, nhchainlength); /* need to keep this here to keep the tpr format working */
1835 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1837 if (eIntegrator_f != eIntegrator)
1841 fprintf(stderr, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1843 if (bAppendOutputFiles)
1846 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1847 "Stopping the run to prevent you from ruining all your data...\n"
1848 "If you _really_ know what you are doing, try with the -noappend option.\n");
1852 fprintf(fplog, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1861 else if (bPartDecomp)
1863 nppnodes = cr->nnodes;
1866 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1868 if (cr->npmenodes < 0)
1870 cr->npmenodes = npmenodes_f;
1872 nppnodes = cr->nnodes - cr->npmenodes;
1873 if (nppnodes == nppnodes_f)
1875 for (d = 0; d < DIM; d++)
1879 dd_nc[d] = dd_nc_f[d];
1886 /* The number of PP nodes has not been set yet */
1890 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1892 /* Correct the RNG state size for the number of PP nodes.
1893 * Such assignments should all be moved to one central function.
1895 state->nrng = nppnodes*gmx_rng_n();
1896 state->nrngi = nppnodes;
1900 if (fflags != state->flags)
1905 if (bAppendOutputFiles)
1908 "Output file appending requested, but input and checkpoint states are not identical.\n"
1909 "Stopping the run to prevent you from ruining all your data...\n"
1910 "You can try with the -noappend option, and get more info in the log file.\n");
1913 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1915 gmx_fatal(FARGS, "You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1920 "WARNING: The checkpoint state entries do not match the simulation,\n"
1921 " see the log file for details\n\n");
1927 print_flag_mismatch(fplog, state->flags, fflags);
1932 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1933 nppnodes != nppnodes_f)
1938 fprintf(stderr, sd_note, nppnodes_f, nppnodes);
1942 fprintf(fplog, sd_note, nppnodes_f, nppnodes);
1947 check_match(fplog, version, btime, buser, bhost, double_prec, fprog,
1948 cr, bPartDecomp, nppnodes_f, npmenodes_f, dd_nc, dd_nc_f);
1951 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, fflags, state, *bReadRNG, NULL);
1952 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1953 Investigate for 5.0. */
1958 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
1963 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1964 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1966 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
1967 flags_enh, &state->enerhist, NULL);
1973 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
1979 if (file_version < 6)
1981 const char *warn = "Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1983 fprintf(stderr, "\nWARNING: %s\n\n", warn);
1986 fprintf(fplog, "\nWARNING: %s\n\n", warn);
1988 state->enerhist.nsum = *step;
1989 state->enerhist.nsum_sim = *step;
1992 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
1998 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, NULL, file_version);
2004 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2009 if (gmx_fio_close(fp) != 0)
2011 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2020 /* If the user wants to append to output files,
2021 * we use the file pointer positions of the output files stored
2022 * in the checkpoint file and truncate the files such that any frames
2023 * written after the checkpoint time are removed.
2024 * All files are md5sum checked such that we can be sure that
2025 * we do not truncate other (maybe imprortant) files.
2027 if (bAppendOutputFiles)
2029 if (fn2ftp(outputfiles[0].filename) != efLOG)
2031 /* make sure first file is log file so that it is OK to use it for
2034 gmx_fatal(FARGS, "The first output file should always be the log "
2035 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2037 for (i = 0; i < nfiles; i++)
2039 if (outputfiles[i].offset < 0)
2041 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2042 "is larger than 2 GB, but mdrun did not support large file"
2043 " offsets. Can not append. Run mdrun with -noappend",
2044 outputfiles[i].filename);
2047 chksum_file = gmx_fio_open(outputfiles[i].filename, "a");
2050 chksum_file = gmx_fio_open(outputfiles[i].filename, "r+");
2055 /* Note that there are systems where the lock operation
2056 * will succeed, but a second process can also lock the file.
2057 * We should probably try to detect this.
2059 #ifndef GMX_NATIVE_WINDOWS
2060 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
2063 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2066 if (errno == ENOSYS)
2070 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2074 fprintf(stderr, "\nNOTE: File locking is not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2077 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2081 else if (errno == EACCES || errno == EAGAIN)
2083 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2084 "simulation?", outputfiles[i].filename);
2088 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2089 outputfiles[i].filename, strerror(errno));
2094 /* compute md5 chksum */
2095 if (outputfiles[i].chksum_size != -1)
2097 if (gmx_fio_get_file_md5(chksum_file, outputfiles[i].offset,
2098 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2100 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2101 outputfiles[i].chksum_size,
2102 outputfiles[i].filename);
2105 if (i == 0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2107 if (gmx_fio_seek(chksum_file, outputfiles[i].offset))
2109 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", strerror(errno));
2114 if (i == 0) /*open log file here - so that lock is never lifted
2115 after chksum is calculated */
2117 *pfplog = gmx_fio_getfp(chksum_file);
2121 gmx_fio_close(chksum_file);
2124 /* compare md5 chksum */
2125 if (outputfiles[i].chksum_size != -1 &&
2126 memcmp(digest, outputfiles[i].chksum, 16) != 0)
2130 fprintf(debug, "chksum for %s: ", outputfiles[i].filename);
2131 for (j = 0; j < 16; j++)
2133 fprintf(debug, "%02x", digest[j]);
2135 fprintf(debug, "\n");
2137 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2138 outputfiles[i].filename);
2143 if (i != 0) /*log file is already seeked to correct position */
2145 #ifdef GMX_NATIVE_WINDOWS
2146 rc = gmx_wintruncate(outputfiles[i].filename, outputfiles[i].offset);
2148 rc = truncate(outputfiles[i].filename, outputfiles[i].offset);
2152 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfiles[i].filename);
2162 void load_checkpoint(const char *fn, FILE **fplog,
2163 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
2164 t_inputrec *ir, t_state *state,
2165 gmx_bool *bReadRNG, gmx_bool *bReadEkin,
2166 gmx_bool bAppend, gmx_bool bForceAppend)
2168 gmx_large_int_t step;
2173 /* Read the state from the checkpoint file */
2174 read_checkpoint(fn, fplog,
2175 cr, bPartDecomp, dd_nc,
2176 ir->eI, &(ir->fepvals->init_fep_state), &step, &t, state, bReadRNG, bReadEkin,
2177 &ir->simulation_part, bAppend, bForceAppend);
2181 gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
2182 gmx_bcast(DIM*sizeof(dd_nc[0]), dd_nc, cr);
2183 gmx_bcast(sizeof(step), &step, cr);
2184 gmx_bcast(sizeof(*bReadRNG), bReadRNG, cr);
2185 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2187 ir->bContinuation = TRUE;
2188 if (ir->nsteps >= 0)
2190 ir->nsteps += ir->init_step - step;
2192 ir->init_step = step;
2193 ir->simulation_part += 1;
2196 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2197 gmx_large_int_t *step, double *t, t_state *state,
2199 int *nfiles, gmx_file_position_t **outputfiles)
2202 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2207 int flags_eks, flags_enh, flags_dfh;
2209 gmx_file_position_t *files_loc = NULL;
2212 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2213 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2214 &eIntegrator, simulation_part, step, t, &nppnodes, dd_nc, &npme,
2215 &state->natoms, &state->ngtc, &state->nnhpres, &state->nhchainlength,
2216 &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
2217 &state->edsamstate.nED, NULL);
2219 do_cpt_state(gmx_fio_getxdr(fp), TRUE, state->flags, state, bReadRNG, NULL);
2224 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
2229 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2230 flags_enh, &state->enerhist, NULL);
2235 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
2241 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
2247 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2248 outputfiles != NULL ? outputfiles : &files_loc,
2249 outputfiles != NULL ? nfiles : &nfiles_loc,
2250 NULL, file_version);
2251 if (files_loc != NULL)
2261 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2275 read_checkpoint_state(const char *fn, int *simulation_part,
2276 gmx_large_int_t *step, double *t, t_state *state)
2280 fp = gmx_fio_open(fn, "r");
2281 read_checkpoint_data(fp, simulation_part, step, t, state, FALSE, NULL, NULL);
2282 if (gmx_fio_close(fp) != 0)
2284 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2288 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2290 /* This next line is nasty because the sub-structures of t_state
2291 * cannot be assumed to be zeroed (or even initialized in ways the
2292 * rest of the code might assume). Using snew would be better, but
2293 * this will all go away for 5.0. */
2295 int simulation_part;
2296 gmx_large_int_t step;
2299 init_state(&state, 0, 0, 0, 0, 0);
2301 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, FALSE, NULL, NULL);
2303 fr->natoms = state.natoms;
2306 fr->step = gmx_large_int_to_int(step,
2307 "conversion of checkpoint to trajectory");
2311 fr->lambda = state.lambda[efptFEP];
2312 fr->fep_state = state.fep_state;
2314 fr->bX = (state.flags & (1<<estX));
2320 fr->bV = (state.flags & (1<<estV));
2327 fr->bBox = (state.flags & (1<<estBOX));
2330 copy_mat(state.box, fr->box);
2335 void list_checkpoint(const char *fn, FILE *out)
2339 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2341 int eIntegrator, simulation_part, nppnodes, npme;
2342 gmx_large_int_t step;
2346 int flags_eks, flags_enh, flags_dfh;
2350 gmx_file_position_t *outputfiles;
2353 init_state(&state, -1, -1, -1, -1, 0);
2355 fp = gmx_fio_open(fn, "r");
2356 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2357 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2358 &eIntegrator, &simulation_part, &step, &t, &nppnodes, dd_nc, &npme,
2359 &state.natoms, &state.ngtc, &state.nnhpres, &state.nhchainlength,
2360 &(state.dfhist.nlambda), &state.flags,
2361 &flags_eks, &flags_enh, &flags_dfh, &state.edsamstate.nED, out);
2362 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, state.flags, &state, TRUE, out);
2367 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state.ekinstate, out);
2372 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2373 flags_enh, &state.enerhist, out);
2377 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2378 flags_dfh, &state.dfhist, out);
2383 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state.edsamstate, out);
2388 do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, out, file_version);
2393 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2400 if (gmx_fio_close(fp) != 0)
2402 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2409 static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
2413 /* Check if the output file name stored in the checkpoint file
2414 * is one of the output file names of mdrun.
2418 !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
2423 return (i < nfile && gmx_fexist(fnm_cp));
2426 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2427 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2428 gmx_large_int_t *cpt_step, t_commrec *cr,
2429 gmx_bool bAppendReq,
2430 int nfile, const t_filenm fnm[],
2431 const char *part_suffix, gmx_bool *bAddPart)
2434 gmx_large_int_t step = 0;
2436 /* This next line is nasty because the sub-structures of t_state
2437 * cannot be assumed to be zeroed (or even initialized in ways the
2438 * rest of the code might assume). Using snew would be better, but
2439 * this will all go away for 5.0. */
2442 gmx_file_position_t *outputfiles;
2445 char *fn, suf_up[STRLEN];
2451 if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
2453 *simulation_part = 0;
2457 init_state(&state, 0, 0, 0, 0, 0);
2459 read_checkpoint_data(fp, simulation_part, &step, &t, &state, FALSE,
2460 &nfiles, &outputfiles);
2461 if (gmx_fio_close(fp) != 0)
2463 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2470 for (f = 0; f < nfiles; f++)
2472 if (exist_output_file(outputfiles[f].filename, nfile, fnm))
2477 if (nexist == nfiles)
2479 bAppend = bAppendReq;
2481 else if (nexist > 0)
2484 "Output file appending has been requested,\n"
2485 "but some output files listed in the checkpoint file %s\n"
2486 "are not present or are named differently by the current program:\n",
2488 fprintf(stderr, "output files present:");
2489 for (f = 0; f < nfiles; f++)
2491 if (exist_output_file(outputfiles[f].filename,
2494 fprintf(stderr, " %s", outputfiles[f].filename);
2497 fprintf(stderr, "\n");
2498 fprintf(stderr, "output files not present or named differently:");
2499 for (f = 0; f < nfiles; f++)
2501 if (!exist_output_file(outputfiles[f].filename,
2504 fprintf(stderr, " %s", outputfiles[f].filename);
2507 fprintf(stderr, "\n");
2509 gmx_fatal(FARGS, "File appending requested, but only %d of the %d output files are present", nexist, nfiles);
2517 gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
2519 fn = outputfiles[0].filename;
2520 if (strlen(fn) < 4 ||
2521 gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
2523 gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
2525 /* Set bAddPart to whether the suffix string '.part' is present
2526 * in the log file name.
2528 strcpy(suf_up, part_suffix);
2530 *bAddPart = (strstr(fn, part_suffix) != NULL ||
2531 strstr(fn, suf_up) != NULL);
2539 gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
2541 if (*simulation_part > 0 && bAppendReq)
2543 gmx_bcast(sizeof(bAppend), &bAppend, cr);
2544 gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
2547 if (NULL != cpt_step)