2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012,2013, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* The source code in this file should be thread-safe.
37 Please keep it that way. */
47 #ifdef HAVE_SYS_TIME_H
55 #ifdef GMX_NATIVE_WINDOWS
58 #include <sys/locking.h>
68 #include "gmx_random.h"
69 #include "checkpoint.h"
74 #include "gromacs/fileio/filenm.h"
75 #include "gromacs/fileio/futil.h"
76 #include "gromacs/fileio/gmxfio.h"
77 #include "gromacs/fileio/xdrf.h"
78 #include "gromacs/fileio/xdr_datatype.h"
80 #include "buildinfo.h"
/* Constants of the checkpoint format used further down in this file:
 * CPT_MAGIC1 is compared with the first int of the header in do_cpt_header,
 * CPT_MAGIC2 is compared with the int transferred by do_cpt_footer, and
 * CPTSTRLEN is the maximum length passed to xdr_string in do_cpt_string_err. */
86 #define CPT_MAGIC1 171817
87 #define CPT_MAGIC2 171819
88 #define CPTSTRLEN 1024
/* Value that write_checkpoint stores in the header as double_prec.
 * NOTE(review): the preprocessor condition that selects between the two
 * definitions is not visible here - presumably the build precision. */
91 #define GMX_CPT_BUILD_DP 1
93 #define GMX_CPT_BUILD_DP 0
96 /* cpt_version should normally only be changed
97 * when the header or footer format changes.
98 * The state data format itself is backward and forward compatible.
99 * But old code can not read a new entry that is present in the file
100 * (but can read a new format when new entries are not present).
102 static const int cpt_version = 15;
/* Printable names of the est* state entries. st_names() indexes this table
 * with the entry id when the component is cptpEST; the table has estNR slots. */
105 const char *est_names[estNR] =
108 "box", "box-rel", "box-v", "pres_prev",
109 "nosehoover-xi", "thermostat-integral",
110 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
111 "disre_initf", "disre_rm3tav",
112 "orire_initf", "orire_Dtav",
113 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng", "MC-rng-i"
/* Ids of the kinetic-energy state entries; each id is also the bit position
 * tested in the flags word by do_cpt_ekinstate. eeksNR is the entry count. */
117 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
/* Printable names of the eeks* entries, in id order; returned by st_names()
 * for component cptpEEKS. */
120 const char *eeks_names[eeksNR] =
122 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
123 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
/* Ids of the energy-history entries; each id is also the bit position tested
 * in the flags word by do_cpt_enerhist. */
127 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
128 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
129 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
130 eenhENERGY_DELTA_H_NN,
131 eenhENERGY_DELTA_H_LIST,
132 eenhENERGY_DELTA_H_STARTTIME,
133 eenhENERGY_DELTA_H_STARTLAMBDA,
/* Printable names of the eenh* entries; returned by st_names() for component
 * cptpEENH and used directly as descriptions in do_cpt_enerhist. */
137 const char *eenh_names[eenhNR] =
139 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
140 "energy_sum_sim", "energy_nsum_sim",
141 "energy_nsteps", "energy_nsteps_sim",
143 "energy_delta_h_list",
144 "energy_delta_h_start_time",
145 "energy_delta_h_start_lambda"
148 /* free energy history variables -- need to be preserved over checkpoint */
/* Each id below is also the bit position tested in the flags word by
 * do_cpt_df_hist; edfhNR is the entry count. */
150 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
151 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
153 /* free energy history variable names */
/* In id order; returned by st_names() for component cptpEDFH. */
154 const char *edfh_names[edfhNR] =
156 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
157 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
160 #ifdef GMX_NATIVE_WINDOWS
/* Windows-only helper: opens filename in rb+ mode and returns the result of
 * _chsize_s on its descriptor, which sets the file length to size bytes.
 * NOTE(review): the declarations, the check of the fopen result and the
 * closing of fp are not visible here - confirm that fp is not leaked. */
162 gmx_wintruncate(const char *filename, __int64 size)
165 /*we do this elsewhere*/
171 fp = fopen(filename, "rb+");
178 return _chsize_s( fileno(fp), size);
/* Layout tags passed as erealtype to do_cpte_reals_low, which uses them when
 * the data is listed (pr_reals or pr_rvecs). */
185 ecprREAL, ecprRVEC, ecprMATRIX
/* Component ids; st_names() uses them to select the matching name table. */
189 cptpEST, cptpEEKS, cptpEENH, cptpEDFH
191 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
192 cptpEST - state variables.
193 cptpEEKS - Kinetic energy state variables.
194 cptpEENH - Energy history state variables.
195 cptpEDFH - free energy history variables.
/* Returns the printable name of entry ecpt of component cptp (one of the
 * cptp* ids) by indexing the corresponding name table.
 * ecpt is used as an array index without a range check in the visible code. */
199 static const char *st_names(int cptp, int ecpt)
203 case cptpEST: return est_names [ecpt]; break;
204 case cptpEEKS: return eeks_names[ecpt]; break;
205 case cptpEENH: return eenh_names[ecpt]; break;
206 case cptpEDFH: return edfh_names[ecpt]; break;
/* Prints a warning to fp that the checkpoint file is corrupted or truncated. */
212 static void cp_warning(FILE *fp)
214 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
/* Reports a corrupted or truncated checkpoint file (or a full disk) through
 * gmx_fatal. */
217 static void cp_error()
219 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
/* Transfers one string through xdr_string with CPTSTRLEN as the maximum size
 * and prints it to list as desc = value.
 * NOTE(review): how bRead is used, what happens when xdr_string fails, and
 * the guard around the print are not visible here. */
222 static void do_cpt_string_err(XDR *xd, gmx_bool bRead, const char *desc, char **s, FILE *list)
230 res = xdr_string(xd, s, CPTSTRLEN);
237 fprintf(list, "%s = %s\n", desc, *s);
/* Transfers one int through xdr_int and prints it to list as desc = value.
 * Returns an int status: do_cpt_int_err treats a negative value as failure,
 * do_cpt_files treats any non-zero value as failure. */
242 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
246 res = xdr_int(xd, i);
253 fprintf(list, "%s = %d\n", desc, *i);
/* Transfers n unsigned chars, one xdr_u_char call per element, and prints
 * them to list as two-digit hex values after the description. */
258 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
264 fprintf(list, "%s = ", desc);
/* The loop stops at the first element for which the transfer reports zero. */
266 for (j = 0; j < n && res; j++)
268 res &= xdr_u_char(xd, &i[j]);
271 fprintf(list, "%02x", i[j]);
/* Same transfer as do_cpt_int, but without a return value: a negative status
 * is handled here. NOTE(review): the action taken is not visible - presumably
 * cp_error(). */
286 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
288 if (do_cpt_int(xd, desc, i, list) < 0)
/* Transfers one 64-bit step value through xdr_int64 and prints it to list,
 * formatted by gmx_step_str into the local buffer buf. */
294 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_int64_t *i, FILE *list)
297 char buf[STEPSTRSIZE];
299 res = xdr_int64(xd, i);
306 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
/* Transfers one double through xdr_double and prints it to list as
 * desc = value. */
310 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
314 res = xdr_double(xd, f);
321 fprintf(list, "%s = %f\n", desc, *f);
/* Transfers one real with either xdr_double or xdr_float.
 * NOTE(review): the preprocessor condition that selects one of the two calls
 * is not visible here - presumably the build precision. */
325 static void do_cpt_real_err(XDR *xd, real *f)
330 res = xdr_double(xd, f);
332 res = xdr_float(xd, f);
/* Transfers n rvecs component by component (DIM reals per vector) with
 * do_cpt_real_err, then prints the whole array to list with pr_rvecs. */
340 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
344 for (i = 0; i < n; i++)
346 for (j = 0; j < DIM; j++)
348 do_cpt_real_err(xd, &f[i][j]);
354 pr_rvecs(list, 0, desc, f, n);
358 /* If nval >= 0, nval is used; on read this should match the passed value.
359 * If nval < 0, *nptr is used; on read the value is stored in *nptr
/* Low-level transfer of an array of reals for entry ecpt of component cptp.
 * An element count (nf) and a precision tag (dt) are transferred ahead of the
 * payload. The payload itself goes through xdr_vector as float or as double,
 * depending on dt. erealtype (an ecpr* tag) is used when the data is listed. */
361 static int do_cpte_reals_low(XDR *xd, int cptp, int ecpt, int sflags,
362 int nval, int *nptr, real **v,
363 FILE *list, int erealtype)
/* dtc is the precision tag of this build; the selecting condition is not visible. */
367 int dtc = xdr_datatype_float;
369 int dtc = xdr_datatype_double;
371 real *vp, *va = NULL;
386 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
391 res = xdr_int(xd, &nf);
/* A count that differs from nval is fatal. */
402 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), nval, nf);
411 res = xdr_int(xd, &dt);
/* A precision difference is only reported on stderr, not treated as fatal. */
418 fprintf(stderr, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
419 st_names(cptp, ecpt), xdr_datatype_names[dtc],
420 xdr_datatype_names[dt]);
/* NOTE(review): branch taken when listing or when the entry bit is absent from
 * sflags; its body is not visible - presumably a temporary buffer (va) is used. */
422 if (list || !(sflags & (1<<ecpt)))
435 if (dt == xdr_datatype_float)
437 if (dtc == xdr_datatype_float)
445 res = xdr_vector(xd, (char *)vf, nf,
446 (unsigned int)sizeof(float), (xdrproc_t)xdr_float);
/* Per-element loop when file and code precision differ - presumably a conversion. */
451 if (dtc != xdr_datatype_float)
453 for (i = 0; i < nf; i++)
462 if (dtc == xdr_datatype_double)
470 res = xdr_vector(xd, (char *)vd, nf,
471 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
476 if (dtc != xdr_datatype_double)
478 for (i = 0; i < nf; i++)
/* Listing: plain reals, or rvecs with nf/3 vectors; any other tag is an internal error. */
491 pr_reals(list, 0, st_names(cptp, ecpt), vp, nf);
494 pr_rvecs(list, 0, st_names(cptp, ecpt), (rvec *)vp, nf/3);
497 gmx_incons("Unknown checkpoint real type");
509 /* This function stores n along with the reals when writing;
510 * on reading it assumes that n matches the value in the checkpoint file
511 * and generates a fatal error when this is not the case.
/* Thin wrapper around do_cpte_reals_low: fixed count n, no count pointer,
 * plain real layout (ecprREAL). Returns the status of the low-level call. */
513 static int do_cpte_reals(XDR *xd, int cptp, int ecpt, int sflags,
514 int n, real **v, FILE *list)
516 return do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, v, list, ecprREAL);
519 /* This function does the same as do_cpte_reals,
520 * except that on reading it ignores the passed value of *n
521 * and stores the value read from the checkpoint file in *n.
/* Thin wrapper around do_cpte_reals_low that passes nval = -1 and n as the
 * count pointer, so the count is taken from *n instead of a fixed value. */
523 static int do_cpte_n_reals(XDR *xd, int cptp, int ecpt, int sflags,
524 int *n, real **v, FILE *list)
526 return do_cpte_reals_low(xd, cptp, ecpt, sflags, -1, n, v, list, ecprREAL);
/* Transfers a single real: calls do_cpte_reals_low with a count of 1 and the
 * address of r as the array pointer. */
529 static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
534 return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
/* Transfers an int array for entry ecpt of component cptp: element count (nf)
 * and a type tag (dt) first, then the payload through xdr_vector. When
 * listing, the data is printed with pr_ivec. */
537 static int do_cpte_ints(XDR *xd, int cptp, int ecpt, int sflags,
538 int n, int **v, FILE *list)
541 int dtc = xdr_datatype_int;
546 res = xdr_int(xd, &nf);
/* The count is only enforced when not listing and a destination is supplied. */
551 if (list == NULL && v != NULL && nf != n)
553 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
556 res = xdr_int(xd, &dt);
563 gmx_fatal(FARGS, "Type mismatch for state entry %s, code type is %s, file type is %s\n",
564 st_names(cptp, ecpt), xdr_datatype_names[dtc],
565 xdr_datatype_names[dt]);
/* NOTE(review): branch for listing, an entry absent from sflags, or v == NULL;
 * body not visible - presumably selects a temporary buffer for vp. */
567 if (list || !(sflags & (1<<ecpt)) || v == NULL)
580 res = xdr_vector(xd, (char *)vp, nf,
581 (unsigned int)sizeof(int), (xdrproc_t)xdr_int);
588 pr_ivec(list, 0, st_names(cptp, ecpt), vp, nf, TRUE);
/* Transfers a single int: calls do_cpte_ints with a count of 1 and the
 * address of i as the array pointer. */
598 static int do_cpte_int(XDR *xd, int cptp, int ecpt, int sflags,
601 return do_cpte_ints(xd, cptp, ecpt, sflags, 1, &i, list);
/* Transfers a double array for entry ecpt of component cptp: element count
 * (nf) and precision tag (dt) first, then the payload through xdr_vector.
 * Unlike do_cpte_reals_low, a precision mismatch is fatal here. When listing,
 * the data is printed with pr_doubles. */
604 static int do_cpte_doubles(XDR *xd, int cptp, int ecpt, int sflags,
605 int n, double **v, FILE *list)
608 int dtc = xdr_datatype_double;
609 double *vp, *va = NULL;
613 res = xdr_int(xd, &nf);
/* The count is only enforced when not listing. */
618 if (list == NULL && nf != n)
620 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
623 res = xdr_int(xd, &dt);
630 gmx_fatal(FARGS, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
631 st_names(cptp, ecpt), xdr_datatype_names[dtc],
632 xdr_datatype_names[dt]);
/* NOTE(review): branch for listing or an entry absent from sflags; body not
 * visible - presumably selects the temporary buffer va for vp. */
634 if (list || !(sflags & (1<<ecpt)))
647 res = xdr_vector(xd, (char *)vp, nf,
648 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
655 pr_doubles(list, 0, st_names(cptp, ecpt), vp, nf);
/* Transfers a single double: calls do_cpte_doubles with a count of 1 and the
 * address of r as the array pointer. */
665 static int do_cpte_double(XDR *xd, int cptp, int ecpt, int sflags,
666 double *r, FILE *list)
668 return do_cpte_doubles(xd, cptp, ecpt, sflags, 1, &r, list);
/* Transfers n rvecs as one flat array of n*DIM reals through
 * do_cpte_reals_low, tagged ecprRVEC for listing. */
672 static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
673 int n, rvec **v, FILE *list)
677 return do_cpte_reals_low(xd, cptp, ecpt, sflags,
678 n*DIM, NULL, (real **)v, list, ecprRVEC);
/* Transfers one matrix as DIM*DIM reals, in place in v. */
681 static int do_cpte_matrix(XDR *xd, int cptp, int ecpt, int sflags,
682 matrix v, FILE *list)
/* vr points at the first element of v, so the low-level call works on v itself. */
687 vr = (real *)&(v[0][0]);
/* list is withheld from the low-level call (NULL); the matrix is printed below instead. */
688 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
689 DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
691 if (list && ret == 0)
693 pr_rvecs(list, 0, st_names(cptp, ecpt), v, DIM);
/* Transfers n rows of n reals each (v[i] is row i), one do_cpte_reals_low
 * call per row. When listing, each successfully transferred row is printed
 * under the entry name followed by the row index in brackets. */
700 static int do_cpte_nmatrix(XDR *xd, int cptp, int ecpt, int sflags,
701 int n, real **v, FILE *list)
706 char name[CPTSTRLEN];
713 for (i = 0; i < n; i++)
717 reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
718 if (list && reti == 0)
720 sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
721 pr_reals(list, 0, name, v[i], n);
/* Transfers an array of matrices. A matrix count (nf) is transferred first;
 * the matrices are then copied into a flat real buffer, passed through
 * do_cpte_reals_low, and copied back. */
731 static int do_cpte_matrices(XDR *xd, int cptp, int ecpt, int sflags,
732 int n, matrix **v, FILE *list)
735 matrix *vp, *va = NULL;
741 res = xdr_int(xd, &nf);
/* The count is only enforced when not listing. */
746 if (list == NULL && nf != n)
748 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
/* NOTE(review): branch for listing or an entry absent from sflags; body not
 * visible - presumably selects the temporary buffer va for vp. */
750 if (list || !(sflags & (1<<ecpt)))
/* Flatten vp into vr: element (i, j, k) goes to index (i*DIM+j)*DIM+k. */
763 snew(vr, nf*DIM*DIM);
764 for (i = 0; i < nf; i++)
766 for (j = 0; j < DIM; j++)
768 for (k = 0; k < DIM; k++)
770 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
774 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
775 nf*DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
/* Copy the transferred values back into the matrices. */
776 for (i = 0; i < nf; i++)
778 for (j = 0; j < DIM; j++)
780 for (k = 0; k < DIM; k++)
782 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
788 if (list && ret == 0)
790 for (i = 0; i < nf; i++)
792 pr_rvecs(list, 0, st_names(cptp, ecpt), vp[i], DIM);
/* Transfers the checkpoint header: start magic, build and program
 * identification strings, the file format version, and the simulation
 * dimensions and flag words that the later sections depend on.
 * Fields that were added in later format versions are only transferred when
 * *file_version is at least the version tested in front of them.
 * Fatal errors: unreadable or wrong start magic, and a file version newer
 * than cpt_version. */
803 static void do_cpt_header(XDR *xd, gmx_bool bRead, int *file_version,
804 char **version, char **btime, char **buser, char **bhost,
806 char **fprog, char **ftime,
807 int *eIntegrator, int *simulation_part,
808 gmx_int64_t *step, double *t,
809 int *nnodes, int *dd_nc, int *npme,
810 int *natoms, int *ngtc, int *nnhpres, int *nhchainlength,
811 int *nlambda, int *flags_state,
812 int *flags_eks, int *flags_enh, int *flags_dfh,
830 res = xdr_int(xd, &magic);
833 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
835 if (magic != CPT_MAGIC1)
837 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
838 "The checkpoint file is corrupted or not a checkpoint file",
844 gmx_gethostname(fhost, 255);
846 do_cpt_string_err(xd, bRead, "GROMACS version", version, list);
847 do_cpt_string_err(xd, bRead, "GROMACS build time", btime, list);
848 do_cpt_string_err(xd, bRead, "GROMACS build user", buser, list);
849 do_cpt_string_err(xd, bRead, "GROMACS build host", bhost, list);
850 do_cpt_string_err(xd, bRead, "generating program", fprog, list);
851 do_cpt_string_err(xd, bRead, "generation time", ftime, list);
/* Preset with the code version before the transfer; NOTE(review): when
 * reading, the transfer presumably replaces it with the value from the file. */
852 *file_version = cpt_version;
853 do_cpt_int_err(xd, "checkpoint file version", file_version, list);
854 if (*file_version > cpt_version)
856 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", *file_version, cpt_version);
858 if (*file_version >= 13)
860 do_cpt_int_err(xd, "GROMACS double precision", double_prec, list);
866 if (*file_version >= 12)
868 do_cpt_string_err(xd, bRead, "generating host", &fhost, list);
874 do_cpt_int_err(xd, "#atoms", natoms, list);
875 do_cpt_int_err(xd, "#T-coupling groups", ngtc, list);
876 if (*file_version >= 10)
878 do_cpt_int_err(xd, "#Nose-Hoover T-chains", nhchainlength, list);
884 if (*file_version >= 11)
886 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", nnhpres, list);
892 if (*file_version >= 14)
894 do_cpt_int_err(xd, "# of total lambda states ", nlambda, list);
900 do_cpt_int_err(xd, "integrator", eIntegrator, list);
901 if (*file_version >= 3)
903 do_cpt_int_err(xd, "simulation part #", simulation_part, list);
/* Value used when the part number is not transferred. */
907 *simulation_part = 1;
/* The step is a 64-bit value from version 5 on; otherwise an int is
 * transferred through the scratch variable idum. */
909 if (*file_version >= 5)
911 do_cpt_step_err(xd, "step", step, list);
915 do_cpt_int_err(xd, "step", &idum, list);
918 do_cpt_double_err(xd, "t", t, list);
919 do_cpt_int_err(xd, "#PP-nodes", nnodes, list);
/* dd_nc may be NULL; the three grid counts then go through idum. */
921 do_cpt_int_err(xd, "dd_nc[x]", dd_nc ? &(dd_nc[0]) : &idum, list);
922 do_cpt_int_err(xd, "dd_nc[y]", dd_nc ? &(dd_nc[1]) : &idum, list);
923 do_cpt_int_err(xd, "dd_nc[z]", dd_nc ? &(dd_nc[2]) : &idum, list);
924 do_cpt_int_err(xd, "#PME-only nodes", npme, list);
925 do_cpt_int_err(xd, "state flags", flags_state, list);
926 if (*file_version >= 4)
928 do_cpt_int_err(xd, "ekin data flags", flags_eks, list);
929 do_cpt_int_err(xd, "energy history flags", flags_enh, list);
/* Alternative path (presumably the else branch for older files): the energy
 * history flags are taken from the bits of flags_state above estORIRE_DTAV,
 * and the three bits directly above estORIRE_DTAV are then cleared from
 * flags_state. */
934 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
935 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
936 (1<<(estORIRE_DTAV+2)) |
937 (1<<(estORIRE_DTAV+3))));
939 if (*file_version >= 14)
941 do_cpt_int_err(xd, "df history flags", flags_dfh, list);
948 if (*file_version >= 15)
950 do_cpt_int_err(xd, "ED data sets", nED, list);
/* Transfers the end-of-file magic number for files of version 2 or later and
 * compares it with CPT_MAGIC2.
 * NOTE(review): the return values are not visible here. */
958 static int do_cpt_footer(XDR *xd, int file_version)
963 if (file_version >= 2)
966 res = xdr_int(xd, &magic);
971 if (magic != CPT_MAGIC2)
/* Transfers the state entries whose bits are set in fflags, in increasing
 * entry id order, stopping as soon as one transfer returns non-zero.
 * state->flags is passed to the helpers as sflags. An entry id without a
 * case below ends in gmx_fatal. */
980 static int do_cpt_state(XDR *xd, gmx_bool bRead,
981 int fflags, t_state *state,
982 gmx_bool bReadRNG, FILE *list)
985 int **rng_p, **rngi_p;
/* Element counts of the Nose-Hoover chain arrays (thermostat and barostat). */
992 nnht = state->nhchainlength*state->ngtc;
993 nnhtp = state->nhchainlength*state->nnhpres;
/* NOTE(review): presumably selected by bReadRNG - the condition is not visible. */
997 rng_p = (int **)&state->ld_rng;
998 rngi_p = &state->ld_rngi;
1002 /* Do not read the RNG data */
1007 if (bRead) /* we need to allocate space for dfhist if we are reading */
1009 init_df_history(&state->dfhist, state->dfhist.nlambda);
1012 /* We want the MC_RNG the same across all the nodes for now -- lambda MC is global */
1014 sflags = state->flags;
1015 for (i = 0; (i < estNR && ret == 0); i++)
1017 if (fflags & (1<<i))
1021 case estLAMBDA: ret = do_cpte_reals(xd, cptpEST, i, sflags, efptNR, &(state->lambda), list); break;
1022 case estFEPSTATE: ret = do_cpte_int (xd, cptpEST, i, sflags, &state->fep_state, list); break;
1023 case estBOX: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box, list); break;
1024 case estBOX_REL: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box_rel, list); break;
1025 case estBOXV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->boxv, list); break;
1026 case estPRES_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->pres_prev, list); break;
1027 case estSVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->svir_prev, list); break;
1028 case estFVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->fvir_prev, list); break;
1029 case estNH_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_xi, list); break;
1030 case estNH_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_vxi, list); break;
1031 case estNHPRES_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_xi, list); break;
1032 case estNHPRES_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_vxi, list); break;
1033 case estTC_INT: ret = do_cpte_doubles(xd, cptpEST, i, sflags, state->ngtc, &state->therm_integral, list); break;
1034 case estVETA: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->veta, list); break;
1035 case estVOL0: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->vol0, list); break;
1036 case estX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->x, list); break;
1037 case estV: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->v, list); break;
1038 case estSDX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->sd_X, list); break;
1039 case estLD_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrng, rng_p, list); break;
1040 case estLD_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrngi, rngi_p, list); break;
1041 case estMC_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nmcrng, (int **)&state->mc_rng, list); break;
1042 case estMC_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, 1, &state->mc_rngi, list); break;
1043 case estDISRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.disre_initf, list); break;
1044 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1045 case estORIRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.orire_initf, list); break;
1046 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1048 gmx_fatal(FARGS, "Unknown state entry %d\n"
1049 "You are probably reading a new checkpoint file with old code", i);
/* Transfers the kinetic-energy state entries whose bits are set in fflags,
 * in increasing entry id order, stopping as soon as one transfer returns
 * non-zero. fflags is also handed to the helpers as their sflags argument.
 * An entry id without a case below ends in gmx_fatal.
 *
 * Every case passes cptpEEKS as the component and the loop index i as the
 * entry id. The helpers use that pair both to test the entry bit in the
 * flags word and to look up the entry name through st_names(). */
1057 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1065 for (i = 0; (i < eeksNR && ret == 0); i++)
1067 if (fflags & (1<<i))
1072 case eeksEKIN_N: ret = do_cpte_int(xd, cptpEEKS, i, fflags, &ekins->ekin_n, list); break;
1073 case eeksEKINH: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1074 case eeksEKINF: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1075 case eeksEKINO: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1076 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, cptpEEKS, i, fflags, ekins->ekin_total, list); break;
1077 case eeksEKINSCALEF: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscalef_nhc, list); break;
/* The next four cases used to pass (1, cptpEEKS) in the (cptp, ecpt) slots,
 * which made the helpers test the bit of entry 1 and report the name of
 * entry 1 instead of the entry actually being transferred. */
1078 case eeksVSCALE: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->vscale_nhc, list); break;
1079 case eeksEKINSCALEH: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscaleh_nhc, list); break;
1080 case eeksDEKINDL: ret = do_cpte_real(xd, cptpEEKS, i, fflags, &ekins->dekindl, list); break;
1081 case eeksMVCOS: ret = do_cpte_real(xd, cptpEEKS, i, fflags, &ekins->mvcos, list); break;
1083 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1084 "You are probably reading a new checkpoint file with old code", i);
/* Transfers the energy-history entries whose bits are set in fflags, in
 * increasing entry id order, stopping as soon as one transfer returns
 * non-zero. Afterwards, values missing from the flags word are derived from
 * the ones that are present (sum to sum_sim, nsum to nsteps, nsum_sim to
 * nsteps_sim). An entry id without a case below ends in gmx_fatal. */
1093 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1094 int fflags, energyhistory_t *enerhist,
/* NOTE(review): the following resets are presumably guarded by bRead; the
 * guard is not visible here. */
1105 enerhist->nsteps = 0;
1107 enerhist->nsteps_sim = 0;
1108 enerhist->nsum_sim = 0;
1109 enerhist->dht = NULL;
/* The delta-H container is only allocated when the entry count is flagged. */
1111 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1113 snew(enerhist->dht, 1);
1114 enerhist->dht->ndh = NULL;
1115 enerhist->dht->dh = NULL;
1116 enerhist->dht->start_lambda_set = FALSE;
1120 for (i = 0; (i < eenhNR && ret == 0); i++)
1122 if (fflags & (1<<i))
1126 case eenhENERGY_N: ret = do_cpte_int(xd, cptpEENH, i, fflags, &enerhist->nener, list); break;
1127 case eenhENERGY_AVER: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_ave, list); break;
1128 case eenhENERGY_SUM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum, list); break;
1129 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1130 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum_sim, list); break;
1131 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1132 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1133 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1134 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd, eenh_names[i], &(enerhist->dht->nndh), list);
1135 if (bRead) /* now allocate memory for it */
1137 snew(enerhist->dht->dh, enerhist->dht->nndh);
1138 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1139 for (j = 0; j < enerhist->dht->nndh; j++)
1141 enerhist->dht->ndh[j] = 0;
1142 enerhist->dht->dh[j] = NULL;
1146 case eenhENERGY_DELTA_H_LIST:
/* Stop at the first list whose transfer fails. Without the ret test a later
 * successful list would overwrite the failure status of an earlier one. */
1147 for (j = 0; j < enerhist->dht->nndh && ret == 0; j++)
1149 ret = do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1152 case eenhENERGY_DELTA_H_STARTTIME:
1153 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1154 case eenhENERGY_DELTA_H_STARTLAMBDA:
1155 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1157 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1158 "You are probably reading a new checkpoint file with old code", i);
1163 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1165 /* Assume we have an old file format and copy sum to sum_sim */
1166 srenew(enerhist->ener_sum_sim, enerhist->nener);
1167 for (i = 0; i < enerhist->nener; i++)
1169 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1171 fflags |= (1<<eenhENERGY_SUM_SIM);
1174 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1175 !(fflags & (1<<eenhENERGY_NSTEPS)))
1177 /* Assume we have an old file format and copy nsum to nsteps */
1178 enerhist->nsteps = enerhist->nsum;
1179 fflags |= (1<<eenhENERGY_NSTEPS);
1181 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1182 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1184 /* Assume we have an old file format and copy nsum_sim to nsteps_sim */
1185 enerhist->nsteps_sim = enerhist->nsum_sim;
1186 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
/* Transfers the free-energy history entries whose bits are set in fflags, in
 * increasing entry id order, stopping as soon as one transfer returns
 * non-zero. Array entries hold nlambda elements; the nmatrix entries hold
 * nlambda rows of nlambda reals. An entry id without a case below ends in
 * gmx_fatal. */
1192 static int do_cpt_df_hist(XDR *xd, int fflags, df_history_t *dfhist, FILE *list)
1197 nlambda = dfhist->nlambda;
1200 for (i = 0; (i < edfhNR && ret == 0); i++)
1202 if (fflags & (1<<i))
1206 case edfhBEQUIL: ret = do_cpte_int(xd, cptpEDFH, i, fflags, &dfhist->bEquil, list); break;
1207 case edfhNATLAMBDA: ret = do_cpte_ints(xd, cptpEDFH, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1208 case edfhWLHISTO: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1209 case edfhWLDELTA: ret = do_cpte_real(xd, cptpEDFH, i, fflags, &dfhist->wl_delta, list); break;
1210 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1211 case edfhSUMDG: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1212 case edfhSUMMINVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1213 case edfhSUMVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1214 case edfhACCUMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p, list); break;
1215 case edfhACCUMM: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m, list); break;
1216 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p2, list); break;
1217 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m2, list); break;
1218 case edfhTIJ: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij, list); break;
1219 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1222 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1223 "You are probably reading a new checkpoint file with old code", i);
1232 /* This function stores the last whole configuration of the reference and
1233 * average structure in the .cpt file
/* Transfers, for each of the EDstate->nED data sets, the atom count and the
 * coordinates of the reference structure and of the average structure.
 * Two transfer calls are visible per structure: one into a freshly allocated
 * old_sref / old_sav array and one through the old_sref_p / old_sav_p
 * pointers. NOTE(review): the condition that selects between them is not
 * visible here - presumably bRead. */
1235 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1236 edsamstate_t *EDstate, FILE *list)
/* Records whether this state came from a checkpoint read. */
1243 EDstate->bFromCpt = bRead;
1245 if (EDstate->nED <= 0)
1250 /* When reading, init_edsam has not been called yet,
1251 * so we have to allocate memory first. */
1254 snew(EDstate->nref, EDstate->nED);
1255 snew(EDstate->old_sref, EDstate->nED);
1256 snew(EDstate->nav, EDstate->nED);
1257 snew(EDstate->old_sav, EDstate->nED);
1260 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1261 for (i = 0; i < EDstate->nED; i++)
1263 /* Reference structure SREF */
/* Descriptions use one-based data set numbers. */
1264 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1265 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1266 sprintf(buf, "ED%d x_ref", i+1);
1269 snew(EDstate->old_sref[i], EDstate->nref[i]);
1270 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1274 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1277 /* Average structure SAV */
1278 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1279 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1280 sprintf(buf, "ED%d x_av", i+1);
1283 snew(EDstate->old_sav[i], EDstate->nav[i]);
1284 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1288 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
/* Transfers the list of output files: the number of files, then per file its
 * name, its offset as two ints (high and low 32 bits) and, for file versions
 * of 8 or later, a checksum size and 16 checksum bytes.
 * Two code paths are visible per file, one that rebuilds the offset from the
 * two ints and one that splits it. NOTE(review): the selecting condition is
 * not visible here - presumably bRead. */
1296 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1297 gmx_file_position_t **p_outputfiles, int *nfiles,
1298 FILE *list, int file_version)
/* mask keeps the low 32 bits of an offset. */
1302 gmx_off_t mask = 0xFFFFFFFFL;
1303 int offset_high, offset_low;
1305 gmx_file_position_t *outputfiles;
1307 if (do_cpt_int(xd, "number of output files", nfiles, list) != 0)
1314 snew(*p_outputfiles, *nfiles);
1317 outputfiles = *p_outputfiles;
1319 for (i = 0; i < *nfiles; i++)
1321 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1324 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
/* At most CPTSTRLEN-1 characters are copied. NOTE(review): strncpy does not
 * terminate the destination on truncation - confirm that filename is
 * zero-initialised and at least CPTSTRLEN bytes long. */
1325 strncpy(outputfiles[i].filename, buf, CPTSTRLEN-1);
1331 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1335 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
/* Rebuild the offset; the low word is masked so sign extension cannot spill
 * into the high word. */
1339 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1343 buf = outputfiles[i].filename;
1344 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1346 offset = outputfiles[i].offset;
/* Split the offset into its low and high 32-bit words. */
1354 offset_low = (int) (offset & mask);
1355 offset_high = (int) ((offset >> 32) & mask);
1357 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1361 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1366 if (file_version >= 8)
1368 if (do_cpt_int(xd, "file_checksum_size", &(outputfiles[i].chksum_size),
1373 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfiles[i].chksum, list) != 0)
/* Value used when no checksum is transferred. */
1380 outputfiles[i].chksum_size = -1;
1387 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1388 FILE *fplog, t_commrec *cr,
1389 int eIntegrator, int simulation_part,
1390 gmx_bool bExpanded, int elamstats,
1391 gmx_int64_t step, double t, t_state *state)
1401 char *fntemp; /* the temporary checkpoint file name */
1403 char timebuf[STRLEN];
1404 int nppnodes, npmenodes, flag_64bit;
1405 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1406 gmx_file_position_t *outputfiles;
1409 int flags_eks, flags_enh, flags_dfh, i;
1414 if (DOMAINDECOMP(cr))
1416 nppnodes = cr->dd->nnodes;
1417 npmenodes = cr->npmenodes;
1421 nppnodes = cr->nnodes;
1431 #ifndef GMX_NO_RENAME
1432 /* make the new temporary filename */
1433 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1435 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1436 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1437 strcat(fntemp, suffix);
1438 strcat(fntemp, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1440 /* if we can't rename, we just overwrite the cpt file.
1441 * dangerous if interrupted.
1443 snew(fntemp, strlen(fn));
1447 gmx_ctime_r(&now, timebuf, STRLEN);
1451 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1452 gmx_step_str(step, buf), timebuf);
1455 /* Get offsets for open files */
1456 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1458 fp = gmx_fio_open(fntemp, "w");
1460 if (state->ekinstate.bUpToDate)
1463 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1464 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1465 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1473 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1475 flags_enh |= (1<<eenhENERGY_N);
1476 if (state->enerhist.nsum > 0)
1478 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1479 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1481 if (state->enerhist.nsum_sim > 0)
1483 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1484 (1<<eenhENERGY_NSUM_SIM));
1486 if (state->enerhist.dht)
1488 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1489 (1<< eenhENERGY_DELTA_H_LIST) |
1490 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1491 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1497 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1498 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1501 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1503 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1505 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1506 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1514 /* We can check many more things now (CPU, acceleration, etc), but
1515 * it is highly unlikely to have two separate builds with exactly
1516 * the same version, user, time, and build host!
1519 version = gmx_strdup(VERSION);
1520 btime = gmx_strdup(BUILD_TIME);
1521 buser = gmx_strdup(BUILD_USER);
1522 bhost = gmx_strdup(BUILD_HOST);
1524 double_prec = GMX_CPT_BUILD_DP;
1525 fprog = gmx_strdup(Program());
1527 ftime = &(timebuf[0]);
1529 do_cpt_header(gmx_fio_getxdr(fp), FALSE, &file_version,
1530 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1531 &eIntegrator, &simulation_part, &step, &t, &nppnodes,
1532 DOMAINDECOMP(cr) ? cr->dd->nc : NULL, &npmenodes,
1533 &state->natoms, &state->ngtc, &state->nnhpres,
1534 &state->nhchainlength, &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
1535 &state->edsamstate.nED,
1544 if ((do_cpt_state(gmx_fio_getxdr(fp), FALSE, state->flags, state, TRUE, NULL) < 0) ||
1545 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL) < 0) ||
1546 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, &state->enerhist, NULL) < 0) ||
1547 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL) < 0) ||
1548 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, &state->edsamstate, NULL) < 0) ||
1549 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, &noutputfiles, NULL,
1552 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1555 do_cpt_footer(gmx_fio_getxdr(fp), file_version);
1557 /* we really, REALLY, want to make sure to physically write the checkpoint,
1558 and all the files it depends on, out to disk. Because we've
1559 opened the checkpoint with gmx_fio_open(), it's in our list
1561 ret = gmx_fio_all_output_fsync();
1567 "Cannot fsync '%s'; maybe you are out of disk space?",
1568 gmx_fio_getname(ret));
1570 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == NULL)
1580 if (gmx_fio_close(fp) != 0)
1582 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1585 /* we don't move the checkpoint if the user specified they didn't want it,
1586 or if the fsyncs failed */
1587 #ifndef GMX_NO_RENAME
1588 if (!bNumberAndKeep && !ret)
1592 /* Rename the previous checkpoint file */
1594 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1595 strcat(buf, "_prev");
1596 strcat(buf, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1598 /* we copy here so that if something goes wrong between now and
1599 * the rename below, there's always a state.cpt.
1600 * If renames are atomic (such as in POSIX systems),
1601 * this copying should be unnecessary.
1603 gmx_file_copy(fn, buf, FALSE);
1604 /* We don't really care if this fails:
1605 * there's already a new checkpoint.
1608 gmx_file_rename(fn, buf);
1611 if (gmx_file_rename(fntemp, fn) != 0)
1613 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1616 #endif /* GMX_NO_RENAME */
1622 /*code for alternate checkpointing scheme. moved from top of loop over
1624 fcRequestCheckPoint();
1625 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
1627 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
1629 #endif /* end GMX_FAHCORE block */
/* Report to fplog which state entries are flagged in the simulation (sflags)
 * and which are flagged in the checkpoint file (fflags).
 *
 * Writes a two-line explanation and a column header, then walks the bit
 * positions 0 .. estNR-1 and prints one row for every entry whose bit is set
 * in at least one of the two masks, marking each side as present or not
 * present.
 * NOTE(review): the expression printed in the first column of each row is on
 * a line that is not visible in this excerpt.
 */
1632 static void print_flag_mismatch(FILE *fplog, int sflags, int fflags)
1636 fprintf(fplog, "\nState entry mismatch between the simulation and the checkpoint file\n");
1637 fprintf(fplog, "Entries which are not present in the checkpoint file will not be updated\n");
1638 fprintf(fplog, " %24s %11s %11s\n", "", "simulation", "checkpoint");
1639 for (i = 0; i < estNR; i++)
/* Entries absent on both sides are not listed. */
1641 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1643 fprintf(fplog, " %24s %11s %11s\n",
1645 (sflags & (1<<i)) ? " present " : "not present",
1646 (fflags & (1<<i)) ? " present " : "not present");
/* Print an integer mismatch report for the quantity named by type.
 *
 * Output goes to fplog when it is non-NULL and to stderr otherwise. The value
 * p is printed as belonging to the current program and f as belonging to the
 * checkpoint file.
 * NOTE(review): the comparison guarding these prints and any update of *mm
 * are on lines not visible in this excerpt - presumably a difference between
 * p and f triggers the report and sets *mm; confirm against the full file.
 */
1651 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
1653 FILE *fp = fplog ? fplog : stderr;
1657 fprintf(fp, " %s mismatch,\n", type);
1658 fprintf(fp, " current program: %d\n", p);
1659 fprintf(fp, " checkpoint file: %d\n", f);
/* Print a string mismatch report for the quantity named by type.
 *
 * When strcmp reports that p (current program) and f (checkpoint file)
 * differ, both values are printed to fplog, or to stderr when fplog is NULL.
 * NOTE(review): the update of *mm is on a line not visible in this excerpt -
 * presumably it is set when the strings differ; confirm against the full
 * file.
 */
1665 static void check_string(FILE *fplog, const char *type, const char *p,
1666 const char *f, gmx_bool *mm)
1668 FILE *fp = fplog ? fplog : stderr;
1670 if (strcmp(p, f) != 0)
1672 fprintf(fp, " %s mismatch,\n", type);
1673 fprintf(fp, " current program: %s\n", p);
1674 fprintf(fp, " checkpoint file: %s\n", f);
/* Compare the identity of the running binary and its parallel setup with the
 * values stored in a checkpoint header.
 *
 * The build strings (VERSION, BUILD_TIME, BUILD_USER, BUILD_HOST), the
 * precision flag GMX_CPT_BUILD_DP and Program() are checked against the file
 * values with check_string and check_int. These are followed by the total
 * node count (cr->nnodes against npp_f + npme_f), the PME node count and the
 * three domain decomposition cell counts. Every helper call receives &mm.
 * The closing messages state that continuation is exact but not guaranteed
 * to be binary identical.
 * NOTE(review): the conditions selecting the PME and DD checks and the final
 * messages (presumably based on mm, bPartDecomp and the node counts) are on
 * lines not visible in this excerpt - confirm against the full file.
 */
1680 static void check_match(FILE *fplog,
1682 char *btime, char *buser, char *bhost, int double_prec,
1684 t_commrec *cr, gmx_bool bPartDecomp, int npp_f, int npme_f,
1685 ivec dd_nc, ivec dd_nc_f)
/* Build identity of the binary versus the values stored in the file. */
1692 check_string(fplog, "Version", VERSION, version, &mm);
1693 check_string(fplog, "Build time", BUILD_TIME, btime, &mm);
1694 check_string(fplog, "Build user", BUILD_USER, buser, &mm);
1695 check_string(fplog, "Build host", BUILD_HOST, bhost, &mm);
1696 check_int (fplog, "Double prec.", GMX_CPT_BUILD_DP, double_prec, &mm);
1697 check_string(fplog, "Program name", Program(), fprog, &mm);
/* Parallel setup: total nodes, PME nodes and DD grid. */
1699 check_int (fplog, "#nodes", cr->nnodes, npp_f+npme_f, &mm);
1708 check_int (fplog, "#PME-nodes", cr->npmenodes, npme_f, &mm);
1711 if (cr->npmenodes >= 0)
1713 npp -= cr->npmenodes;
1717 check_int (fplog, "#DD-cells[x]", dd_nc[XX], dd_nc_f[XX], &mm);
1718 check_int (fplog, "#DD-cells[y]", dd_nc[YY], dd_nc_f[YY], &mm);
1719 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], dd_nc_f[ZZ], &mm);
1726 "Gromacs binary or parallel settings not identical to previous run.\n"
1727 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1728 fplog ? ",\n see the log file for details" : "");
1733 "Gromacs binary or parallel settings not identical to previous run.\n"
1734 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
/* Read the checkpoint file fn into state and, when appending is requested,
 * prepare the existing output files for being appended to.
 *
 * Visible steps, in order:
 *  - open fn and read the header with do_cpt_header; *step, *t and
 *    *simulation_part are filled from the file;
 *  - refuse appending when file_version >= 13 and the stored precision
 *    differs from GMX_CPT_BUILD_DP;
 *  - print a summary of the header to stderr and to fplog;
 *  - call gmx_fatal when natoms, ngtc, nnhpres or nlambda in the file differ
 *    from the values in state;
 *  - call init_gtc_state with the Nose-Hoover chain length from the file;
 *  - warn about an integrator mismatch;
 *  - derive the number of PP nodes, possibly taking cr->npmenodes and dd_nc
 *    from the file, and size the RNG state for SD and BD integrators;
 *  - handle differing state flags (fatal unless the environment variable
 *    GMX_ALLOW_CPT_MISMATCH is set) and report build and parallel
 *    differences through check_match;
 *  - read state, ekinstate, enerhist, EDstate, df_hist, the output file list
 *    and the footer, then close the file;
 *  - with bAppendOutputFiles: require the first listed file to be the log
 *    file, open every listed file, attempt a file lock, verify the stored
 *    MD5 checksum and truncate every file except the log to its stored
 *    offset. The log file is seeked to its offset and its FILE handle is
 *    handed back through *pfplog.
 *
 * NOTE(review): several guarding conditions and declarations are on lines
 * not visible in this excerpt (for example which files are locked and when
 * the stop messages lead to gmx_fatal); the list above describes only what
 * the visible statements do.
 */
1739 static void read_checkpoint(const char *fn, FILE **pfplog,
1740 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
1741 int eIntegrator, int *init_fep_state, gmx_int64_t *step, double *t,
1742 t_state *state, gmx_bool *bReadRNG, gmx_bool *bReadEkin,
1743 int *simulation_part,
1744 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend)
1749 char *version, *btime, *buser, *bhost, *fprog, *ftime;
1751 char filename[STRLEN], buf[STEPSTRSIZE];
1752 int nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
1754 int natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
1757 gmx_file_position_t *outputfiles;
1759 t_fileio *chksum_file;
1760 FILE * fplog = *pfplog;
1761 unsigned char digest[16];
1762 #if !defined __native_client__ && !defined GMX_NATIVE_WINDOWS
1763 struct flock fl; /* don't initialize here: the struct order is OS
1767 const char *int_warn =
1768 "WARNING: The checkpoint file was generated with integrator %s,\n"
1769 " while the simulation uses integrator %s\n\n";
1770 const char *sd_note =
1771 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1772 " while the simulation uses %d SD or BD nodes,\n"
1773 " continuation will be exact, except for the random state\n\n";
1775 #if !defined __native_client__ && !defined GMX_NATIVE_WINDOWS
1776 fl.l_type = F_WRLCK;
1777 fl.l_whence = SEEK_SET;
1786 "read_checkpoint not (yet) supported with particle decomposition");
1789 fp = gmx_fio_open(fn, "r");
1790 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
1791 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1792 &eIntegrator_f, simulation_part, step, t,
1793 &nppnodes_f, dd_nc_f, &npmenodes_f,
1794 &natoms, &ngtc, &nnhpres, &nhchainlength, &nlambda,
1795 &fflags, &flags_eks, &flags_enh, &flags_dfh,
1796 &state->edsamstate.nED, NULL);
1798 if (bAppendOutputFiles &&
1799 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1801 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1804 if (cr == NULL || MASTER(cr))
1806 fprintf(stderr, "\nReading checkpoint file %s generated: %s\n\n",
1810 /* This will not be written if we do appending, since fplog is still NULL then */
1813 fprintf(fplog, "\n");
1814 fprintf(fplog, "Reading checkpoint file %s\n", fn);
1815 fprintf(fplog, " file generated by: %s\n", fprog);
1816 fprintf(fplog, " file generated at: %s\n", ftime);
1817 fprintf(fplog, " GROMACS build time: %s\n", btime);
1818 fprintf(fplog, " GROMACS build user: %s\n", buser);
1819 fprintf(fplog, " GROMACS build host: %s\n", bhost);
1820 fprintf(fplog, " GROMACS double prec.: %d\n", double_prec);
1821 fprintf(fplog, " simulation part #: %d\n", *simulation_part);
1822 fprintf(fplog, " step: %s\n", gmx_step_str(*step, buf));
1823 fprintf(fplog, " time: %f\n", *t);
1824 fprintf(fplog, "\n");
/* The system sizes stored in the file must equal those of the current run. */
1827 if (natoms != state->natoms)
1829 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", natoms, state->natoms);
1831 if (ngtc != state->ngtc)
1833 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", ngtc, state->ngtc);
1835 if (nnhpres != state->nnhpres)
1837 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", nnhpres, state->nnhpres);
1840 if (nlambda != state->dfhist.nlambda)
1842 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", nlambda, state->dfhist.nlambda);
1845 init_gtc_state(state, state->ngtc, state->nnhpres, nhchainlength); /* need to keep this here to keep the tpr format working */
1846 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1848 if (eIntegrator_f != eIntegrator)
1852 fprintf(stderr, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1854 if (bAppendOutputFiles)
1857 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1858 "Stopping the run to prevent you from ruining all your data...\n"
1859 "If you _really_ know what you are doing, try with the -noappend option.\n");
1863 fprintf(fplog, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1872 else if (bPartDecomp)
1874 nppnodes = cr->nnodes;
1877 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1879 if (cr->npmenodes < 0)
1881 cr->npmenodes = npmenodes_f;
1883 nppnodes = cr->nnodes - cr->npmenodes;
1884 if (nppnodes == nppnodes_f)
1886 for (d = 0; d < DIM; d++)
1890 dd_nc[d] = dd_nc_f[d];
1897 /* The number of PP nodes has not been set yet */
1901 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1903 /* Correct the RNG state size for the number of PP nodes.
1904 * Such assignments should all be moved to one central function.
1906 state->nrng = nppnodes*gmx_rng_n();
1907 state->nrngi = nppnodes;
1911 if (fflags != state->flags)
1916 if (bAppendOutputFiles)
1919 "Output file appending requested, but input and checkpoint states are not identical.\n"
1920 "Stopping the run to prevent you from ruining all your data...\n"
1921 "You can try with the -noappend option, and get more info in the log file.\n");
1924 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1926 gmx_fatal(FARGS, "You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1931 "WARNING: The checkpoint state entries do not match the simulation,\n"
1932 " see the log file for details\n\n");
1938 print_flag_mismatch(fplog, state->flags, fflags);
1943 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1944 nppnodes != nppnodes_f)
1949 fprintf(stderr, sd_note, nppnodes_f, nppnodes);
1953 fprintf(fplog, sd_note, nppnodes_f, nppnodes);
1958 check_match(fplog, version, btime, buser, bhost, double_prec, fprog,
1959 cr, bPartDecomp, nppnodes_f, npmenodes_f, dd_nc, dd_nc_f);
1962 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, fflags, state, *bReadRNG, NULL);
1963 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1964 Investigate for 5.0. */
1969 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
1974 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1975 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1977 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
1978 flags_enh, &state->enerhist, NULL);
/* NOTE(review): here the ED state is read before the df_hist record, while
 * write_checkpoint, read_checkpoint_data and list_checkpoint in this file
 * handle df_hist before the ED state - verify that this order matches the
 * file layout.
 */
1984 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
1990 if (file_version < 6)
1992 const char *warn = "Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1994 fprintf(stderr, "\nWARNING: %s\n\n", warn);
1997 fprintf(fplog, "\nWARNING: %s\n\n", warn);
1999 state->enerhist.nsum = *step;
2000 state->enerhist.nsum_sim = *step;
2003 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
2009 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, NULL, file_version);
2015 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2020 if (gmx_fio_close(fp) != 0)
2022 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2031 /* If the user wants to append to output files,
2032 * we use the file pointer positions of the output files stored
2033 * in the checkpoint file and truncate the files such that any frames
2034 * written after the checkpoint time are removed.
2035 * All files are md5sum checked such that we can be sure that
2036 * we do not truncate other (maybe important) files.
2038 if (bAppendOutputFiles)
2040 if (fn2ftp(outputfiles[0].filename) != efLOG)
2042 /* make sure first file is log file so that it is OK to use it for
2045 gmx_fatal(FARGS, "The first output file should always be the log "
2046 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2048 for (i = 0; i < nfiles; i++)
2050 if (outputfiles[i].offset < 0)
2052 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2053 "is larger than 2 GB, but mdrun did not support large file"
2054 " offsets. Can not append. Run mdrun with -noappend",
2055 outputfiles[i].filename);
2058 chksum_file = gmx_fio_open(outputfiles[i].filename, "a");
2061 chksum_file = gmx_fio_open(outputfiles[i].filename, "r+");
2066 /* Note that there are systems where the lock operation
2067 * will succeed, but a second process can also lock the file.
2068 * We should probably try to detect this.
2070 #if defined __native_client__
2074 #elif defined GMX_NATIVE_WINDOWS
2075 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2077 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl) == -1)
2080 if (errno == ENOSYS)
2084 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2088 fprintf(stderr, "\nNOTE: File locking is not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2091 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2095 else if (errno == EACCES || errno == EAGAIN)
2097 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2098 "simulation?", outputfiles[i].filename);
2102 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2103 outputfiles[i].filename, strerror(errno));
2108 /* compute md5 chksum */
2109 if (outputfiles[i].chksum_size != -1)
2111 if (gmx_fio_get_file_md5(chksum_file, outputfiles[i].offset,
2112 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2114 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2115 outputfiles[i].chksum_size,
2116 outputfiles[i].filename);
2119 if (i == 0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2121 if (gmx_fio_seek(chksum_file, outputfiles[i].offset))
2123 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", strerror(errno));
2128 if (i == 0) /*open log file here - so that lock is never lifted
2129 after chksum is calculated */
2131 *pfplog = gmx_fio_getfp(chksum_file);
2135 gmx_fio_close(chksum_file);
2138 /* compare md5 chksum */
2139 if (outputfiles[i].chksum_size != -1 &&
2140 memcmp(digest, outputfiles[i].chksum, 16) != 0)
2144 fprintf(debug, "chksum for %s: ", outputfiles[i].filename);
2145 for (j = 0; j < 16; j++)
2147 fprintf(debug, "%02x", digest[j]);
2149 fprintf(debug, "\n");
2151 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2152 outputfiles[i].filename);
2157 if (i != 0) /*log file is already seeked to correct position */
2159 #ifdef GMX_NATIVE_WINDOWS
2160 rc = gmx_wintruncate(outputfiles[i].filename, outputfiles[i].offset);
2162 rc = truncate(outputfiles[i].filename, outputfiles[i].offset);
2166 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfiles[i].filename);
/* Load a checkpoint and update the run input so the simulation continues
 * from it.
 *
 * The state is read through read_checkpoint, which also receives
 * ir->eI, &ir->fepvals->init_fep_state and &ir->simulation_part. Afterwards
 * cr->npmenodes, dd_nc, step, *bReadRNG and *bReadEkin are passed to
 * gmx_bcast. Finally ir is adjusted: bContinuation is set to TRUE, a
 * non-negative nsteps is changed by (init_step - step), init_step becomes
 * the checkpoint step and simulation_part is incremented by one.
 * NOTE(review): the conditions selecting which ranks read and when the
 * broadcasts run are on lines not visible in this excerpt.
 */
2176 void load_checkpoint(const char *fn, FILE **fplog,
2177 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
2178 t_inputrec *ir, t_state *state,
2179 gmx_bool *bReadRNG, gmx_bool *bReadEkin,
2180 gmx_bool bAppend, gmx_bool bForceAppend)
2187 /* Read the state from the checkpoint file */
2188 read_checkpoint(fn, fplog,
2189 cr, bPartDecomp, dd_nc,
2190 ir->eI, &(ir->fepvals->init_fep_state), &step, &t, state, bReadRNG, bReadEkin,
2191 &ir->simulation_part, bAppend, bForceAppend);
2195 gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
2196 gmx_bcast(DIM*sizeof(dd_nc[0]), dd_nc, cr);
2197 gmx_bcast(sizeof(step), &step, cr);
2198 gmx_bcast(sizeof(*bReadRNG), bReadRNG, cr);
2199 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2201 ir->bContinuation = TRUE;
/* Keep the final step unchanged: shift nsteps by the steps already done. */
2202 if (ir->nsteps >= 0)
2204 ir->nsteps += ir->init_step - step;
2206 ir->init_step = step;
2207 ir->simulation_part += 1;
/* Read all records of an already opened checkpoint file fp into state.
 *
 * The header is read with do_cpt_header straight into the size and flag
 * fields of state (natoms, ngtc, nnhpres, nhchainlength, dfhist.nlambda,
 * flags, edsamstate.nED) and into *simulation_part, *step and *t. Then the
 * state, ekinstate, enerhist, df_hist, EDstate, output file list and footer
 * records are read in that order.
 * The output file list is stored in *outputfiles and *nfiles when
 * outputfiles is non-NULL; otherwise local variables receive it.
 * NOTE(review): the handling of the return codes and of the local file list
 * is on lines not visible in this excerpt.
 */
2210 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2211 gmx_int64_t *step, double *t, t_state *state,
2213 int *nfiles, gmx_file_position_t **outputfiles)
2216 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2221 int flags_eks, flags_enh, flags_dfh;
2223 gmx_file_position_t *files_loc = NULL;
2226 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2227 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2228 &eIntegrator, simulation_part, step, t, &nppnodes, dd_nc, &npme,
2229 &state->natoms, &state->ngtc, &state->nnhpres, &state->nhchainlength,
2230 &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
2231 &state->edsamstate.nED, NULL);
2233 do_cpt_state(gmx_fio_getxdr(fp), TRUE, state->flags, state, bReadRNG, NULL);
2238 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
2243 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2244 flags_enh, &state->enerhist, NULL);
2249 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
2255 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
/* Callers that do not want the file list pass NULL for outputfiles. */
2261 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2262 outputfiles != NULL ? outputfiles : &files_loc,
2263 outputfiles != NULL ? nfiles : &nfiles_loc,
2264 NULL, file_version);
2265 if (files_loc != NULL)
2275 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
/* Read the checkpoint file fn into state, *simulation_part, *step and *t.
 *
 * Opens fn for reading, delegates to read_checkpoint_data without requesting
 * the output file list (NULL, NULL) and with FALSE as the sixth argument,
 * then closes the file. A failing close is reported through gmx_file.
 */
2289 read_checkpoint_state(const char *fn, int *simulation_part,
2290 gmx_int64_t *step, double *t, t_state *state)
2294 fp = gmx_fio_open(fn, "r");
2295 read_checkpoint_data(fp, simulation_part, step, t, state, FALSE, NULL, NULL);
2296 if (gmx_fio_close(fp) != 0)
2298 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Fill the trajectory frame fr from the checkpoint data available on fp.
 *
 * A local state is set up with init_state and filled by
 * read_checkpoint_data. The visible statements then copy natoms, the step
 * (converted with gmx_int64_to_int), lambda[efptFEP] and fep_state into fr,
 * derive bX, bV and bBox from the estX, estV and estBOX bits of state.flags,
 * and copy the box with copy_mat.
 * NOTE(review): the transfer of coordinates, velocities and time, and the
 * condition around the box copy, are on lines not visible in this excerpt.
 */
2302 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2304 /* This next line is nasty because the sub-structures of t_state
2305 * cannot be assumed to be zeroed (or even initialized in ways the
2306 * rest of the code might assume). Using snew would be better, but
2307 * this will all go away for 5.0. */
2309 int simulation_part;
2313 init_state(&state, 0, 0, 0, 0, 0);
2315 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, FALSE, NULL, NULL);
2317 fr->natoms = state.natoms;
2320 fr->step = gmx_int64_to_int(step,
2321 "conversion of checkpoint to trajectory");
2325 fr->lambda = state.lambda[efptFEP];
2326 fr->fep_state = state.fep_state;
/* The presence flags of the frame mirror the state flags of the file. */
2328 fr->bX = (state.flags & (1<<estX));
2334 fr->bV = (state.flags & (1<<estV));
2341 fr->bBox = (state.flags & (1<<estBOX));
2344 copy_mat(state.box, fr->box);
/* List the contents of the checkpoint file fn on the stream out.
 *
 * A local state is set up with init_state, fn is opened for reading and the
 * header, state, ekinstate, enerhist, df_hist, EDstate and output file
 * records are read in that order. Each of those calls receives out as its
 * last stream argument, in the position where the other readers in this file
 * pass NULL. The footer is read afterwards and the file is closed; a failing
 * close is reported through gmx_file.
 * NOTE(review): the handling of the return codes is on lines not visible in
 * this excerpt.
 */
2349 void list_checkpoint(const char *fn, FILE *out)
2353 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2355 int eIntegrator, simulation_part, nppnodes, npme;
2360 int flags_eks, flags_enh, flags_dfh;
2364 gmx_file_position_t *outputfiles;
2367 init_state(&state, -1, -1, -1, -1, 0);
2369 fp = gmx_fio_open(fn, "r");
2370 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2371 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2372 &eIntegrator, &simulation_part, &step, &t, &nppnodes, dd_nc, &npme,
2373 &state.natoms, &state.ngtc, &state.nnhpres, &state.nhchainlength,
2374 &(state.dfhist.nlambda), &state.flags,
2375 &flags_eks, &flags_enh, &flags_dfh, &state.edsamstate.nED, out);
2376 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, state.flags, &state, TRUE, out);
2381 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state.ekinstate, out);
2386 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2387 flags_enh, &state.enerhist, out);
2391 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2392 flags_dfh, &state.dfhist, out);
2397 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state.edsamstate, out);
2402 do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, out, file_version);
2407 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2414 if (gmx_fio_close(fp) != 0)
2416 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Tell whether the file name fnm_cp taken from a checkpoint file is usable
 * as an output file of the current program.
 *
 * The result is true only when the index i ended below nfile and
 * gmx_fexist(fnm_cp) reports that the file exists on disk.
 * NOTE(review): the loop header is not visible in this excerpt; the visible
 * condition suggests that i advances while fnm[i] is not an output entry
 * whose first name equals fnm_cp - confirm against the full file.
 */
2423 static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
2427 /* Check if the output file name stored in the checkpoint file
2428 * is one of the output file names of mdrun.
2432 !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
2437 return (i < nfile && gmx_fexist(fnm_cp));
2440 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2441 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2442 gmx_int64_t *cpt_step, t_commrec *cr,
2443 gmx_bool bAppendReq,
2444 int nfile, const t_filenm fnm[],
2445 const char *part_suffix, gmx_bool *bAddPart)
2448 gmx_int64_t step = 0;
2450 /* This next line is nasty because the sub-structures of t_state
2451 * cannot be assumed to be zeroed (or even initialized in ways the
2452 * rest of the code might assume). Using snew would be better, but
2453 * this will all go away for 5.0. */
2456 gmx_file_position_t *outputfiles;
2459 char *fn, suf_up[STRLEN];
2465 if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
2467 *simulation_part = 0;
2471 init_state(&state, 0, 0, 0, 0, 0);
2473 read_checkpoint_data(fp, simulation_part, &step, &t, &state, FALSE,
2474 &nfiles, &outputfiles);
2475 if (gmx_fio_close(fp) != 0)
2477 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2484 for (f = 0; f < nfiles; f++)
2486 if (exist_output_file(outputfiles[f].filename, nfile, fnm))
2491 if (nexist == nfiles)
2493 bAppend = bAppendReq;
2495 else if (nexist > 0)
2498 "Output file appending has been requested,\n"
2499 "but some output files listed in the checkpoint file %s\n"
2500 "are not present or are named differently by the current program:\n",
2502 fprintf(stderr, "output files present:");
2503 for (f = 0; f < nfiles; f++)
2505 if (exist_output_file(outputfiles[f].filename,
2508 fprintf(stderr, " %s", outputfiles[f].filename);
2511 fprintf(stderr, "\n");
2512 fprintf(stderr, "output files not present or named differently:");
2513 for (f = 0; f < nfiles; f++)
2515 if (!exist_output_file(outputfiles[f].filename,
2518 fprintf(stderr, " %s", outputfiles[f].filename);
2521 fprintf(stderr, "\n");
2523 gmx_fatal(FARGS, "File appending requested, but only %d of the %d output files are present", nexist, nfiles);
2531 gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
2533 fn = outputfiles[0].filename;
2534 if (strlen(fn) < 4 ||
2535 gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
2537 gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
2539 /* Set bAddPart to whether the suffix string '.part' is present
2540 * in the log file name.
2542 strcpy(suf_up, part_suffix);
2544 *bAddPart = (strstr(fn, part_suffix) != NULL ||
2545 strstr(fn, suf_up) != NULL);
2553 gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
2555 if (*simulation_part > 0 && bAppendReq)
2557 gmx_bcast(sizeof(bAppend), &bAppend, cr);
2558 gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
2561 if (NULL != cpt_step)