2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012,2013, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* The source code in this file should be thread-safe.
37 Please keep it that way. */
47 #ifdef HAVE_SYS_TIME_H
55 #ifdef GMX_NATIVE_WINDOWS
58 #include <sys/locking.h>
68 #include "gmx_random.h"
69 #include "checkpoint.h"
73 #include "gromacs/fileio/filenm.h"
74 #include "gromacs/fileio/futil.h"
75 #include "gromacs/fileio/gmxfio.h"
76 #include "gromacs/fileio/xdrf.h"
77 #include "gromacs/fileio/xdr_datatype.h"
79 #include "buildinfo.h"
85 #define CPT_MAGIC1 171817
86 #define CPT_MAGIC2 171819
87 #define CPTSTRLEN 1024
90 #define GMX_CPT_BUILD_DP 1
92 #define GMX_CPT_BUILD_DP 0
95 /* cpt_version should normally only be changed
96 * when the header of footer format changes.
97 * The state data format itself is backward and forward compatible.
98 * But old code can not read a new entry that is present in the file
99 * (but can read a new format when new entries are not present).
101 static const int cpt_version = 15;
104 const char *est_names[estNR] =
107 "box", "box-rel", "box-v", "pres_prev",
108 "nosehoover-xi", "thermostat-integral",
109 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
110 "disre_initf", "disre_rm3tav",
111 "orire_initf", "orire_Dtav",
112 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng", "MC-rng-i"
116 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
119 const char *eeks_names[eeksNR] =
121 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
122 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
126 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
127 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
128 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
129 eenhENERGY_DELTA_H_NN,
130 eenhENERGY_DELTA_H_LIST,
131 eenhENERGY_DELTA_H_STARTTIME,
132 eenhENERGY_DELTA_H_STARTLAMBDA,
136 const char *eenh_names[eenhNR] =
138 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
139 "energy_sum_sim", "energy_nsum_sim",
140 "energy_nsteps", "energy_nsteps_sim",
142 "energy_delta_h_list",
143 "energy_delta_h_start_time",
144 "energy_delta_h_start_lambda"
147 /* free energy history variables -- need to be preserved over checkpoint */
149 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
150 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
152 /* free energy history variable names */
153 const char *edfh_names[edfhNR] =
155 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
156 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
159 #ifdef GMX_NATIVE_WINDOWS
161 gmx_wintruncate(const char *filename, __int64 size)
164 /*we do this elsewhere*/
170 fp = fopen(filename, "rb+");
177 return _chsize_s( fileno(fp), size);
184 ecprREAL, ecprRVEC, ecprMATRIX
188 cptpEST, cptpEEKS, cptpEENH, cptpEDFH
190 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
191 cptpEST - state variables.
192 cptpEEKS - Kinetic energy state variables.
193 cptpEENH - Energy history state variables.
194 cptpEDFH - free energy history variables.
198 static const char *st_names(int cptp, int ecpt)
202 case cptpEST: return est_names [ecpt]; break;
203 case cptpEEKS: return eeks_names[ecpt]; break;
204 case cptpEENH: return eenh_names[ecpt]; break;
205 case cptpEDFH: return edfh_names[ecpt]; break;
211 static void cp_warning(FILE *fp)
213 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
216 static void cp_error()
218 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
221 static void do_cpt_string_err(XDR *xd, gmx_bool bRead, const char *desc, char **s, FILE *list)
229 res = xdr_string(xd, s, CPTSTRLEN);
236 fprintf(list, "%s = %s\n", desc, *s);
241 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
245 res = xdr_int(xd, i);
252 fprintf(list, "%s = %d\n", desc, *i);
257 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
263 fprintf(list, "%s = ", desc);
265 for (j = 0; j < n && res; j++)
267 res &= xdr_u_char(xd, &i[j]);
270 fprintf(list, "%02x", i[j]);
285 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
287 if (do_cpt_int(xd, desc, i, list) < 0)
293 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_int64_t *i, FILE *list)
296 char buf[STEPSTRSIZE];
298 res = xdr_int64(xd, i);
305 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
309 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
313 res = xdr_double(xd, f);
320 fprintf(list, "%s = %f\n", desc, *f);
324 static void do_cpt_real_err(XDR *xd, real *f)
329 res = xdr_double(xd, f);
331 res = xdr_float(xd, f);
339 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
343 for (i = 0; i < n; i++)
345 for (j = 0; j < DIM; j++)
347 do_cpt_real_err(xd, &f[i][j]);
353 pr_rvecs(list, 0, desc, f, n);
357 /* If nval >= 0, nval is used; on read this should match the passed value.
358 * If nval n<0, *nptr is used; on read the value is stored in nptr
360 static int do_cpte_reals_low(XDR *xd, int cptp, int ecpt, int sflags,
361 int nval, int *nptr, real **v,
362 FILE *list, int erealtype)
366 int dtc = xdr_datatype_float;
368 int dtc = xdr_datatype_double;
370 real *vp, *va = NULL;
385 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
390 res = xdr_int(xd, &nf);
401 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), nval, nf);
410 res = xdr_int(xd, &dt);
417 fprintf(stderr, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
418 st_names(cptp, ecpt), xdr_datatype_names[dtc],
419 xdr_datatype_names[dt]);
421 if (list || !(sflags & (1<<ecpt)))
434 if (dt == xdr_datatype_float)
436 if (dtc == xdr_datatype_float)
444 res = xdr_vector(xd, (char *)vf, nf,
445 (unsigned int)sizeof(float), (xdrproc_t)xdr_float);
450 if (dtc != xdr_datatype_float)
452 for (i = 0; i < nf; i++)
461 if (dtc == xdr_datatype_double)
469 res = xdr_vector(xd, (char *)vd, nf,
470 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
475 if (dtc != xdr_datatype_double)
477 for (i = 0; i < nf; i++)
490 pr_reals(list, 0, st_names(cptp, ecpt), vp, nf);
493 pr_rvecs(list, 0, st_names(cptp, ecpt), (rvec *)vp, nf/3);
496 gmx_incons("Unknown checkpoint real type");
508 /* This function stores n along with the reals for reading,
509 * but on reading it assumes that n matches the value in the checkpoint file,
510 * a fatal error is generated when this is not the case.
512 static int do_cpte_reals(XDR *xd, int cptp, int ecpt, int sflags,
513 int n, real **v, FILE *list)
515 return do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, v, list, ecprREAL);
518 /* This function does the same as do_cpte_reals,
519 * except that on reading it ignores the passed value of *n
520 * and stored the value read from the checkpoint file in *n.
522 static int do_cpte_n_reals(XDR *xd, int cptp, int ecpt, int sflags,
523 int *n, real **v, FILE *list)
525 return do_cpte_reals_low(xd, cptp, ecpt, sflags, -1, n, v, list, ecprREAL);
528 static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
533 return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
536 static int do_cpte_ints(XDR *xd, int cptp, int ecpt, int sflags,
537 int n, int **v, FILE *list)
540 int dtc = xdr_datatype_int;
545 res = xdr_int(xd, &nf);
550 if (list == NULL && v != NULL && nf != n)
552 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
555 res = xdr_int(xd, &dt);
562 gmx_fatal(FARGS, "Type mismatch for state entry %s, code type is %s, file type is %s\n",
563 st_names(cptp, ecpt), xdr_datatype_names[dtc],
564 xdr_datatype_names[dt]);
566 if (list || !(sflags & (1<<ecpt)) || v == NULL)
579 res = xdr_vector(xd, (char *)vp, nf,
580 (unsigned int)sizeof(int), (xdrproc_t)xdr_int);
587 pr_ivec(list, 0, st_names(cptp, ecpt), vp, nf, TRUE);
597 static int do_cpte_int(XDR *xd, int cptp, int ecpt, int sflags,
600 return do_cpte_ints(xd, cptp, ecpt, sflags, 1, &i, list);
603 static int do_cpte_doubles(XDR *xd, int cptp, int ecpt, int sflags,
604 int n, double **v, FILE *list)
607 int dtc = xdr_datatype_double;
608 double *vp, *va = NULL;
612 res = xdr_int(xd, &nf);
617 if (list == NULL && nf != n)
619 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
622 res = xdr_int(xd, &dt);
629 gmx_fatal(FARGS, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
630 st_names(cptp, ecpt), xdr_datatype_names[dtc],
631 xdr_datatype_names[dt]);
633 if (list || !(sflags & (1<<ecpt)))
646 res = xdr_vector(xd, (char *)vp, nf,
647 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
654 pr_doubles(list, 0, st_names(cptp, ecpt), vp, nf);
664 static int do_cpte_double(XDR *xd, int cptp, int ecpt, int sflags,
665 double *r, FILE *list)
667 return do_cpte_doubles(xd, cptp, ecpt, sflags, 1, &r, list);
671 static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
672 int n, rvec **v, FILE *list)
676 return do_cpte_reals_low(xd, cptp, ecpt, sflags,
677 n*DIM, NULL, (real **)v, list, ecprRVEC);
680 static int do_cpte_matrix(XDR *xd, int cptp, int ecpt, int sflags,
681 matrix v, FILE *list)
686 vr = (real *)&(v[0][0]);
687 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
688 DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
690 if (list && ret == 0)
692 pr_rvecs(list, 0, st_names(cptp, ecpt), v, DIM);
699 static int do_cpte_nmatrix(XDR *xd, int cptp, int ecpt, int sflags,
700 int n, real **v, FILE *list)
705 char name[CPTSTRLEN];
712 for (i = 0; i < n; i++)
716 reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
717 if (list && reti == 0)
719 sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
720 pr_reals(list, 0, name, v[i], n);
730 static int do_cpte_matrices(XDR *xd, int cptp, int ecpt, int sflags,
731 int n, matrix **v, FILE *list)
734 matrix *vp, *va = NULL;
740 res = xdr_int(xd, &nf);
745 if (list == NULL && nf != n)
747 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
749 if (list || !(sflags & (1<<ecpt)))
762 snew(vr, nf*DIM*DIM);
763 for (i = 0; i < nf; i++)
765 for (j = 0; j < DIM; j++)
767 for (k = 0; k < DIM; k++)
769 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
773 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
774 nf*DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
775 for (i = 0; i < nf; i++)
777 for (j = 0; j < DIM; j++)
779 for (k = 0; k < DIM; k++)
781 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
787 if (list && ret == 0)
789 for (i = 0; i < nf; i++)
791 pr_rvecs(list, 0, st_names(cptp, ecpt), vp[i], DIM);
802 static void do_cpt_header(XDR *xd, gmx_bool bRead, int *file_version,
803 char **version, char **btime, char **buser, char **bhost,
805 char **fprog, char **ftime,
806 int *eIntegrator, int *simulation_part,
807 gmx_int64_t *step, double *t,
808 int *nnodes, int *dd_nc, int *npme,
809 int *natoms, int *ngtc, int *nnhpres, int *nhchainlength,
810 int *nlambda, int *flags_state,
811 int *flags_eks, int *flags_enh, int *flags_dfh,
829 res = xdr_int(xd, &magic);
832 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
834 if (magic != CPT_MAGIC1)
836 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
837 "The checkpoint file is corrupted or not a checkpoint file",
844 if (gethostname(fhost, 255) != 0)
846 sprintf(fhost, "unknown");
849 sprintf(fhost, "unknown");
852 do_cpt_string_err(xd, bRead, "GROMACS version", version, list);
853 do_cpt_string_err(xd, bRead, "GROMACS build time", btime, list);
854 do_cpt_string_err(xd, bRead, "GROMACS build user", buser, list);
855 do_cpt_string_err(xd, bRead, "GROMACS build host", bhost, list);
856 do_cpt_string_err(xd, bRead, "generating program", fprog, list);
857 do_cpt_string_err(xd, bRead, "generation time", ftime, list);
858 *file_version = cpt_version;
859 do_cpt_int_err(xd, "checkpoint file version", file_version, list);
860 if (*file_version > cpt_version)
862 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", *file_version, cpt_version);
864 if (*file_version >= 13)
866 do_cpt_int_err(xd, "GROMACS double precision", double_prec, list);
872 if (*file_version >= 12)
874 do_cpt_string_err(xd, bRead, "generating host", &fhost, list);
880 do_cpt_int_err(xd, "#atoms", natoms, list);
881 do_cpt_int_err(xd, "#T-coupling groups", ngtc, list);
882 if (*file_version >= 10)
884 do_cpt_int_err(xd, "#Nose-Hoover T-chains", nhchainlength, list);
890 if (*file_version >= 11)
892 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", nnhpres, list);
898 if (*file_version >= 14)
900 do_cpt_int_err(xd, "# of total lambda states ", nlambda, list);
906 do_cpt_int_err(xd, "integrator", eIntegrator, list);
907 if (*file_version >= 3)
909 do_cpt_int_err(xd, "simulation part #", simulation_part, list);
913 *simulation_part = 1;
915 if (*file_version >= 5)
917 do_cpt_step_err(xd, "step", step, list);
921 do_cpt_int_err(xd, "step", &idum, list);
924 do_cpt_double_err(xd, "t", t, list);
925 do_cpt_int_err(xd, "#PP-nodes", nnodes, list);
927 do_cpt_int_err(xd, "dd_nc[x]", dd_nc ? &(dd_nc[0]) : &idum, list);
928 do_cpt_int_err(xd, "dd_nc[y]", dd_nc ? &(dd_nc[1]) : &idum, list);
929 do_cpt_int_err(xd, "dd_nc[z]", dd_nc ? &(dd_nc[2]) : &idum, list);
930 do_cpt_int_err(xd, "#PME-only nodes", npme, list);
931 do_cpt_int_err(xd, "state flags", flags_state, list);
932 if (*file_version >= 4)
934 do_cpt_int_err(xd, "ekin data flags", flags_eks, list);
935 do_cpt_int_err(xd, "energy history flags", flags_enh, list);
940 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
941 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
942 (1<<(estORIRE_DTAV+2)) |
943 (1<<(estORIRE_DTAV+3))));
945 if (*file_version >= 14)
947 do_cpt_int_err(xd, "df history flags", flags_dfh, list);
954 if (*file_version >= 15)
956 do_cpt_int_err(xd, "ED data sets", nED, list);
964 static int do_cpt_footer(XDR *xd, int file_version)
969 if (file_version >= 2)
972 res = xdr_int(xd, &magic);
977 if (magic != CPT_MAGIC2)
986 static int do_cpt_state(XDR *xd, gmx_bool bRead,
987 int fflags, t_state *state,
988 gmx_bool bReadRNG, FILE *list)
991 int **rng_p, **rngi_p;
998 nnht = state->nhchainlength*state->ngtc;
999 nnhtp = state->nhchainlength*state->nnhpres;
1003 rng_p = (int **)&state->ld_rng;
1004 rngi_p = &state->ld_rngi;
1008 /* Do not read the RNG data */
1013 if (bRead) /* we need to allocate space for dfhist if we are reading */
1015 init_df_history(&state->dfhist, state->dfhist.nlambda);
1018 /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
1020 sflags = state->flags;
1021 for (i = 0; (i < estNR && ret == 0); i++)
1023 if (fflags & (1<<i))
1027 case estLAMBDA: ret = do_cpte_reals(xd, cptpEST, i, sflags, efptNR, &(state->lambda), list); break;
1028 case estFEPSTATE: ret = do_cpte_int (xd, cptpEST, i, sflags, &state->fep_state, list); break;
1029 case estBOX: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box, list); break;
1030 case estBOX_REL: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box_rel, list); break;
1031 case estBOXV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->boxv, list); break;
1032 case estPRES_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->pres_prev, list); break;
1033 case estSVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->svir_prev, list); break;
1034 case estFVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->fvir_prev, list); break;
1035 case estNH_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_xi, list); break;
1036 case estNH_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_vxi, list); break;
1037 case estNHPRES_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_xi, list); break;
1038 case estNHPRES_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_vxi, list); break;
1039 case estTC_INT: ret = do_cpte_doubles(xd, cptpEST, i, sflags, state->ngtc, &state->therm_integral, list); break;
1040 case estVETA: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->veta, list); break;
1041 case estVOL0: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->vol0, list); break;
1042 case estX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->x, list); break;
1043 case estV: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->v, list); break;
1044 case estSDX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->sd_X, list); break;
1045 case estLD_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrng, rng_p, list); break;
1046 case estLD_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrngi, rngi_p, list); break;
1047 case estMC_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nmcrng, (int **)&state->mc_rng, list); break;
1048 case estMC_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, 1, &state->mc_rngi, list); break;
1049 case estDISRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.disre_initf, list); break;
1050 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1051 case estORIRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.orire_initf, list); break;
1052 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1054 gmx_fatal(FARGS, "Unknown state entry %d\n"
1055 "You are probably reading a new checkpoint file with old code", i);
1063 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1071 for (i = 0; (i < eeksNR && ret == 0); i++)
1073 if (fflags & (1<<i))
1078 case eeksEKIN_N: ret = do_cpte_int(xd, cptpEEKS, i, fflags, &ekins->ekin_n, list); break;
1079 case eeksEKINH: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1080 case eeksEKINF: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1081 case eeksEKINO: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1082 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, cptpEEKS, i, fflags, ekins->ekin_total, list); break;
1083 case eeksEKINSCALEF: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscalef_nhc, list); break;
1084 case eeksVSCALE: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->vscale_nhc, list); break;
1085 case eeksEKINSCALEH: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->ekinscaleh_nhc, list); break;
1086 case eeksDEKINDL: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->dekindl, list); break;
1087 case eeksMVCOS: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->mvcos, list); break;
1089 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1090 "You are probably reading a new checkpoint file with old code", i);
1099 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1100 int fflags, energyhistory_t *enerhist,
1111 enerhist->nsteps = 0;
1113 enerhist->nsteps_sim = 0;
1114 enerhist->nsum_sim = 0;
1115 enerhist->dht = NULL;
1117 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1119 snew(enerhist->dht, 1);
1120 enerhist->dht->ndh = NULL;
1121 enerhist->dht->dh = NULL;
1122 enerhist->dht->start_lambda_set = FALSE;
1126 for (i = 0; (i < eenhNR && ret == 0); i++)
1128 if (fflags & (1<<i))
1132 case eenhENERGY_N: ret = do_cpte_int(xd, cptpEENH, i, fflags, &enerhist->nener, list); break;
1133 case eenhENERGY_AVER: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_ave, list); break;
1134 case eenhENERGY_SUM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum, list); break;
1135 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1136 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum_sim, list); break;
1137 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1138 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1139 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1140 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd, eenh_names[i], &(enerhist->dht->nndh), list);
1141 if (bRead) /* now allocate memory for it */
1143 snew(enerhist->dht->dh, enerhist->dht->nndh);
1144 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1145 for (j = 0; j < enerhist->dht->nndh; j++)
1147 enerhist->dht->ndh[j] = 0;
1148 enerhist->dht->dh[j] = NULL;
1152 case eenhENERGY_DELTA_H_LIST:
1153 for (j = 0; j < enerhist->dht->nndh; j++)
1155 ret = do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1158 case eenhENERGY_DELTA_H_STARTTIME:
1159 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1160 case eenhENERGY_DELTA_H_STARTLAMBDA:
1161 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1163 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1164 "You are probably reading a new checkpoint file with old code", i);
1169 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1171 /* Assume we have an old file format and copy sum to sum_sim */
1172 srenew(enerhist->ener_sum_sim, enerhist->nener);
1173 for (i = 0; i < enerhist->nener; i++)
1175 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1177 fflags |= (1<<eenhENERGY_SUM_SIM);
1180 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1181 !(fflags & (1<<eenhENERGY_NSTEPS)))
1183 /* Assume we have an old file format and copy nsum to nsteps */
1184 enerhist->nsteps = enerhist->nsum;
1185 fflags |= (1<<eenhENERGY_NSTEPS);
1187 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1188 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1190 /* Assume we have an old file format and copy nsum to nsteps */
1191 enerhist->nsteps_sim = enerhist->nsum_sim;
1192 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1198 static int do_cpt_df_hist(XDR *xd, int fflags, df_history_t *dfhist, FILE *list)
1203 nlambda = dfhist->nlambda;
1206 for (i = 0; (i < edfhNR && ret == 0); i++)
1208 if (fflags & (1<<i))
1212 case edfhBEQUIL: ret = do_cpte_int(xd, cptpEDFH, i, fflags, &dfhist->bEquil, list); break;
1213 case edfhNATLAMBDA: ret = do_cpte_ints(xd, cptpEDFH, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1214 case edfhWLHISTO: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1215 case edfhWLDELTA: ret = do_cpte_real(xd, cptpEDFH, i, fflags, &dfhist->wl_delta, list); break;
1216 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1217 case edfhSUMDG: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1218 case edfhSUMMINVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1219 case edfhSUMVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1220 case edfhACCUMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p, list); break;
1221 case edfhACCUMM: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m, list); break;
1222 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p2, list); break;
1223 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m2, list); break;
1224 case edfhTIJ: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij, list); break;
1225 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1228 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1229 "You are probably reading a new checkpoint file with old code", i);
1238 /* This function stores the last whole configuration of the reference and
1239 * average structure in the .cpt file
1241 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1242 edsamstate_t *EDstate, FILE *list)
1249 EDstate->bFromCpt = bRead;
1251 if (EDstate->nED <= 0)
1256 /* When reading, init_edsam has not been called yet,
1257 * so we have to allocate memory first. */
1260 snew(EDstate->nref, EDstate->nED);
1261 snew(EDstate->old_sref, EDstate->nED);
1262 snew(EDstate->nav, EDstate->nED);
1263 snew(EDstate->old_sav, EDstate->nED);
1266 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1267 for (i = 0; i < EDstate->nED; i++)
1269 /* Reference structure SREF */
1270 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1271 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1272 sprintf(buf, "ED%d x_ref", i+1);
1275 snew(EDstate->old_sref[i], EDstate->nref[i]);
1276 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1280 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1283 /* Average structure SAV */
1284 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1285 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1286 sprintf(buf, "ED%d x_av", i+1);
1289 snew(EDstate->old_sav[i], EDstate->nav[i]);
1290 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1294 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1302 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1303 gmx_file_position_t **p_outputfiles, int *nfiles,
1304 FILE *list, int file_version)
1308 gmx_off_t mask = 0xFFFFFFFFL;
1309 int offset_high, offset_low;
1311 gmx_file_position_t *outputfiles;
1313 if (do_cpt_int(xd, "number of output files", nfiles, list) != 0)
1320 snew(*p_outputfiles, *nfiles);
1323 outputfiles = *p_outputfiles;
1325 for (i = 0; i < *nfiles; i++)
1327 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1330 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1331 strncpy(outputfiles[i].filename, buf, CPTSTRLEN-1);
1337 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1341 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1345 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1349 buf = outputfiles[i].filename;
1350 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1352 offset = outputfiles[i].offset;
1360 offset_low = (int) (offset & mask);
1361 offset_high = (int) ((offset >> 32) & mask);
1363 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1367 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1372 if (file_version >= 8)
1374 if (do_cpt_int(xd, "file_checksum_size", &(outputfiles[i].chksum_size),
1379 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfiles[i].chksum, list) != 0)
1386 outputfiles[i].chksum_size = -1;
1393 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1394 FILE *fplog, t_commrec *cr,
1395 int eIntegrator, int simulation_part,
1396 gmx_bool bExpanded, int elamstats,
1397 gmx_int64_t step, double t, t_state *state)
1407 char *fntemp; /* the temporary checkpoint file name */
1409 char timebuf[STRLEN];
1410 int nppnodes, npmenodes, flag_64bit;
1411 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1412 gmx_file_position_t *outputfiles;
1415 int flags_eks, flags_enh, flags_dfh, i;
1420 if (DOMAINDECOMP(cr))
1422 nppnodes = cr->dd->nnodes;
1423 npmenodes = cr->npmenodes;
1427 nppnodes = cr->nnodes;
1437 /* make the new temporary filename */
1438 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1440 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1441 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1442 strcat(fntemp, suffix);
1443 strcat(fntemp, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1446 gmx_ctime_r(&now, timebuf, STRLEN);
1450 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1451 gmx_step_str(step, buf), timebuf);
1454 /* Get offsets for open files */
1455 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1457 fp = gmx_fio_open(fntemp, "w");
1459 if (state->ekinstate.bUpToDate)
1462 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1463 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1464 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1472 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1474 flags_enh |= (1<<eenhENERGY_N);
1475 if (state->enerhist.nsum > 0)
1477 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1478 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1480 if (state->enerhist.nsum_sim > 0)
1482 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1483 (1<<eenhENERGY_NSUM_SIM));
1485 if (state->enerhist.dht)
1487 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1488 (1<< eenhENERGY_DELTA_H_LIST) |
1489 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1490 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1496 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1497 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1500 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1502 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1504 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1505 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1513 /* We can check many more things now (CPU, acceleration, etc), but
1514 * it is highly unlikely to have two separate builds with exactly
1515 * the same version, user, time, and build host!
1518 version = gmx_strdup(VERSION);
1519 btime = gmx_strdup(BUILD_TIME);
1520 buser = gmx_strdup(BUILD_USER);
1521 bhost = gmx_strdup(BUILD_HOST);
1523 double_prec = GMX_CPT_BUILD_DP;
1524 fprog = gmx_strdup(Program());
1526 ftime = &(timebuf[0]);
1528 do_cpt_header(gmx_fio_getxdr(fp), FALSE, &file_version,
1529 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1530 &eIntegrator, &simulation_part, &step, &t, &nppnodes,
1531 DOMAINDECOMP(cr) ? cr->dd->nc : NULL, &npmenodes,
1532 &state->natoms, &state->ngtc, &state->nnhpres,
1533 &state->nhchainlength, &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
1534 &state->edsamstate.nED,
1543 if ((do_cpt_state(gmx_fio_getxdr(fp), FALSE, state->flags, state, TRUE, NULL) < 0) ||
1544 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL) < 0) ||
1545 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, &state->enerhist, NULL) < 0) ||
1546 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL) < 0) ||
1547 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, &state->edsamstate, NULL) < 0) ||
1548 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, &noutputfiles, NULL,
1551 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1554 do_cpt_footer(gmx_fio_getxdr(fp), file_version);
1556 /* we really, REALLY, want to make sure to physically write the checkpoint,
1557 and all the files it depends on, out to disk. Because we've
1558 opened the checkpoint with gmx_fio_open(), it's in our list
1560 ret = gmx_fio_all_output_fsync();
1566 "Cannot fsync '%s'; maybe you are out of disk space?",
1567 gmx_fio_getname(ret));
1569 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == NULL)
1579 if (gmx_fio_close(fp) != 0)
1581 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1584 /* we don't move the checkpoint if the user specified they didn't want it,
1585 or if the fsyncs failed */
1586 if (!bNumberAndKeep && !ret)
1590 /* Rename the previous checkpoint file */
1592 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1593 strcat(buf, "_prev");
1594 strcat(buf, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1596 /* we copy here so that if something goes wrong between now and
1597 * the rename below, there's always a state.cpt.
1598 * If renames are atomic (such as in POSIX systems),
1599 * this copying should be unneccesary.
1601 gmx_file_copy(fn, buf, FALSE);
1602 /* We don't really care if this fails:
1603 * there's already a new checkpoint.
1606 gmx_file_rename(fn, buf);
1609 if (gmx_file_rename(fntemp, fn) != 0)
1611 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1619 /*code for alternate checkpointing scheme. moved from top of loop over
1621 fcRequestCheckPoint();
1622 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
1624 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
1626 #endif /* end GMX_FAHCORE block */
1629 static void print_flag_mismatch(FILE *fplog, int sflags, int fflags)
1633 fprintf(fplog, "\nState entry mismatch between the simulation and the checkpoint file\n");
1634 fprintf(fplog, "Entries which are not present in the checkpoint file will not be updated\n");
1635 fprintf(fplog, " %24s %11s %11s\n", "", "simulation", "checkpoint");
1636 for (i = 0; i < estNR; i++)
1638 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1640 fprintf(fplog, " %24s %11s %11s\n",
1642 (sflags & (1<<i)) ? " present " : "not present",
1643 (fflags & (1<<i)) ? " present " : "not present");
1648 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
1650 FILE *fp = fplog ? fplog : stderr;
1654 fprintf(fp, " %s mismatch,\n", type);
1655 fprintf(fp, " current program: %d\n", p);
1656 fprintf(fp, " checkpoint file: %d\n", f);
1662 static void check_string(FILE *fplog, const char *type, const char *p,
1663 const char *f, gmx_bool *mm)
1665 FILE *fp = fplog ? fplog : stderr;
1667 if (strcmp(p, f) != 0)
1669 fprintf(fp, " %s mismatch,\n", type);
1670 fprintf(fp, " current program: %s\n", p);
1671 fprintf(fp, " checkpoint file: %s\n", f);
1677 static void check_match(FILE *fplog,
1679 char *btime, char *buser, char *bhost, int double_prec,
1681 t_commrec *cr, gmx_bool bPartDecomp, int npp_f, int npme_f,
1682 ivec dd_nc, ivec dd_nc_f)
1689 check_string(fplog, "Version", VERSION, version, &mm);
1690 check_string(fplog, "Build time", BUILD_TIME, btime, &mm);
1691 check_string(fplog, "Build user", BUILD_USER, buser, &mm);
1692 check_string(fplog, "Build host", BUILD_HOST, bhost, &mm);
1693 check_int (fplog, "Double prec.", GMX_CPT_BUILD_DP, double_prec, &mm);
1694 check_string(fplog, "Program name", Program(), fprog, &mm);
1696 check_int (fplog, "#nodes", cr->nnodes, npp_f+npme_f, &mm);
1705 check_int (fplog, "#PME-nodes", cr->npmenodes, npme_f, &mm);
1708 if (cr->npmenodes >= 0)
1710 npp -= cr->npmenodes;
1714 check_int (fplog, "#DD-cells[x]", dd_nc[XX], dd_nc_f[XX], &mm);
1715 check_int (fplog, "#DD-cells[y]", dd_nc[YY], dd_nc_f[YY], &mm);
1716 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], dd_nc_f[ZZ], &mm);
1723 "Gromacs binary or parallel settings not identical to previous run.\n"
1724 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1725 fplog ? ",\n see the log file for details" : "");
1730 "Gromacs binary or parallel settings not identical to previous run.\n"
1731 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1736 static void read_checkpoint(const char *fn, FILE **pfplog,
1737 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
1738 int eIntegrator, int *init_fep_state, gmx_int64_t *step, double *t,
1739 t_state *state, gmx_bool *bReadRNG, gmx_bool *bReadEkin,
1740 int *simulation_part,
1741 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend)
1746 char *version, *btime, *buser, *bhost, *fprog, *ftime;
1748 char filename[STRLEN], buf[STEPSTRSIZE];
1749 int nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
1751 int natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
1754 gmx_file_position_t *outputfiles;
1756 t_fileio *chksum_file;
1757 FILE * fplog = *pfplog;
1758 unsigned char digest[16];
1759 #ifndef GMX_NATIVE_WINDOWS
1760 struct flock fl; /* don't initialize here: the struct order is OS
1764 const char *int_warn =
1765 "WARNING: The checkpoint file was generated with integrator %s,\n"
1766 " while the simulation uses integrator %s\n\n";
1767 const char *sd_note =
1768 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1769 " while the simulation uses %d SD or BD nodes,\n"
1770 " continuation will be exact, except for the random state\n\n";
1772 #ifndef GMX_NATIVE_WINDOWS
1773 fl.l_type = F_WRLCK;
1774 fl.l_whence = SEEK_SET;
1783 "read_checkpoint not (yet) supported with particle decomposition");
1786 fp = gmx_fio_open(fn, "r");
1787 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
1788 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1789 &eIntegrator_f, simulation_part, step, t,
1790 &nppnodes_f, dd_nc_f, &npmenodes_f,
1791 &natoms, &ngtc, &nnhpres, &nhchainlength, &nlambda,
1792 &fflags, &flags_eks, &flags_enh, &flags_dfh,
1793 &state->edsamstate.nED, NULL);
1795 if (bAppendOutputFiles &&
1796 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1798 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1801 if (cr == NULL || MASTER(cr))
1803 fprintf(stderr, "\nReading checkpoint file %s generated: %s\n\n",
1807 /* This will not be written if we do appending, since fplog is still NULL then */
1810 fprintf(fplog, "\n");
1811 fprintf(fplog, "Reading checkpoint file %s\n", fn);
1812 fprintf(fplog, " file generated by: %s\n", fprog);
1813 fprintf(fplog, " file generated at: %s\n", ftime);
1814 fprintf(fplog, " GROMACS build time: %s\n", btime);
1815 fprintf(fplog, " GROMACS build user: %s\n", buser);
1816 fprintf(fplog, " GROMACS build host: %s\n", bhost);
1817 fprintf(fplog, " GROMACS double prec.: %d\n", double_prec);
1818 fprintf(fplog, " simulation part #: %d\n", *simulation_part);
1819 fprintf(fplog, " step: %s\n", gmx_step_str(*step, buf));
1820 fprintf(fplog, " time: %f\n", *t);
1821 fprintf(fplog, "\n");
1824 if (natoms != state->natoms)
1826 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", natoms, state->natoms);
1828 if (ngtc != state->ngtc)
1830 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", ngtc, state->ngtc);
1832 if (nnhpres != state->nnhpres)
1834 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", nnhpres, state->nnhpres);
1837 if (nlambda != state->dfhist.nlambda)
1839 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", nlambda, state->dfhist.nlambda);
1842 init_gtc_state(state, state->ngtc, state->nnhpres, nhchainlength); /* need to keep this here to keep the tpr format working */
1843 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1845 if (eIntegrator_f != eIntegrator)
1849 fprintf(stderr, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1851 if (bAppendOutputFiles)
1854 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1855 "Stopping the run to prevent you from ruining all your data...\n"
1856 "If you _really_ know what you are doing, try with the -noappend option.\n");
1860 fprintf(fplog, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1869 else if (bPartDecomp)
1871 nppnodes = cr->nnodes;
1874 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1876 if (cr->npmenodes < 0)
1878 cr->npmenodes = npmenodes_f;
1880 nppnodes = cr->nnodes - cr->npmenodes;
1881 if (nppnodes == nppnodes_f)
1883 for (d = 0; d < DIM; d++)
1887 dd_nc[d] = dd_nc_f[d];
1894 /* The number of PP nodes has not been set yet */
1898 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1900 /* Correct the RNG state size for the number of PP nodes.
1901 * Such assignments should all be moved to one central function.
1903 state->nrng = nppnodes*gmx_rng_n();
1904 state->nrngi = nppnodes;
1908 if (fflags != state->flags)
1913 if (bAppendOutputFiles)
1916 "Output file appending requested, but input and checkpoint states are not identical.\n"
1917 "Stopping the run to prevent you from ruining all your data...\n"
1918 "You can try with the -noappend option, and get more info in the log file.\n");
1921 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1923 gmx_fatal(FARGS, "You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1928 "WARNING: The checkpoint state entries do not match the simulation,\n"
1929 " see the log file for details\n\n");
1935 print_flag_mismatch(fplog, state->flags, fflags);
1940 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1941 nppnodes != nppnodes_f)
1946 fprintf(stderr, sd_note, nppnodes_f, nppnodes);
1950 fprintf(fplog, sd_note, nppnodes_f, nppnodes);
1955 check_match(fplog, version, btime, buser, bhost, double_prec, fprog,
1956 cr, bPartDecomp, nppnodes_f, npmenodes_f, dd_nc, dd_nc_f);
1959 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, fflags, state, *bReadRNG, NULL);
1960 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1961 Investigate for 5.0. */
1966 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
1971 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1972 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1974 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
1975 flags_enh, &state->enerhist, NULL);
1981 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
1987 if (file_version < 6)
1989 const char *warn = "Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1991 fprintf(stderr, "\nWARNING: %s\n\n", warn);
1994 fprintf(fplog, "\nWARNING: %s\n\n", warn);
1996 state->enerhist.nsum = *step;
1997 state->enerhist.nsum_sim = *step;
2000 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
2006 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, NULL, file_version);
2012 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2017 if (gmx_fio_close(fp) != 0)
2019 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2028 /* If the user wants to append to output files,
2029 * we use the file pointer positions of the output files stored
2030 * in the checkpoint file and truncate the files such that any frames
2031 * written after the checkpoint time are removed.
2032 * All files are md5sum checked such that we can be sure that
2033 * we do not truncate other (maybe imprortant) files.
2035 if (bAppendOutputFiles)
2037 if (fn2ftp(outputfiles[0].filename) != efLOG)
2039 /* make sure first file is log file so that it is OK to use it for
2042 gmx_fatal(FARGS, "The first output file should always be the log "
2043 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2045 for (i = 0; i < nfiles; i++)
2047 if (outputfiles[i].offset < 0)
2049 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2050 "is larger than 2 GB, but mdrun did not support large file"
2051 " offsets. Can not append. Run mdrun with -noappend",
2052 outputfiles[i].filename);
2055 chksum_file = gmx_fio_open(outputfiles[i].filename, "a");
2058 chksum_file = gmx_fio_open(outputfiles[i].filename, "r+");
2063 /* Note that there are systems where the lock operation
2064 * will succeed, but a second process can also lock the file.
2065 * We should probably try to detect this.
2067 #ifndef GMX_NATIVE_WINDOWS
2068 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
2071 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2074 if (errno == ENOSYS)
2078 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2082 fprintf(stderr, "\nNOTE: File locking is not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2085 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2089 else if (errno == EACCES || errno == EAGAIN)
2091 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2092 "simulation?", outputfiles[i].filename);
2096 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2097 outputfiles[i].filename, strerror(errno));
2102 /* compute md5 chksum */
2103 if (outputfiles[i].chksum_size != -1)
2105 if (gmx_fio_get_file_md5(chksum_file, outputfiles[i].offset,
2106 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2108 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2109 outputfiles[i].chksum_size,
2110 outputfiles[i].filename);
2113 if (i == 0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2115 if (gmx_fio_seek(chksum_file, outputfiles[i].offset))
2117 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", strerror(errno));
2122 if (i == 0) /*open log file here - so that lock is never lifted
2123 after chksum is calculated */
2125 *pfplog = gmx_fio_getfp(chksum_file);
2129 gmx_fio_close(chksum_file);
2132 /* compare md5 chksum */
2133 if (outputfiles[i].chksum_size != -1 &&
2134 memcmp(digest, outputfiles[i].chksum, 16) != 0)
2138 fprintf(debug, "chksum for %s: ", outputfiles[i].filename);
2139 for (j = 0; j < 16; j++)
2141 fprintf(debug, "%02x", digest[j]);
2143 fprintf(debug, "\n");
2145 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2146 outputfiles[i].filename);
2151 if (i != 0) /*log file is already seeked to correct position */
2153 #ifdef GMX_NATIVE_WINDOWS
2154 rc = gmx_wintruncate(outputfiles[i].filename, outputfiles[i].offset);
2156 rc = truncate(outputfiles[i].filename, outputfiles[i].offset);
2160 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfiles[i].filename);
2170 void load_checkpoint(const char *fn, FILE **fplog,
2171 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
2172 t_inputrec *ir, t_state *state,
2173 gmx_bool *bReadRNG, gmx_bool *bReadEkin,
2174 gmx_bool bAppend, gmx_bool bForceAppend)
2181 /* Read the state from the checkpoint file */
2182 read_checkpoint(fn, fplog,
2183 cr, bPartDecomp, dd_nc,
2184 ir->eI, &(ir->fepvals->init_fep_state), &step, &t, state, bReadRNG, bReadEkin,
2185 &ir->simulation_part, bAppend, bForceAppend);
2189 gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
2190 gmx_bcast(DIM*sizeof(dd_nc[0]), dd_nc, cr);
2191 gmx_bcast(sizeof(step), &step, cr);
2192 gmx_bcast(sizeof(*bReadRNG), bReadRNG, cr);
2193 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2195 ir->bContinuation = TRUE;
2196 if (ir->nsteps >= 0)
2198 ir->nsteps += ir->init_step - step;
2200 ir->init_step = step;
2201 ir->simulation_part += 1;
2204 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2205 gmx_int64_t *step, double *t, t_state *state,
2207 int *nfiles, gmx_file_position_t **outputfiles)
2210 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2215 int flags_eks, flags_enh, flags_dfh;
2217 gmx_file_position_t *files_loc = NULL;
2220 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2221 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2222 &eIntegrator, simulation_part, step, t, &nppnodes, dd_nc, &npme,
2223 &state->natoms, &state->ngtc, &state->nnhpres, &state->nhchainlength,
2224 &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
2225 &state->edsamstate.nED, NULL);
2227 do_cpt_state(gmx_fio_getxdr(fp), TRUE, state->flags, state, bReadRNG, NULL);
2232 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
2237 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2238 flags_enh, &state->enerhist, NULL);
2243 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
2249 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
2255 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2256 outputfiles != NULL ? outputfiles : &files_loc,
2257 outputfiles != NULL ? nfiles : &nfiles_loc,
2258 NULL, file_version);
2259 if (files_loc != NULL)
2269 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2283 read_checkpoint_state(const char *fn, int *simulation_part,
2284 gmx_int64_t *step, double *t, t_state *state)
2288 fp = gmx_fio_open(fn, "r");
2289 read_checkpoint_data(fp, simulation_part, step, t, state, FALSE, NULL, NULL);
2290 if (gmx_fio_close(fp) != 0)
2292 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2296 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2298 /* This next line is nasty because the sub-structures of t_state
2299 * cannot be assumed to be zeroed (or even initialized in ways the
2300 * rest of the code might assume). Using snew would be better, but
2301 * this will all go away for 5.0. */
2303 int simulation_part;
2307 init_state(&state, 0, 0, 0, 0, 0);
2309 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, FALSE, NULL, NULL);
2311 fr->natoms = state.natoms;
2314 fr->step = gmx_int64_to_int(step,
2315 "conversion of checkpoint to trajectory");
2319 fr->lambda = state.lambda[efptFEP];
2320 fr->fep_state = state.fep_state;
2322 fr->bX = (state.flags & (1<<estX));
2328 fr->bV = (state.flags & (1<<estV));
2335 fr->bBox = (state.flags & (1<<estBOX));
2338 copy_mat(state.box, fr->box);
2343 void list_checkpoint(const char *fn, FILE *out)
2347 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2349 int eIntegrator, simulation_part, nppnodes, npme;
2354 int flags_eks, flags_enh, flags_dfh;
2358 gmx_file_position_t *outputfiles;
2361 init_state(&state, -1, -1, -1, -1, 0);
2363 fp = gmx_fio_open(fn, "r");
2364 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2365 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2366 &eIntegrator, &simulation_part, &step, &t, &nppnodes, dd_nc, &npme,
2367 &state.natoms, &state.ngtc, &state.nnhpres, &state.nhchainlength,
2368 &(state.dfhist.nlambda), &state.flags,
2369 &flags_eks, &flags_enh, &flags_dfh, &state.edsamstate.nED, out);
2370 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, state.flags, &state, TRUE, out);
2375 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state.ekinstate, out);
2380 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2381 flags_enh, &state.enerhist, out);
2385 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2386 flags_dfh, &state.dfhist, out);
2391 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state.edsamstate, out);
2396 do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, out, file_version);
2401 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2408 if (gmx_fio_close(fp) != 0)
2410 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2417 static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
2421 /* Check if the output file name stored in the checkpoint file
2422 * is one of the output file names of mdrun.
2426 !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
2431 return (i < nfile && gmx_fexist(fnm_cp));
2434 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2435 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2436 gmx_int64_t *cpt_step, t_commrec *cr,
2437 gmx_bool bAppendReq,
2438 int nfile, const t_filenm fnm[],
2439 const char *part_suffix, gmx_bool *bAddPart)
2442 gmx_int64_t step = 0;
2444 /* This next line is nasty because the sub-structures of t_state
2445 * cannot be assumed to be zeroed (or even initialized in ways the
2446 * rest of the code might assume). Using snew would be better, but
2447 * this will all go away for 5.0. */
2450 gmx_file_position_t *outputfiles;
2453 char *fn, suf_up[STRLEN];
2459 if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
2461 *simulation_part = 0;
2465 init_state(&state, 0, 0, 0, 0, 0);
2467 read_checkpoint_data(fp, simulation_part, &step, &t, &state, FALSE,
2468 &nfiles, &outputfiles);
2469 if (gmx_fio_close(fp) != 0)
2471 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2478 for (f = 0; f < nfiles; f++)
2480 if (exist_output_file(outputfiles[f].filename, nfile, fnm))
2485 if (nexist == nfiles)
2487 bAppend = bAppendReq;
2489 else if (nexist > 0)
2492 "Output file appending has been requested,\n"
2493 "but some output files listed in the checkpoint file %s\n"
2494 "are not present or are named differently by the current program:\n",
2496 fprintf(stderr, "output files present:");
2497 for (f = 0; f < nfiles; f++)
2499 if (exist_output_file(outputfiles[f].filename,
2502 fprintf(stderr, " %s", outputfiles[f].filename);
2505 fprintf(stderr, "\n");
2506 fprintf(stderr, "output files not present or named differently:");
2507 for (f = 0; f < nfiles; f++)
2509 if (!exist_output_file(outputfiles[f].filename,
2512 fprintf(stderr, " %s", outputfiles[f].filename);
2515 fprintf(stderr, "\n");
2517 gmx_fatal(FARGS, "File appending requested, but only %d of the %d output files are present", nexist, nfiles);
2525 gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
2527 fn = outputfiles[0].filename;
2528 if (strlen(fn) < 4 ||
2529 gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
2531 gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
2533 /* Set bAddPart to whether the suffix string '.part' is present
2534 * in the log file name.
2536 strcpy(suf_up, part_suffix);
2538 *bAddPart = (strstr(fn, part_suffix) != NULL ||
2539 strstr(fn, suf_up) != NULL);
2547 gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
2549 if (*simulation_part > 0 && bAppendReq)
2551 gmx_bcast(sizeof(bAppend), &bAppend, cr);
2552 gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
2555 if (NULL != cpt_step)