2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2008,2009,2010,2011,2012,2013, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* The source code in this file should be thread-safe.
37 Please keep it that way. */
47 #ifdef HAVE_SYS_TIME_H
55 #ifdef GMX_NATIVE_WINDOWS
58 #include <sys/locking.h>
68 #include "gmx_random.h"
69 #include "checkpoint.h"
73 #include "gromacs/fileio/filenm.h"
74 #include "gromacs/fileio/futil.h"
75 #include "gromacs/fileio/gmxfio.h"
76 #include "gromacs/fileio/xdrf.h"
77 #include "gromacs/fileio/xdr_datatype.h"
79 #include "buildinfo.h"
85 #define CPT_MAGIC1 171817
86 #define CPT_MAGIC2 171819
87 #define CPTSTRLEN 1024
90 #define GMX_CPT_BUILD_DP 1
92 #define GMX_CPT_BUILD_DP 0
95 /* cpt_version should normally only be changed
96 * when the header or footer format changes.
97 * The state data format itself is backward and forward compatible.
98 * But old code can not read a new entry that is present in the file
99 * (but can read a new format when new entries are not present).
101 static const int cpt_version = 15;
104 const char *est_names[estNR] =
107 "box", "box-rel", "box-v", "pres_prev",
108 "nosehoover-xi", "thermostat-integral",
109 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
110 "disre_initf", "disre_rm3tav",
111 "orire_initf", "orire_Dtav",
112 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng", "MC-rng-i"
116 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
119 const char *eeks_names[eeksNR] =
121 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
122 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
126 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
127 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
128 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
129 eenhENERGY_DELTA_H_NN,
130 eenhENERGY_DELTA_H_LIST,
131 eenhENERGY_DELTA_H_STARTTIME,
132 eenhENERGY_DELTA_H_STARTLAMBDA,
136 const char *eenh_names[eenhNR] =
138 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
139 "energy_sum_sim", "energy_nsum_sim",
140 "energy_nsteps", "energy_nsteps_sim",
142 "energy_delta_h_list",
143 "energy_delta_h_start_time",
144 "energy_delta_h_start_lambda"
147 /* free energy history variables -- need to be preserved over checkpoint */
149 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
150 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
152 /* free energy history variable names */
153 const char *edfh_names[edfhNR] =
155 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
156 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
159 #ifdef GMX_NATIVE_WINDOWS
161 gmx_wintruncate(const char *filename, __int64 size)
164 /*we do this elsewhere*/
170 fp = fopen(filename, "rb+");
177 return _chsize_s( fileno(fp), size);
184 ecprREAL, ecprRVEC, ecprMATRIX
188 cptpEST, cptpEEKS, cptpEENH, cptpEDFH
190 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
191 cptpEST - state variables.
192 cptpEEKS - Kinetic energy state variables.
193 cptpEENH - Energy history state variables.
194 cptpEDFH - free energy history variables.
198 static const char *st_names(int cptp, int ecpt)
202 case cptpEST: return est_names [ecpt]; break;
203 case cptpEEKS: return eeks_names[ecpt]; break;
204 case cptpEENH: return eenh_names[ecpt]; break;
205 case cptpEDFH: return edfh_names[ecpt]; break;
/* Write the standard "checkpoint is corrupted or truncated" warning,
 * surrounded by blank lines, to the stream fp.
 */
static void cp_warning(FILE *fp)
{
    /* The message contains no conversion specifiers, so fputs emits
     * exactly the same bytes as a formatted print would.
     */
    fputs("\nWARNING: Checkpoint file is corrupted or truncated\n\n", fp);
}
216 static void cp_error()
218 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
221 static void do_cpt_string_err(XDR *xd, gmx_bool bRead, const char *desc, char **s, FILE *list)
229 res = xdr_string(xd, s, CPTSTRLEN);
236 fprintf(list, "%s = %s\n", desc, *s);
241 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
245 res = xdr_int(xd, i);
252 fprintf(list, "%s = %d\n", desc, *i);
257 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
263 fprintf(list, "%s = ", desc);
265 for (j = 0; j < n && res; j++)
267 res &= xdr_u_char(xd, &i[j]);
270 fprintf(list, "%02x", i[j]);
285 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
287 if (do_cpt_int(xd, desc, i, list) < 0)
293 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_int64_t *i, FILE *list)
296 char buf[STEPSTRSIZE];
298 res = xdr_int64(xd, i);
305 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
309 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
313 res = xdr_double(xd, f);
320 fprintf(list, "%s = %f\n", desc, *f);
324 static void do_cpt_real_err(XDR *xd, real *f)
329 res = xdr_double(xd, f);
331 res = xdr_float(xd, f);
339 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
343 for (i = 0; i < n; i++)
345 for (j = 0; j < DIM; j++)
347 do_cpt_real_err(xd, &f[i][j]);
353 pr_rvecs(list, 0, desc, f, n);
357 /* If nval >= 0, nval is used; on read this should match the passed value.
358 * If nval < 0, *nptr is used; on read the value is stored in *nptr
360 static int do_cpte_reals_low(XDR *xd, int cptp, int ecpt, int sflags,
361 int nval, int *nptr, real **v,
362 FILE *list, int erealtype)
366 int dtc = xdr_datatype_float;
368 int dtc = xdr_datatype_double;
370 real *vp, *va = NULL;
385 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
390 res = xdr_int(xd, &nf);
401 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), nval, nf);
410 res = xdr_int(xd, &dt);
417 fprintf(stderr, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
418 st_names(cptp, ecpt), xdr_datatype_names[dtc],
419 xdr_datatype_names[dt]);
421 if (list || !(sflags & (1<<ecpt)))
434 if (dt == xdr_datatype_float)
436 if (dtc == xdr_datatype_float)
444 res = xdr_vector(xd, (char *)vf, nf,
445 (unsigned int)sizeof(float), (xdrproc_t)xdr_float);
450 if (dtc != xdr_datatype_float)
452 for (i = 0; i < nf; i++)
461 if (dtc == xdr_datatype_double)
469 res = xdr_vector(xd, (char *)vd, nf,
470 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
475 if (dtc != xdr_datatype_double)
477 for (i = 0; i < nf; i++)
490 pr_reals(list, 0, st_names(cptp, ecpt), vp, nf);
493 pr_rvecs(list, 0, st_names(cptp, ecpt), (rvec *)vp, nf/3);
496 gmx_incons("Unknown checkpoint real type");
508 /* This function stores n along with the reals for reading,
509 * but on reading it assumes that n matches the value in the checkpoint file,
510 * a fatal error is generated when this is not the case.
512 static int do_cpte_reals(XDR *xd, int cptp, int ecpt, int sflags,
513 int n, real **v, FILE *list)
515 return do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, v, list, ecprREAL);
518 /* This function does the same as do_cpte_reals,
519 * except that on reading it ignores the passed value of *n
520 * and stored the value read from the checkpoint file in *n.
522 static int do_cpte_n_reals(XDR *xd, int cptp, int ecpt, int sflags,
523 int *n, real **v, FILE *list)
525 return do_cpte_reals_low(xd, cptp, ecpt, sflags, -1, n, v, list, ecprREAL);
528 static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
533 return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
536 static int do_cpte_ints(XDR *xd, int cptp, int ecpt, int sflags,
537 int n, int **v, FILE *list)
540 int dtc = xdr_datatype_int;
545 res = xdr_int(xd, &nf);
550 if (list == NULL && v != NULL && nf != n)
552 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
555 res = xdr_int(xd, &dt);
562 gmx_fatal(FARGS, "Type mismatch for state entry %s, code type is %s, file type is %s\n",
563 st_names(cptp, ecpt), xdr_datatype_names[dtc],
564 xdr_datatype_names[dt]);
566 if (list || !(sflags & (1<<ecpt)) || v == NULL)
579 res = xdr_vector(xd, (char *)vp, nf,
580 (unsigned int)sizeof(int), (xdrproc_t)xdr_int);
587 pr_ivec(list, 0, st_names(cptp, ecpt), vp, nf, TRUE);
597 static int do_cpte_int(XDR *xd, int cptp, int ecpt, int sflags,
600 return do_cpte_ints(xd, cptp, ecpt, sflags, 1, &i, list);
603 static int do_cpte_doubles(XDR *xd, int cptp, int ecpt, int sflags,
604 int n, double **v, FILE *list)
607 int dtc = xdr_datatype_double;
608 double *vp, *va = NULL;
612 res = xdr_int(xd, &nf);
617 if (list == NULL && nf != n)
619 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
622 res = xdr_int(xd, &dt);
629 gmx_fatal(FARGS, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
630 st_names(cptp, ecpt), xdr_datatype_names[dtc],
631 xdr_datatype_names[dt]);
633 if (list || !(sflags & (1<<ecpt)))
646 res = xdr_vector(xd, (char *)vp, nf,
647 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
654 pr_doubles(list, 0, st_names(cptp, ecpt), vp, nf);
664 static int do_cpte_double(XDR *xd, int cptp, int ecpt, int sflags,
665 double *r, FILE *list)
667 return do_cpte_doubles(xd, cptp, ecpt, sflags, 1, &r, list);
671 static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
672 int n, rvec **v, FILE *list)
676 return do_cpte_reals_low(xd, cptp, ecpt, sflags,
677 n*DIM, NULL, (real **)v, list, ecprRVEC);
680 static int do_cpte_matrix(XDR *xd, int cptp, int ecpt, int sflags,
681 matrix v, FILE *list)
686 vr = (real *)&(v[0][0]);
687 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
688 DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
690 if (list && ret == 0)
692 pr_rvecs(list, 0, st_names(cptp, ecpt), v, DIM);
699 static int do_cpte_nmatrix(XDR *xd, int cptp, int ecpt, int sflags,
700 int n, real **v, FILE *list)
705 char name[CPTSTRLEN];
712 for (i = 0; i < n; i++)
716 reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
717 if (list && reti == 0)
719 sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
720 pr_reals(list, 0, name, v[i], n);
730 static int do_cpte_matrices(XDR *xd, int cptp, int ecpt, int sflags,
731 int n, matrix **v, FILE *list)
734 matrix *vp, *va = NULL;
740 res = xdr_int(xd, &nf);
745 if (list == NULL && nf != n)
747 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
749 if (list || !(sflags & (1<<ecpt)))
762 snew(vr, nf*DIM*DIM);
763 for (i = 0; i < nf; i++)
765 for (j = 0; j < DIM; j++)
767 for (k = 0; k < DIM; k++)
769 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
773 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
774 nf*DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
775 for (i = 0; i < nf; i++)
777 for (j = 0; j < DIM; j++)
779 for (k = 0; k < DIM; k++)
781 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
787 if (list && ret == 0)
789 for (i = 0; i < nf; i++)
791 pr_rvecs(list, 0, st_names(cptp, ecpt), vp[i], DIM);
802 static void do_cpt_header(XDR *xd, gmx_bool bRead, int *file_version,
803 char **version, char **btime, char **buser, char **bhost,
805 char **fprog, char **ftime,
806 int *eIntegrator, int *simulation_part,
807 gmx_int64_t *step, double *t,
808 int *nnodes, int *dd_nc, int *npme,
809 int *natoms, int *ngtc, int *nnhpres, int *nhchainlength,
810 int *nlambda, int *flags_state,
811 int *flags_eks, int *flags_enh, int *flags_dfh,
829 res = xdr_int(xd, &magic);
832 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
834 if (magic != CPT_MAGIC1)
836 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
837 "The checkpoint file is corrupted or not a checkpoint file",
844 if (gethostname(fhost, 255) != 0)
846 sprintf(fhost, "unknown");
849 sprintf(fhost, "unknown");
852 do_cpt_string_err(xd, bRead, "GROMACS version", version, list);
853 do_cpt_string_err(xd, bRead, "GROMACS build time", btime, list);
854 do_cpt_string_err(xd, bRead, "GROMACS build user", buser, list);
855 do_cpt_string_err(xd, bRead, "GROMACS build host", bhost, list);
856 do_cpt_string_err(xd, bRead, "generating program", fprog, list);
857 do_cpt_string_err(xd, bRead, "generation time", ftime, list);
858 *file_version = cpt_version;
859 do_cpt_int_err(xd, "checkpoint file version", file_version, list);
860 if (*file_version > cpt_version)
862 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", *file_version, cpt_version);
864 if (*file_version >= 13)
866 do_cpt_int_err(xd, "GROMACS double precision", double_prec, list);
872 if (*file_version >= 12)
874 do_cpt_string_err(xd, bRead, "generating host", &fhost, list);
880 do_cpt_int_err(xd, "#atoms", natoms, list);
881 do_cpt_int_err(xd, "#T-coupling groups", ngtc, list);
882 if (*file_version >= 10)
884 do_cpt_int_err(xd, "#Nose-Hoover T-chains", nhchainlength, list);
890 if (*file_version >= 11)
892 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", nnhpres, list);
898 if (*file_version >= 14)
900 do_cpt_int_err(xd, "# of total lambda states ", nlambda, list);
906 do_cpt_int_err(xd, "integrator", eIntegrator, list);
907 if (*file_version >= 3)
909 do_cpt_int_err(xd, "simulation part #", simulation_part, list);
913 *simulation_part = 1;
915 if (*file_version >= 5)
917 do_cpt_step_err(xd, "step", step, list);
921 do_cpt_int_err(xd, "step", &idum, list);
924 do_cpt_double_err(xd, "t", t, list);
925 do_cpt_int_err(xd, "#PP-nodes", nnodes, list);
927 do_cpt_int_err(xd, "dd_nc[x]", dd_nc ? &(dd_nc[0]) : &idum, list);
928 do_cpt_int_err(xd, "dd_nc[y]", dd_nc ? &(dd_nc[1]) : &idum, list);
929 do_cpt_int_err(xd, "dd_nc[z]", dd_nc ? &(dd_nc[2]) : &idum, list);
930 do_cpt_int_err(xd, "#PME-only nodes", npme, list);
931 do_cpt_int_err(xd, "state flags", flags_state, list);
932 if (*file_version >= 4)
934 do_cpt_int_err(xd, "ekin data flags", flags_eks, list);
935 do_cpt_int_err(xd, "energy history flags", flags_enh, list);
940 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
941 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
942 (1<<(estORIRE_DTAV+2)) |
943 (1<<(estORIRE_DTAV+3))));
945 if (*file_version >= 14)
947 do_cpt_int_err(xd, "df history flags", flags_dfh, list);
954 if (*file_version >= 15)
956 do_cpt_int_err(xd, "ED data sets", nED, list);
964 static int do_cpt_footer(XDR *xd, int file_version)
969 if (file_version >= 2)
972 res = xdr_int(xd, &magic);
977 if (magic != CPT_MAGIC2)
986 static int do_cpt_state(XDR *xd, gmx_bool bRead,
987 int fflags, t_state *state,
988 gmx_bool bReadRNG, FILE *list)
991 int **rng_p, **rngi_p;
998 nnht = state->nhchainlength*state->ngtc;
999 nnhtp = state->nhchainlength*state->nnhpres;
1003 rng_p = (int **)&state->ld_rng;
1004 rngi_p = &state->ld_rngi;
1008 /* Do not read the RNG data */
1013 if (bRead) /* we need to allocate space for dfhist if we are reading */
1015 init_df_history(&state->dfhist, state->dfhist.nlambda);
1018 /* We want the MC_RNG the same across all the nodes for now -- lambda MC is global */
1020 sflags = state->flags;
1021 for (i = 0; (i < estNR && ret == 0); i++)
1023 if (fflags & (1<<i))
1027 case estLAMBDA: ret = do_cpte_reals(xd, cptpEST, i, sflags, efptNR, &(state->lambda), list); break;
1028 case estFEPSTATE: ret = do_cpte_int (xd, cptpEST, i, sflags, &state->fep_state, list); break;
1029 case estBOX: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box, list); break;
1030 case estBOX_REL: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box_rel, list); break;
1031 case estBOXV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->boxv, list); break;
1032 case estPRES_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->pres_prev, list); break;
1033 case estSVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->svir_prev, list); break;
1034 case estFVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->fvir_prev, list); break;
1035 case estNH_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_xi, list); break;
1036 case estNH_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_vxi, list); break;
1037 case estNHPRES_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_xi, list); break;
1038 case estNHPRES_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_vxi, list); break;
1039 case estTC_INT: ret = do_cpte_doubles(xd, cptpEST, i, sflags, state->ngtc, &state->therm_integral, list); break;
1040 case estVETA: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->veta, list); break;
1041 case estVOL0: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->vol0, list); break;
1042 case estX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->x, list); break;
1043 case estV: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->v, list); break;
1044 case estSDX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->sd_X, list); break;
1045 case estLD_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrng, rng_p, list); break;
1046 case estLD_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrngi, rngi_p, list); break;
1047 case estMC_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nmcrng, (int **)&state->mc_rng, list); break;
1048 case estMC_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, 1, &state->mc_rngi, list); break;
1049 case estDISRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.disre_initf, list); break;
1050 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1051 case estORIRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.orire_initf, list); break;
1052 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1054 gmx_fatal(FARGS, "Unknown state entry %d\n"
1055 "You are probably reading a new checkpoint file with old code", i);
1063 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1071 for (i = 0; (i < eeksNR && ret == 0); i++)
1073 if (fflags & (1<<i))
1078 case eeksEKIN_N: ret = do_cpte_int(xd, cptpEEKS, i, fflags, &ekins->ekin_n, list); break;
1079 case eeksEKINH: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1080 case eeksEKINF: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1081 case eeksEKINO: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1082 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, cptpEEKS, i, fflags, ekins->ekin_total, list); break;
1083 case eeksEKINSCALEF: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscalef_nhc, list); break;
1084 case eeksVSCALE: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->vscale_nhc, list); break;
1085 case eeksEKINSCALEH: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->ekinscaleh_nhc, list); break;
1086 case eeksDEKINDL: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->dekindl, list); break;
1087 case eeksMVCOS: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->mvcos, list); break;
1089 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1090 "You are probably reading a new checkpoint file with old code", i);
1099 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1100 int fflags, energyhistory_t *enerhist,
1111 enerhist->nsteps = 0;
1113 enerhist->nsteps_sim = 0;
1114 enerhist->nsum_sim = 0;
1115 enerhist->dht = NULL;
1117 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1119 snew(enerhist->dht, 1);
1120 enerhist->dht->ndh = NULL;
1121 enerhist->dht->dh = NULL;
1122 enerhist->dht->start_lambda_set = FALSE;
1126 for (i = 0; (i < eenhNR && ret == 0); i++)
1128 if (fflags & (1<<i))
1132 case eenhENERGY_N: ret = do_cpte_int(xd, cptpEENH, i, fflags, &enerhist->nener, list); break;
1133 case eenhENERGY_AVER: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_ave, list); break;
1134 case eenhENERGY_SUM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum, list); break;
1135 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1136 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum_sim, list); break;
1137 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1138 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1139 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1140 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd, eenh_names[i], &(enerhist->dht->nndh), list);
1141 if (bRead) /* now allocate memory for it */
1143 snew(enerhist->dht->dh, enerhist->dht->nndh);
1144 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1145 for (j = 0; j < enerhist->dht->nndh; j++)
1147 enerhist->dht->ndh[j] = 0;
1148 enerhist->dht->dh[j] = NULL;
1152 case eenhENERGY_DELTA_H_LIST:
1153 for (j = 0; j < enerhist->dht->nndh; j++)
1155 ret = do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1158 case eenhENERGY_DELTA_H_STARTTIME:
1159 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1160 case eenhENERGY_DELTA_H_STARTLAMBDA:
1161 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1163 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1164 "You are probably reading a new checkpoint file with old code", i);
1169 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1171 /* Assume we have an old file format and copy sum to sum_sim */
1172 srenew(enerhist->ener_sum_sim, enerhist->nener);
1173 for (i = 0; i < enerhist->nener; i++)
1175 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1177 fflags |= (1<<eenhENERGY_SUM_SIM);
1180 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1181 !(fflags & (1<<eenhENERGY_NSTEPS)))
1183 /* Assume we have an old file format and copy nsum to nsteps */
1184 enerhist->nsteps = enerhist->nsum;
1185 fflags |= (1<<eenhENERGY_NSTEPS);
1187 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1188 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1190 /* Assume we have an old file format and copy nsum_sim to nsteps_sim */
1191 enerhist->nsteps_sim = enerhist->nsum_sim;
1192 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1198 static int do_cpt_df_hist(XDR *xd, int fflags, df_history_t *dfhist, FILE *list)
1203 nlambda = dfhist->nlambda;
1206 for (i = 0; (i < edfhNR && ret == 0); i++)
1208 if (fflags & (1<<i))
1212 case edfhBEQUIL: ret = do_cpte_int(xd, cptpEDFH, i, fflags, &dfhist->bEquil, list); break;
1213 case edfhNATLAMBDA: ret = do_cpte_ints(xd, cptpEDFH, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1214 case edfhWLHISTO: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1215 case edfhWLDELTA: ret = do_cpte_real(xd, cptpEDFH, i, fflags, &dfhist->wl_delta, list); break;
1216 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1217 case edfhSUMDG: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1218 case edfhSUMMINVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1219 case edfhSUMVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1220 case edfhACCUMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p, list); break;
1221 case edfhACCUMM: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m, list); break;
1222 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p2, list); break;
1223 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m2, list); break;
1224 case edfhTIJ: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij, list); break;
1225 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1228 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1229 "You are probably reading a new checkpoint file with old code", i);
1238 /* This function stores the last whole configuration of the reference and
1239 * average structure in the .cpt file
1241 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1242 edsamstate_t *EDstate, FILE *list)
1249 EDstate->bFromCpt = bRead;
1251 if (EDstate->nED <= 0)
1256 /* When reading, init_edsam has not been called yet,
1257 * so we have to allocate memory first. */
1260 snew(EDstate->nref, EDstate->nED);
1261 snew(EDstate->old_sref, EDstate->nED);
1262 snew(EDstate->nav, EDstate->nED);
1263 snew(EDstate->old_sav, EDstate->nED);
1266 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1267 for (i = 0; i < EDstate->nED; i++)
1269 /* Reference structure SREF */
1270 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1271 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1272 sprintf(buf, "ED%d x_ref", i+1);
1275 snew(EDstate->old_sref[i], EDstate->nref[i]);
1276 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1280 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1283 /* Average structure SAV */
1284 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1285 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1286 sprintf(buf, "ED%d x_av", i+1);
1289 snew(EDstate->old_sav[i], EDstate->nav[i]);
1290 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1294 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1302 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1303 gmx_file_position_t **p_outputfiles, int *nfiles,
1304 FILE *list, int file_version)
1308 gmx_off_t mask = 0xFFFFFFFFL;
1309 int offset_high, offset_low;
1311 gmx_file_position_t *outputfiles;
1313 if (do_cpt_int(xd, "number of output files", nfiles, list) != 0)
1320 snew(*p_outputfiles, *nfiles);
1323 outputfiles = *p_outputfiles;
1325 for (i = 0; i < *nfiles; i++)
1327 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1330 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1331 strncpy(outputfiles[i].filename, buf, CPTSTRLEN-1);
1337 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1341 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1345 #if (SIZEOF_GMX_OFF_T > 4)
1346 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1348 outputfiles[i].offset = offset_low;
1353 buf = outputfiles[i].filename;
1354 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1356 offset = outputfiles[i].offset;
1364 #if (SIZEOF_GMX_OFF_T > 4)
1365 offset_low = (int) (offset & mask);
1366 offset_high = (int) ((offset >> 32) & mask);
1368 offset_low = offset;
1372 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1376 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1381 if (file_version >= 8)
1383 if (do_cpt_int(xd, "file_checksum_size", &(outputfiles[i].chksum_size),
1388 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfiles[i].chksum, list) != 0)
1395 outputfiles[i].chksum_size = -1;
1402 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1403 FILE *fplog, t_commrec *cr,
1404 int eIntegrator, int simulation_part,
1405 gmx_bool bExpanded, int elamstats,
1406 gmx_int64_t step, double t, t_state *state)
1416 char *fntemp; /* the temporary checkpoint file name */
1418 char timebuf[STRLEN];
1419 int nppnodes, npmenodes, flag_64bit;
1420 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1421 gmx_file_position_t *outputfiles;
1424 int flags_eks, flags_enh, flags_dfh, i;
1429 if (DOMAINDECOMP(cr))
1431 nppnodes = cr->dd->nnodes;
1432 npmenodes = cr->npmenodes;
1436 nppnodes = cr->nnodes;
1446 /* make the new temporary filename */
1447 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1449 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1450 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1451 strcat(fntemp, suffix);
1452 strcat(fntemp, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1455 gmx_ctime_r(&now, timebuf, STRLEN);
1459 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1460 gmx_step_str(step, buf), timebuf);
1463 /* Get offsets for open files */
1464 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1466 fp = gmx_fio_open(fntemp, "w");
1468 if (state->ekinstate.bUpToDate)
1471 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1472 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1473 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1481 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1483 flags_enh |= (1<<eenhENERGY_N);
1484 if (state->enerhist.nsum > 0)
1486 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1487 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1489 if (state->enerhist.nsum_sim > 0)
1491 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1492 (1<<eenhENERGY_NSUM_SIM));
1494 if (state->enerhist.dht)
1496 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1497 (1<< eenhENERGY_DELTA_H_LIST) |
1498 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1499 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1505 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1506 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1509 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1511 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1513 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1514 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1522 /* We can check many more things now (CPU, acceleration, etc), but
1523 * it is highly unlikely to have two separate builds with exactly
1524 * the same version, user, time, and build host!
1527 version = gmx_strdup(VERSION);
1528 btime = gmx_strdup(BUILD_TIME);
1529 buser = gmx_strdup(BUILD_USER);
1530 bhost = gmx_strdup(BUILD_HOST);
1532 double_prec = GMX_CPT_BUILD_DP;
1533 fprog = gmx_strdup(Program());
1535 ftime = &(timebuf[0]);
1537 do_cpt_header(gmx_fio_getxdr(fp), FALSE, &file_version,
1538 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1539 &eIntegrator, &simulation_part, &step, &t, &nppnodes,
1540 DOMAINDECOMP(cr) ? cr->dd->nc : NULL, &npmenodes,
1541 &state->natoms, &state->ngtc, &state->nnhpres,
1542 &state->nhchainlength, &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
1543 &state->edsamstate.nED,
1552 if ((do_cpt_state(gmx_fio_getxdr(fp), FALSE, state->flags, state, TRUE, NULL) < 0) ||
1553 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL) < 0) ||
1554 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, &state->enerhist, NULL) < 0) ||
1555 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL) < 0) ||
1556 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, &state->edsamstate, NULL) < 0) ||
1557 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, &noutputfiles, NULL,
1560 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1563 do_cpt_footer(gmx_fio_getxdr(fp), file_version);
1565 /* we really, REALLY, want to make sure to physically write the checkpoint,
1566 and all the files it depends on, out to disk. Because we've
1567 opened the checkpoint with gmx_fio_open(), it's in our list
1569 ret = gmx_fio_all_output_fsync();
1575 "Cannot fsync '%s'; maybe you are out of disk space?",
1576 gmx_fio_getname(ret));
1578 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == NULL)
1588 if (gmx_fio_close(fp) != 0)
1590 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1593 /* we don't move the checkpoint if the user specified they didn't want it,
1594 or if the fsyncs failed */
1595 if (!bNumberAndKeep && !ret)
1599 /* Rename the previous checkpoint file */
1601 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1602 strcat(buf, "_prev");
1603 strcat(buf, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1605 /* we copy here so that if something goes wrong between now and
1606 * the rename below, there's always a state.cpt.
1607 * If renames are atomic (such as in POSIX systems),
1608 * this copying should be unnecessary.
1610 gmx_file_copy(fn, buf, FALSE);
1611 /* We don't really care if this fails:
1612 * there's already a new checkpoint.
1615 gmx_file_rename(fn, buf);
1618 if (gmx_file_rename(fntemp, fn) != 0)
1620 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1628 /*code for alternate checkpointing scheme. moved from top of loop over
1630 fcRequestCheckPoint();
1631 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
1633 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
1635 #endif /* end GMX_FAHCORE block */
/* Write a table to fplog comparing the state entries of the simulation
 * (sflags) with those of the checkpoint file (fflags). Both arguments are
 * treated as bit masks: entry i is tested with (1<<i) for i in [0, estNR).
 */
1638 static void print_flag_mismatch(FILE *fplog, int sflags, int fflags)
1642 fprintf(fplog, "\nState entry mismatch between the simulation and the checkpoint file\n");
1643 fprintf(fplog, "Entries which are not present in the checkpoint file will not be updated\n");
1644 fprintf(fplog, " %24s %11s %11s\n", "", "simulation", "checkpoint");
1645 for (i = 0; i < estNR; i++)
/* Only entries that are set on at least one of the two sides get a row */
1647 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1649 fprintf(fplog, " %24s %11s %11s\n",
1651 (sflags & (1<<i)) ? " present " : "not present",
1652 (fflags & (1<<i)) ? " present " : "not present");
/* Report a mismatch of the integer setting named by type, printing the value
 * of the current program (p) and the value from the checkpoint file (f).
 * The report goes to fplog when it is non-NULL and to stderr otherwise.
 * NOTE(review): the test that guards the report and the update of *mm are
 * not visible in this block; presumably the report is printed only when
 * p != f and *mm is set in that case - confirm.
 */
1657 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
1659 FILE *fp = fplog ? fplog : stderr;
1663 fprintf(fp, " %s mismatch,\n", type);
1664 fprintf(fp, " current program: %d\n", p);
1665 fprintf(fp, " checkpoint file: %d\n", f);
/* Compare the string setting named by type between the current program (p)
 * and the checkpoint file (f) with strcmp, and print both values when they
 * differ. The report goes to fplog when it is non-NULL and to stderr
 * otherwise.
 * NOTE(review): the update of *mm is not visible in this block; presumably
 * it is set when the strings differ - confirm.
 */
1671 static void check_string(FILE *fplog, const char *type, const char *p,
1672 const char *f, gmx_bool *mm)
1674 FILE *fp = fplog ? fplog : stderr;
1676 if (strcmp(p, f) != 0)
1678 fprintf(fp, " %s mismatch,\n", type);
1679 fprintf(fp, " current program: %s\n", p);
1680 fprintf(fp, " checkpoint file: %s\n", f);
/* Compare the identity of the running binary and its parallel setup with the
 * values recorded in a checkpoint file, reporting every difference through
 * check_string()/check_int() and collecting the outcome in mm.
 *
 * Compared items: VERSION, BUILD_TIME, BUILD_USER, BUILD_HOST,
 * GMX_CPT_BUILD_DP and Program() against the corresponding checkpoint
 * values; cr->nnodes against npp_f+npme_f; cr->npmenodes against npme_f;
 * and the three components of dd_nc against dd_nc_f.
 *
 * NOTE(review): the conditions that guard the node, PME-node and DD-cell
 * checks, and the calls that emit the two closing notes, are not visible in
 * this block - confirm before relying on when each check runs.
 */
1686 static void check_match(FILE *fplog,
1688 char *btime, char *buser, char *bhost, int double_prec,
1690 t_commrec *cr, gmx_bool bPartDecomp, int npp_f, int npme_f,
1691 ivec dd_nc, ivec dd_nc_f)
/* Build identification of this binary versus what the checkpoint recorded */
1698 check_string(fplog, "Version", VERSION, version, &mm);
1699 check_string(fplog, "Build time", BUILD_TIME, btime, &mm);
1700 check_string(fplog, "Build user", BUILD_USER, buser, &mm);
1701 check_string(fplog, "Build host", BUILD_HOST, bhost, &mm);
1702 check_int (fplog, "Double prec.", GMX_CPT_BUILD_DP, double_prec, &mm);
1703 check_string(fplog, "Program name", Program(), fprog, &mm);
/* Total node count of this run versus PP + PME nodes of the checkpoint */
1705 check_int (fplog, "#nodes", cr->nnodes, npp_f+npme_f, &mm);
1714 check_int (fplog, "#PME-nodes", cr->npmenodes, npme_f, &mm);
1717 if (cr->npmenodes >= 0)
1719 npp -= cr->npmenodes;
/* Domain decomposition grid, one check per dimension */
1723 check_int (fplog, "#DD-cells[x]", dd_nc[XX], dd_nc_f[XX], &mm);
1724 check_int (fplog, "#DD-cells[y]", dd_nc[YY], dd_nc_f[YY], &mm);
1725 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], dd_nc_f[ZZ], &mm);
1732 "Gromacs binary or parallel settings not identical to previous run.\n"
1733 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1734 fplog ? ",\n see the log file for details" : "");
1739 "Gromacs binary or parallel settings not identical to previous run.\n"
1740 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
/* Read the checkpoint file fn into state and, when appending is requested,
 * prepare the existing output files for appending.
 *
 * The header is read first with do_cpt_header(), which fills
 * *simulation_part, *step and *t. The sizes stored in the file (atoms,
 * T-coupling groups, NH-pressure-coupling variables, lambda states) must
 * equal those of state, otherwise gmx_fatal() is called. The state,
 * kinetic-energy state, energy history, ED state, free-energy history,
 * output-file list and footer are then read with the do_cpt_*() routines.
 *
 * Values written back to the caller, as far as visible here:
 *   cr->npmenodes    taken from the file when it was negative and the total
 *                    node count matches the file
 *   dd_nc            copied from the file's DD grid
 *   *init_fep_state  set to state->fep_state after the state is read
 *   *bReadEkin       set from the kinetic-energy flags of the file
 *   *pfplog          when appending, set to the FILE of the first output
 *                    file, which must be the log file
 *
 * With bAppendOutputFiles set, each output file listed in the checkpoint is
 * opened, locked (fcntl or _locking), its MD5 digest is compared with the
 * stored one, and files other than the first are truncated to the stored
 * offset; any failure ends in gmx_fatal().
 *
 * NOTE(review): bForceAppend is not referenced in the lines visible here;
 * presumably it decides whether unsupported file locking is fatal - confirm.
 */
1745 static void read_checkpoint(const char *fn, FILE **pfplog,
1746 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
1747 int eIntegrator, int *init_fep_state, gmx_int64_t *step, double *t,
1748 t_state *state, gmx_bool *bReadRNG, gmx_bool *bReadEkin,
1749 int *simulation_part,
1750 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend)
1755 char *version, *btime, *buser, *bhost, *fprog, *ftime;
1757 char filename[STRLEN], buf[STEPSTRSIZE];
1758 int nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
1760 int natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
1763 gmx_file_position_t *outputfiles;
1765 t_fileio *chksum_file;
1766 FILE * fplog = *pfplog;
1767 unsigned char digest[16];
1768 #ifndef GMX_NATIVE_WINDOWS
1769 struct flock fl; /* don't initialize here: the struct order is OS
1773 const char *int_warn =
1774 "WARNING: The checkpoint file was generated with integrator %s,\n"
1775 " while the simulation uses integrator %s\n\n";
1776 const char *sd_note =
1777 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1778 " while the simulation uses %d SD or BD nodes,\n"
1779 " continuation will be exact, except for the random state\n\n";
1781 #ifndef GMX_NATIVE_WINDOWS
1782 fl.l_type = F_WRLCK;
1783 fl.l_whence = SEEK_SET;
1792 "read_checkpoint not (yet) supported with particle decomposition");
1795 fp = gmx_fio_open(fn, "r");
1796 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
1797 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1798 &eIntegrator_f, simulation_part, step, t,
1799 &nppnodes_f, dd_nc_f, &npmenodes_f,
1800 &natoms, &ngtc, &nnhpres, &nhchainlength, &nlambda,
1801 &fflags, &flags_eks, &flags_enh, &flags_dfh,
1802 &state->edsamstate.nED, NULL);
1804 if (bAppendOutputFiles &&
1805 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1807 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1810 if (cr == NULL || MASTER(cr))
1812 fprintf(stderr, "\nReading checkpoint file %s generated: %s\n\n",
1816 /* This will not be written if we do appending, since fplog is still NULL then */
1819 fprintf(fplog, "\n");
1820 fprintf(fplog, "Reading checkpoint file %s\n", fn);
1821 fprintf(fplog, " file generated by: %s\n", fprog);
1822 fprintf(fplog, " file generated at: %s\n", ftime);
1823 fprintf(fplog, " GROMACS build time: %s\n", btime);
1824 fprintf(fplog, " GROMACS build user: %s\n", buser);
1825 fprintf(fplog, " GROMACS build host: %s\n", bhost);
1826 fprintf(fplog, " GROMACS double prec.: %d\n", double_prec);
1827 fprintf(fplog, " simulation part #: %d\n", *simulation_part);
1828 fprintf(fplog, " step: %s\n", gmx_step_str(*step, buf));
1829 fprintf(fplog, " time: %f\n", *t);
1830 fprintf(fplog, "\n");
/* The checkpoint must describe a system of the same sizes as the current state */
1833 if (natoms != state->natoms)
1835 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", natoms, state->natoms);
1837 if (ngtc != state->ngtc)
1839 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", ngtc, state->ngtc);
1841 if (nnhpres != state->nnhpres)
1843 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", nnhpres, state->nnhpres);
1846 if (nlambda != state->dfhist.nlambda)
1848 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", nlambda, state->dfhist.nlambda);
1851 init_gtc_state(state, state->ngtc, state->nnhpres, nhchainlength); /* need to keep this here to keep the tpr format working */
1852 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
/* An integrator that differs from the one in the checkpoint is reported with int_warn */
1854 if (eIntegrator_f != eIntegrator)
1858 fprintf(stderr, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1860 if (bAppendOutputFiles)
1863 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1864 "Stopping the run to prevent you from ruining all your data...\n"
1865 "If you _really_ know what you are doing, try with the -noappend option.\n");
1869 fprintf(fplog, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1878 else if (bPartDecomp)
1880 nppnodes = cr->nnodes;
1883 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1885 if (cr->npmenodes < 0)
1887 cr->npmenodes = npmenodes_f;
1889 nppnodes = cr->nnodes - cr->npmenodes;
1890 if (nppnodes == nppnodes_f)
1892 for (d = 0; d < DIM; d++)
1896 dd_nc[d] = dd_nc_f[d];
1903 /* The number of PP nodes has not been set yet */
1907 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1909 /* Correct the RNG state size for the number of PP nodes.
1910 * Such assignments should all be moved to one central function.
1912 state->nrng = nppnodes*gmx_rng_n();
1913 state->nrngi = nppnodes;
1917 if (fflags != state->flags)
1922 if (bAppendOutputFiles)
1925 "Output file appending requested, but input and checkpoint states are not identical.\n"
1926 "Stopping the run to prevent you from ruining all your data...\n"
1927 "You can try with the -noappend option, and get more info in the log file.\n");
1930 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1932 gmx_fatal(FARGS, "You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1937 "WARNING: The checkpoint state entries do not match the simulation,\n"
1938 " see the log file for details\n\n");
1944 print_flag_mismatch(fplog, state->flags, fflags);
1949 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1950 nppnodes != nppnodes_f)
1955 fprintf(stderr, sd_note, nppnodes_f, nppnodes);
1959 fprintf(fplog, sd_note, nppnodes_f, nppnodes);
1964 check_match(fplog, version, btime, buser, bhost, double_prec, fprog,
1965 cr, bPartDecomp, nppnodes_f, npmenodes_f, dd_nc, dd_nc_f);
1968 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, fflags, state, *bReadRNG, NULL);
1969 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1970 Investigate for 5.0. */
1975 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
/* Kinetic-energy data counts as read when the file flags contain any of the
 * EKINH, EKINF, EKINO, EKINSCALEF, EKINSCALEH or VSCALE entries */
1980 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1981 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1983 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
1984 flags_enh, &state->enerhist, NULL);
1990 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
/* File versions before 6: warn and take both energy-history sum counts from the checkpoint step */
1996 if (file_version < 6)
1998 const char *warn = "Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
2000 fprintf(stderr, "\nWARNING: %s\n\n", warn);
2003 fprintf(fplog, "\nWARNING: %s\n\n", warn);
2005 state->enerhist.nsum = *step;
2006 state->enerhist.nsum_sim = *step;
2009 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
2015 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, NULL, file_version);
2021 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2026 if (gmx_fio_close(fp) != 0)
2028 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2037 /* If the user wants to append to output files,
2038 * we use the file pointer positions of the output files stored
2039 * in the checkpoint file and truncate the files such that any frames
2040 * written after the checkpoint time are removed.
2041 * All files are md5sum checked such that we can be sure that
2042 * we do not truncate other (maybe important) files.
2044 if (bAppendOutputFiles)
2046 if (fn2ftp(outputfiles[0].filename) != efLOG)
2048 /* make sure first file is log file so that it is OK to use it for
2051 gmx_fatal(FARGS, "The first output file should always be the log "
2052 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2054 for (i = 0; i < nfiles; i++)
2056 if (outputfiles[i].offset < 0)
2058 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2059 "is larger than 2 GB, but mdrun did not support large file"
2060 " offsets. Can not append. Run mdrun with -noappend",
2061 outputfiles[i].filename);
2064 chksum_file = gmx_fio_open(outputfiles[i].filename, "a");
2067 chksum_file = gmx_fio_open(outputfiles[i].filename, "r+");
2072 /* Note that there are systems where the lock operation
2073 * will succeed, but a second process can also lock the file.
2074 * We should probably try to detect this.
2076 #ifndef GMX_NATIVE_WINDOWS
2077 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
2080 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2083 if (errno == ENOSYS)
2087 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2091 fprintf(stderr, "\nNOTE: File locking is not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2094 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2098 else if (errno == EACCES || errno == EAGAIN)
2100 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2101 "simulation?", outputfiles[i].filename);
2105 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2106 outputfiles[i].filename, strerror(errno));
2111 /* compute md5 chksum */
2112 if (outputfiles[i].chksum_size != -1)
2114 if (gmx_fio_get_file_md5(chksum_file, outputfiles[i].offset,
2115 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2117 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2118 outputfiles[i].chksum_size,
2119 outputfiles[i].filename);
2122 if (i == 0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2124 if (gmx_fio_seek(chksum_file, outputfiles[i].offset))
2126 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", strerror(errno));
2131 if (i == 0) /*open log file here - so that lock is never lifted
2132 after chksum is calculated */
2134 *pfplog = gmx_fio_getfp(chksum_file);
2138 gmx_fio_close(chksum_file);
2141 /* compare md5 chksum */
2142 if (outputfiles[i].chksum_size != -1 &&
2143 memcmp(digest, outputfiles[i].chksum, 16) != 0)
2147 fprintf(debug, "chksum for %s: ", outputfiles[i].filename);
2148 for (j = 0; j < 16; j++)
2150 fprintf(debug, "%02x", digest[j]);
2152 fprintf(debug, "\n");
2154 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2155 outputfiles[i].filename);
2160 if (i != 0) /*log file is already seeked to correct position */
2162 #ifdef GMX_NATIVE_WINDOWS
2163 rc = gmx_wintruncate(outputfiles[i].filename, outputfiles[i].offset);
2165 rc = truncate(outputfiles[i].filename, outputfiles[i].offset);
2169 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfiles[i].filename);
/* Load the checkpoint file fn and set up the input record ir to continue
 * from it.
 *
 * read_checkpoint() fills state, the step and time, ir->simulation_part and
 * ir->fepvals->init_fep_state. cr->npmenodes, dd_nc, the step, *bReadRNG and
 * *bReadEkin are then passed through gmx_bcast(). Finally ir is marked as a
 * continuation: when ir->nsteps is non-negative it is changed by
 * ir->init_step - step, ir->init_step becomes the checkpoint step and
 * ir->simulation_part is incremented.
 *
 * NOTE(review): the conditions that select which ranks read the file and
 * when the broadcasts run are not visible in this block - confirm.
 */
2179 void load_checkpoint(const char *fn, FILE **fplog,
2180 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
2181 t_inputrec *ir, t_state *state,
2182 gmx_bool *bReadRNG, gmx_bool *bReadEkin,
2183 gmx_bool bAppend, gmx_bool bForceAppend)
2190 /* Read the state from the checkpoint file */
2191 read_checkpoint(fn, fplog,
2192 cr, bPartDecomp, dd_nc,
2193 ir->eI, &(ir->fepvals->init_fep_state), &step, &t, state, bReadRNG, bReadEkin,
2194 &ir->simulation_part, bAppend, bForceAppend);
/* Share the values that read_checkpoint may have set or changed */
2198 gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
2199 gmx_bcast(DIM*sizeof(dd_nc[0]), dd_nc, cr);
2200 gmx_bcast(sizeof(step), &step, cr);
2201 gmx_bcast(sizeof(*bReadRNG), bReadRNG, cr);
2202 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2204 ir->bContinuation = TRUE;
/* A negative nsteps is left untouched; otherwise it is adjusted by init_step - step */
2205 if (ir->nsteps >= 0)
2207 ir->nsteps += ir->init_step - step;
2209 ir->init_step = step;
2210 ir->simulation_part += 1;
/* Read all sections of an already opened checkpoint file fp into state.
 *
 * The header read stores the system sizes and flags directly in state and
 * fills *simulation_part, *step and *t. The state, kinetic-energy state,
 * energy history, free-energy history, ED state, output-file list and footer
 * are then read in that order.
 *
 * When outputfiles is non-NULL the file list and its length are returned
 * through outputfiles and nfiles; otherwise they are read into the local
 * files_loc/nfiles_loc.
 * NOTE(review): the handling of the ret values and of a non-NULL files_loc
 * is not visible in this block; presumably errors are fatal and the local
 * list is freed - confirm.
 */
2213 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2214 gmx_int64_t *step, double *t, t_state *state,
2216 int *nfiles, gmx_file_position_t **outputfiles)
2219 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2224 int flags_eks, flags_enh, flags_dfh;
2226 gmx_file_position_t *files_loc = NULL;
2229 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2230 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2231 &eIntegrator, simulation_part, step, t, &nppnodes, dd_nc, &npme,
2232 &state->natoms, &state->ngtc, &state->nnhpres, &state->nhchainlength,
2233 &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
2234 &state->edsamstate.nED, NULL);
2236 do_cpt_state(gmx_fio_getxdr(fp), TRUE, state->flags, state, bReadRNG, NULL);
2241 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
2246 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2247 flags_enh, &state->enerhist, NULL);
2252 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
2258 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
/* Use the caller's storage for the file list when given, else local storage */
2264 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2265 outputfiles != NULL ? outputfiles : &files_loc,
2266 outputfiles != NULL ? nfiles : &nfiles_loc,
2267 NULL, file_version);
2268 if (files_loc != NULL)
2278 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
/* Open the checkpoint file fn for reading, read it into state with
 * read_checkpoint_data() (bReadRNG FALSE, no output-file list requested),
 * filling *simulation_part, *step and *t, and close the file again.
 * A failing close is reported through gmx_file().
 */
2292 read_checkpoint_state(const char *fn, int *simulation_part,
2293 gmx_int64_t *step, double *t, t_state *state)
2297 fp = gmx_fio_open(fn, "r");
2298 read_checkpoint_data(fp, simulation_part, step, t, state, FALSE, NULL, NULL);
2299 if (gmx_fio_close(fp) != 0)
2301 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Read the checkpoint from the open file fp into a local state and copy the
 * parts that are visible here into the trajectory frame fr: atom count,
 * step (converted from 64-bit with gmx_int64_to_int), the FEP lambda,
 * fep_state, the bX/bV/bBox presence flags taken from state.flags, and the
 * box matrix.
 * NOTE(review): the copying of coordinates and velocities and any cleanup
 * of the local state are not visible in this block - confirm.
 */
2305 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2307 /* This next line is nasty because the sub-structures of t_state
2308 * cannot be assumed to be zeroed (or even initialized in ways the
2309 * rest of the code might assume). Using snew would be better, but
2310 * this will all go away for 5.0. */
2312 int simulation_part;
2316 init_state(&state, 0, 0, 0, 0, 0);
2318 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, FALSE, NULL, NULL);
2320 fr->natoms = state.natoms;
2323 fr->step = gmx_int64_to_int(step,
2324 "conversion of checkpoint to trajectory");
2328 fr->lambda = state.lambda[efptFEP];
2329 fr->fep_state = state.fep_state;
/* The presence flags mirror the corresponding bits of the checkpoint state flags */
2331 fr->bX = (state.flags & (1<<estX));
2337 fr->bV = (state.flags & (1<<estV));
2344 fr->bBox = (state.flags & (1<<estBOX));
2347 copy_mat(state.box, fr->box);
/* Open the checkpoint file fn and read every section of it (header, state,
 * kinetic-energy state, energy history, free-energy history, ED state,
 * output-file list and footer), passing out as the last argument of each
 * do_cpt_*() reader. The file is closed at the end and a failing close is
 * reported through gmx_file().
 * NOTE(review): presumably a non-NULL out makes the readers print what they
 * read to that stream; the handling of the ret values is not visible in this
 * block - confirm.
 */
2352 void list_checkpoint(const char *fn, FILE *out)
2356 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2358 int eIntegrator, simulation_part, nppnodes, npme;
2363 int flags_eks, flags_enh, flags_dfh;
2367 gmx_file_position_t *outputfiles;
2370 init_state(&state, -1, -1, -1, -1, 0);
2372 fp = gmx_fio_open(fn, "r");
2373 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2374 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2375 &eIntegrator, &simulation_part, &step, &t, &nppnodes, dd_nc, &npme,
2376 &state.natoms, &state.ngtc, &state.nnhpres, &state.nhchainlength,
2377 &(state.dfhist.nlambda), &state.flags,
2378 &flags_eks, &flags_enh, &flags_dfh, &state.edsamstate.nED, out);
2379 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, state.flags, &state, TRUE, out);
2384 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state.ekinstate, out);
2389 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2390 flags_enh, &state.enerhist, out);
2394 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2395 flags_dfh, &state.dfhist, out);
2400 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state.edsamstate, out);
2405 do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, out, file_version);
2410 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2417 if (gmx_fio_close(fp) != 0)
2419 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Return whether the file name fnm_cp, taken from a checkpoint file, is both
 * the first file name (fns[0]) of an output entry among the nfile entries of
 * fnm and an existing file according to gmx_fexist().
 * NOTE(review): the loop header that advances i is not visible in this
 * block; presumably i runs from 0 until the first matching entry or nfile -
 * confirm.
 */
2426 static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
2430 /* Check if the output file name stored in the checkpoint file
2431 * is one of the output file names of mdrun.
2435 !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
2440 return (i < nfile && gmx_fexist(fnm_cp));
2443 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2444 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2445 gmx_int64_t *cpt_step, t_commrec *cr,
2446 gmx_bool bAppendReq,
2447 int nfile, const t_filenm fnm[],
2448 const char *part_suffix, gmx_bool *bAddPart)
2451 gmx_int64_t step = 0;
2453 /* This next line is nasty because the sub-structures of t_state
2454 * cannot be assumed to be zeroed (or even initialized in ways the
2455 * rest of the code might assume). Using snew would be better, but
2456 * this will all go away for 5.0. */
2459 gmx_file_position_t *outputfiles;
2462 char *fn, suf_up[STRLEN];
2468 if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
2470 *simulation_part = 0;
2474 init_state(&state, 0, 0, 0, 0, 0);
2476 read_checkpoint_data(fp, simulation_part, &step, &t, &state, FALSE,
2477 &nfiles, &outputfiles);
2478 if (gmx_fio_close(fp) != 0)
2480 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2487 for (f = 0; f < nfiles; f++)
2489 if (exist_output_file(outputfiles[f].filename, nfile, fnm))
2494 if (nexist == nfiles)
2496 bAppend = bAppendReq;
2498 else if (nexist > 0)
2501 "Output file appending has been requested,\n"
2502 "but some output files listed in the checkpoint file %s\n"
2503 "are not present or are named differently by the current program:\n",
2505 fprintf(stderr, "output files present:");
2506 for (f = 0; f < nfiles; f++)
2508 if (exist_output_file(outputfiles[f].filename,
2511 fprintf(stderr, " %s", outputfiles[f].filename);
2514 fprintf(stderr, "\n");
2515 fprintf(stderr, "output files not present or named differently:");
2516 for (f = 0; f < nfiles; f++)
2518 if (!exist_output_file(outputfiles[f].filename,
2521 fprintf(stderr, " %s", outputfiles[f].filename);
2524 fprintf(stderr, "\n");
2526 gmx_fatal(FARGS, "File appending requested, but only %d of the %d output files are present", nexist, nfiles);
2534 gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
2536 fn = outputfiles[0].filename;
2537 if (strlen(fn) < 4 ||
2538 gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
2540 gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
2542 /* Set bAddPart to whether the suffix string '.part' is present
2543 * in the log file name.
2545 strcpy(suf_up, part_suffix);
2547 *bAddPart = (strstr(fn, part_suffix) != NULL ||
2548 strstr(fn, suf_up) != NULL);
2556 gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
2558 if (*simulation_part > 0 && bAppendReq)
2560 gmx_bcast(sizeof(bAppend), &bAppend, cr);
2561 gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
2564 if (NULL != cpt_step)