2 * This file is part of the GROMACS molecular simulation package.
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
6 * Copyright (c) 2012,2013, by the GROMACS development team, led by
7 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
8 * others, as listed in the AUTHORS file in the top-level source
9 * directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /* The source code in this file should be thread-safe.
39 Please keep it that way. */
49 #ifdef HAVE_SYS_TIME_H
54 #ifdef GMX_NATIVE_WINDOWS
57 #include <sys/locking.h>
71 #include "gmx_random.h"
72 #include "checkpoint.h"
78 #include "buildinfo.h"
85 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
87 gmx_ctime_r(const time_t *clock, char *buf, int n);
90 #define CPT_MAGIC1 171817
91 #define CPT_MAGIC2 171819
92 #define CPTSTRLEN 1024
95 #define GMX_CPT_BUILD_DP 1
97 #define GMX_CPT_BUILD_DP 0
100 /* cpt_version should normally only be changed
101 * when the header of footer format changes.
102 * The state data format itself is backward and forward compatible.
103 * But old code can not read a new entry that is present in the file
104 * (but can read a new format when new entries are not present).
106 static const int cpt_version = 15;
109 const char *est_names[estNR] =
112 "box", "box-rel", "box-v", "pres_prev",
113 "nosehoover-xi", "thermostat-integral",
114 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
115 "disre_initf", "disre_rm3tav",
116 "orire_initf", "orire_Dtav",
117 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng", "MC-rng-i"
121 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
124 const char *eeks_names[eeksNR] =
126 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
127 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
131 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
132 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
133 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
134 eenhENERGY_DELTA_H_NN,
135 eenhENERGY_DELTA_H_LIST,
136 eenhENERGY_DELTA_H_STARTTIME,
137 eenhENERGY_DELTA_H_STARTLAMBDA,
141 const char *eenh_names[eenhNR] =
143 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
144 "energy_sum_sim", "energy_nsum_sim",
145 "energy_nsteps", "energy_nsteps_sim",
147 "energy_delta_h_list",
148 "energy_delta_h_start_time",
149 "energy_delta_h_start_lambda"
152 /* free energy history variables -- need to be preserved over checkpoint */
154 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
155 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
157 /* free energy history variable names */
158 const char *edfh_names[edfhNR] =
160 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
161 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
164 #ifdef GMX_NATIVE_WINDOWS
166 gmx_wintruncate(const char *filename, __int64 size)
169 /*we do this elsewhere*/
175 fp = fopen(filename, "rb+");
182 return _chsize_s( fileno(fp), size);
189 ecprREAL, ecprRVEC, ecprMATRIX
193 cptpEST, cptpEEKS, cptpEENH, cptpEDFH
195 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
196 cptpEST - state variables.
197 cptpEEKS - Kinetic energy state variables.
198 cptpEENH - Energy history state variables.
199 cptpEDFH - free energy history variables.
203 static const char *st_names(int cptp, int ecpt)
207 case cptpEST: return est_names [ecpt]; break;
208 case cptpEEKS: return eeks_names[ecpt]; break;
209 case cptpEENH: return eenh_names[ecpt]; break;
210 case cptpEDFH: return edfh_names[ecpt]; break;
216 static void cp_warning(FILE *fp)
218 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
221 static void cp_error()
223 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
226 static void do_cpt_string_err(XDR *xd, gmx_bool bRead, const char *desc, char **s, FILE *list)
234 res = xdr_string(xd, s, CPTSTRLEN);
241 fprintf(list, "%s = %s\n", desc, *s);
246 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
250 res = xdr_int(xd, i);
257 fprintf(list, "%s = %d\n", desc, *i);
262 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
268 fprintf(list, "%s = ", desc);
270 for (j = 0; j < n && res; j++)
272 res &= xdr_u_char(xd, &i[j]);
275 fprintf(list, "%02x", i[j]);
290 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
292 if (do_cpt_int(xd, desc, i, list) < 0)
298 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_large_int_t *i, FILE *list)
301 char buf[STEPSTRSIZE];
303 res = xdr_gmx_large_int(xd, i, "reading checkpoint file");
310 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
314 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
318 res = xdr_double(xd, f);
325 fprintf(list, "%s = %f\n", desc, *f);
329 static void do_cpt_real_err(XDR *xd, const char *desc, real *f)
334 res = xdr_double(xd, f);
336 res = xdr_float(xd, f);
344 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
348 for (i = 0; i < n; i++)
350 for (j = 0; j < DIM; j++)
352 do_cpt_real_err(xd, desc, &f[i][j]);
358 pr_rvecs(list, 0, desc, f, n);
362 /* If nval >= 0, nval is used; on read this should match the passed value.
363 * If nval n<0, *nptr is used; on read the value is stored in nptr
365 static int do_cpte_reals_low(XDR *xd, int cptp, int ecpt, int sflags,
366 int nval, int *nptr, real **v,
367 FILE *list, int erealtype)
371 int dtc = xdr_datatype_float;
373 int dtc = xdr_datatype_double;
375 real *vp, *va = NULL;
390 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
395 res = xdr_int(xd, &nf);
406 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), nval, nf);
415 res = xdr_int(xd, &dt);
422 fprintf(stderr, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
423 st_names(cptp, ecpt), xdr_datatype_names[dtc],
424 xdr_datatype_names[dt]);
426 if (list || !(sflags & (1<<ecpt)))
439 if (dt == xdr_datatype_float)
441 if (dtc == xdr_datatype_float)
449 res = xdr_vector(xd, (char *)vf, nf,
450 (unsigned int)sizeof(float), (xdrproc_t)xdr_float);
455 if (dtc != xdr_datatype_float)
457 for (i = 0; i < nf; i++)
466 if (dtc == xdr_datatype_double)
474 res = xdr_vector(xd, (char *)vd, nf,
475 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
480 if (dtc != xdr_datatype_double)
482 for (i = 0; i < nf; i++)
495 pr_reals(list, 0, st_names(cptp, ecpt), vp, nf);
498 pr_rvecs(list, 0, st_names(cptp, ecpt), (rvec *)vp, nf/3);
501 gmx_incons("Unknown checkpoint real type");
513 /* This function stores n along with the reals for reading,
514 * but on reading it assumes that n matches the value in the checkpoint file,
515 * a fatal error is generated when this is not the case.
517 static int do_cpte_reals(XDR *xd, int cptp, int ecpt, int sflags,
518 int n, real **v, FILE *list)
520 return do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, v, list, ecprREAL);
523 /* This function does the same as do_cpte_reals,
524 * except that on reading it ignores the passed value of *n
525 * and stored the value read from the checkpoint file in *n.
527 static int do_cpte_n_reals(XDR *xd, int cptp, int ecpt, int sflags,
528 int *n, real **v, FILE *list)
530 return do_cpte_reals_low(xd, cptp, ecpt, sflags, -1, n, v, list, ecprREAL);
533 static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
538 return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
541 static int do_cpte_ints(XDR *xd, int cptp, int ecpt, int sflags,
542 int n, int **v, FILE *list)
545 int dtc = xdr_datatype_int;
550 res = xdr_int(xd, &nf);
555 if (list == NULL && v != NULL && nf != n)
557 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
560 res = xdr_int(xd, &dt);
567 gmx_fatal(FARGS, "Type mismatch for state entry %s, code type is %s, file type is %s\n",
568 st_names(cptp, ecpt), xdr_datatype_names[dtc],
569 xdr_datatype_names[dt]);
571 if (list || !(sflags & (1<<ecpt)) || v == NULL)
584 res = xdr_vector(xd, (char *)vp, nf,
585 (unsigned int)sizeof(int), (xdrproc_t)xdr_int);
592 pr_ivec(list, 0, st_names(cptp, ecpt), vp, nf, TRUE);
602 static int do_cpte_int(XDR *xd, int cptp, int ecpt, int sflags,
605 return do_cpte_ints(xd, cptp, ecpt, sflags, 1, &i, list);
608 static int do_cpte_doubles(XDR *xd, int cptp, int ecpt, int sflags,
609 int n, double **v, FILE *list)
612 int dtc = xdr_datatype_double;
613 double *vp, *va = NULL;
617 res = xdr_int(xd, &nf);
622 if (list == NULL && nf != n)
624 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
627 res = xdr_int(xd, &dt);
634 gmx_fatal(FARGS, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
635 st_names(cptp, ecpt), xdr_datatype_names[dtc],
636 xdr_datatype_names[dt]);
638 if (list || !(sflags & (1<<ecpt)))
651 res = xdr_vector(xd, (char *)vp, nf,
652 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
659 pr_doubles(list, 0, st_names(cptp, ecpt), vp, nf);
669 static int do_cpte_double(XDR *xd, int cptp, int ecpt, int sflags,
670 double *r, FILE *list)
672 return do_cpte_doubles(xd, cptp, ecpt, sflags, 1, &r, list);
676 static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
677 int n, rvec **v, FILE *list)
681 return do_cpte_reals_low(xd, cptp, ecpt, sflags,
682 n*DIM, NULL, (real **)v, list, ecprRVEC);
685 static int do_cpte_matrix(XDR *xd, int cptp, int ecpt, int sflags,
686 matrix v, FILE *list)
691 vr = (real *)&(v[0][0]);
692 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
693 DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
695 if (list && ret == 0)
697 pr_rvecs(list, 0, st_names(cptp, ecpt), v, DIM);
704 static int do_cpte_nmatrix(XDR *xd, int cptp, int ecpt, int sflags,
705 int n, real **v, FILE *list)
710 char name[CPTSTRLEN];
717 for (i = 0; i < n; i++)
721 reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
722 if (list && reti == 0)
724 sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
725 pr_reals(list, 0, name, v[i], n);
735 static int do_cpte_matrices(XDR *xd, int cptp, int ecpt, int sflags,
736 int n, matrix **v, FILE *list)
739 matrix *vp, *va = NULL;
745 res = xdr_int(xd, &nf);
750 if (list == NULL && nf != n)
752 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
754 if (list || !(sflags & (1<<ecpt)))
767 snew(vr, nf*DIM*DIM);
768 for (i = 0; i < nf; i++)
770 for (j = 0; j < DIM; j++)
772 for (k = 0; k < DIM; k++)
774 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
778 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
779 nf*DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
780 for (i = 0; i < nf; i++)
782 for (j = 0; j < DIM; j++)
784 for (k = 0; k < DIM; k++)
786 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
792 if (list && ret == 0)
794 for (i = 0; i < nf; i++)
796 pr_rvecs(list, 0, st_names(cptp, ecpt), vp[i], DIM);
807 static void do_cpt_header(XDR *xd, gmx_bool bRead, int *file_version,
808 char **version, char **btime, char **buser, char **bhost,
810 char **fprog, char **ftime,
811 int *eIntegrator, int *simulation_part,
812 gmx_large_int_t *step, double *t,
813 int *nnodes, int *dd_nc, int *npme,
814 int *natoms, int *ngtc, int *nnhpres, int *nhchainlength,
815 int *nlambda, int *flags_state,
816 int *flags_eks, int *flags_enh, int *flags_dfh,
834 res = xdr_int(xd, &magic);
837 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
839 if (magic != CPT_MAGIC1)
841 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
842 "The checkpoint file is corrupted or not a checkpoint file",
848 gmx_gethostname(fhost, 255);
850 do_cpt_string_err(xd, bRead, "GROMACS version", version, list);
851 do_cpt_string_err(xd, bRead, "GROMACS build time", btime, list);
852 do_cpt_string_err(xd, bRead, "GROMACS build user", buser, list);
853 do_cpt_string_err(xd, bRead, "GROMACS build host", bhost, list);
854 do_cpt_string_err(xd, bRead, "generating program", fprog, list);
855 do_cpt_string_err(xd, bRead, "generation time", ftime, list);
856 *file_version = cpt_version;
857 do_cpt_int_err(xd, "checkpoint file version", file_version, list);
858 if (*file_version > cpt_version)
860 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", *file_version, cpt_version);
862 if (*file_version >= 13)
864 do_cpt_int_err(xd, "GROMACS double precision", double_prec, list);
870 if (*file_version >= 12)
872 do_cpt_string_err(xd, bRead, "generating host", &fhost, list);
878 do_cpt_int_err(xd, "#atoms", natoms, list);
879 do_cpt_int_err(xd, "#T-coupling groups", ngtc, list);
880 if (*file_version >= 10)
882 do_cpt_int_err(xd, "#Nose-Hoover T-chains", nhchainlength, list);
888 if (*file_version >= 11)
890 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", nnhpres, list);
896 if (*file_version >= 14)
898 do_cpt_int_err(xd, "# of total lambda states ", nlambda, list);
904 do_cpt_int_err(xd, "integrator", eIntegrator, list);
905 if (*file_version >= 3)
907 do_cpt_int_err(xd, "simulation part #", simulation_part, list);
911 *simulation_part = 1;
913 if (*file_version >= 5)
915 do_cpt_step_err(xd, "step", step, list);
919 do_cpt_int_err(xd, "step", &idum, list);
922 do_cpt_double_err(xd, "t", t, list);
923 do_cpt_int_err(xd, "#PP-nodes", nnodes, list);
925 do_cpt_int_err(xd, "dd_nc[x]", dd_nc ? &(dd_nc[0]) : &idum, list);
926 do_cpt_int_err(xd, "dd_nc[y]", dd_nc ? &(dd_nc[1]) : &idum, list);
927 do_cpt_int_err(xd, "dd_nc[z]", dd_nc ? &(dd_nc[2]) : &idum, list);
928 do_cpt_int_err(xd, "#PME-only nodes", npme, list);
929 do_cpt_int_err(xd, "state flags", flags_state, list);
930 if (*file_version >= 4)
932 do_cpt_int_err(xd, "ekin data flags", flags_eks, list);
933 do_cpt_int_err(xd, "energy history flags", flags_enh, list);
938 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
939 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
940 (1<<(estORIRE_DTAV+2)) |
941 (1<<(estORIRE_DTAV+3))));
943 if (*file_version >= 14)
945 do_cpt_int_err(xd, "df history flags", flags_dfh, list);
952 if (*file_version >= 15)
954 do_cpt_int_err(xd, "ED data sets", nED, list);
962 static int do_cpt_footer(XDR *xd, gmx_bool bRead, int file_version)
967 if (file_version >= 2)
970 res = xdr_int(xd, &magic);
975 if (magic != CPT_MAGIC2)
984 static int do_cpt_state(XDR *xd, gmx_bool bRead,
985 int fflags, t_state *state,
986 gmx_bool bReadRNG, FILE *list)
989 int **rng_p, **rngi_p;
996 nnht = state->nhchainlength*state->ngtc;
997 nnhtp = state->nhchainlength*state->nnhpres;
1001 rng_p = (int **)&state->ld_rng;
1002 rngi_p = &state->ld_rngi;
1006 /* Do not read the RNG data */
1011 if (bRead) /* we need to allocate space for dfhist if we are reading */
1013 init_df_history(&state->dfhist,state->dfhist.nlambda);
1016 /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
1018 sflags = state->flags;
1019 for (i = 0; (i < estNR && ret == 0); i++)
1021 if (fflags & (1<<i))
1025 case estLAMBDA: ret = do_cpte_reals(xd, cptpEST, i, sflags, efptNR, &(state->lambda), list); break;
1026 case estFEPSTATE: ret = do_cpte_int (xd, cptpEST, i, sflags, &state->fep_state, list); break;
1027 case estBOX: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box, list); break;
1028 case estBOX_REL: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box_rel, list); break;
1029 case estBOXV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->boxv, list); break;
1030 case estPRES_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->pres_prev, list); break;
1031 case estSVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->svir_prev, list); break;
1032 case estFVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->fvir_prev, list); break;
1033 case estNH_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_xi, list); break;
1034 case estNH_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_vxi, list); break;
1035 case estNHPRES_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_xi, list); break;
1036 case estNHPRES_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_vxi, list); break;
1037 case estTC_INT: ret = do_cpte_doubles(xd, cptpEST, i, sflags, state->ngtc, &state->therm_integral, list); break;
1038 case estVETA: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->veta, list); break;
1039 case estVOL0: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->vol0, list); break;
1040 case estX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->x, list); break;
1041 case estV: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->v, list); break;
1042 case estSDX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->sd_X, list); break;
1043 case estLD_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrng, rng_p, list); break;
1044 case estLD_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrngi, rngi_p, list); break;
1045 case estMC_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nmcrng, (int **)&state->mc_rng, list); break;
1046 case estMC_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, 1, &state->mc_rngi, list); break;
1047 case estDISRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.disre_initf, list); break;
1048 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1049 case estORIRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.orire_initf, list); break;
1050 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1052 gmx_fatal(FARGS, "Unknown state entry %d\n"
1053 "You are probably reading a new checkpoint file with old code", i);
1061 static int do_cpt_ekinstate(XDR *xd, gmx_bool bRead,
1062 int fflags, ekinstate_t *ekins,
1070 for (i = 0; (i < eeksNR && ret == 0); i++)
1072 if (fflags & (1<<i))
1077 case eeksEKIN_N: ret = do_cpte_int(xd, cptpEEKS, i, fflags, &ekins->ekin_n, list); break;
1078 case eeksEKINH: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1079 case eeksEKINF: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1080 case eeksEKINO: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1081 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, cptpEEKS, i, fflags, ekins->ekin_total, list); break;
1082 case eeksEKINSCALEF: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscalef_nhc, list); break;
1083 case eeksVSCALE: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->vscale_nhc, list); break;
1084 case eeksEKINSCALEH: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->ekinscaleh_nhc, list); break;
1085 case eeksDEKINDL: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->dekindl, list); break;
1086 case eeksMVCOS: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->mvcos, list); break;
1088 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1089 "You are probably reading a new checkpoint file with old code", i);
1098 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1099 int fflags, energyhistory_t *enerhist,
1110 enerhist->nsteps = 0;
1112 enerhist->nsteps_sim = 0;
1113 enerhist->nsum_sim = 0;
1114 enerhist->dht = NULL;
1116 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1118 snew(enerhist->dht, 1);
1119 enerhist->dht->ndh = NULL;
1120 enerhist->dht->dh = NULL;
1121 enerhist->dht->start_lambda_set = FALSE;
1125 for (i = 0; (i < eenhNR && ret == 0); i++)
1127 if (fflags & (1<<i))
1131 case eenhENERGY_N: ret = do_cpte_int(xd, cptpEENH, i, fflags, &enerhist->nener, list); break;
1132 case eenhENERGY_AVER: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_ave, list); break;
1133 case eenhENERGY_SUM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum, list); break;
1134 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1135 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum_sim, list); break;
1136 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1137 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1138 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1139 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd, eenh_names[i], &(enerhist->dht->nndh), list);
1140 if (bRead) /* now allocate memory for it */
1142 snew(enerhist->dht->dh, enerhist->dht->nndh);
1143 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1144 for (j = 0; j < enerhist->dht->nndh; j++)
1146 enerhist->dht->ndh[j] = 0;
1147 enerhist->dht->dh[j] = NULL;
1151 case eenhENERGY_DELTA_H_LIST:
1152 for (j = 0; j < enerhist->dht->nndh; j++)
1154 ret = do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1157 case eenhENERGY_DELTA_H_STARTTIME:
1158 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1159 case eenhENERGY_DELTA_H_STARTLAMBDA:
1160 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1162 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1163 "You are probably reading a new checkpoint file with old code", i);
1168 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1170 /* Assume we have an old file format and copy sum to sum_sim */
1171 srenew(enerhist->ener_sum_sim, enerhist->nener);
1172 for (i = 0; i < enerhist->nener; i++)
1174 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1176 fflags |= (1<<eenhENERGY_SUM_SIM);
1179 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1180 !(fflags & (1<<eenhENERGY_NSTEPS)))
1182 /* Assume we have an old file format and copy nsum to nsteps */
1183 enerhist->nsteps = enerhist->nsum;
1184 fflags |= (1<<eenhENERGY_NSTEPS);
1186 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1187 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1189 /* Assume we have an old file format and copy nsum to nsteps */
1190 enerhist->nsteps_sim = enerhist->nsum_sim;
1191 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1197 static int do_cpt_df_hist(XDR *xd, gmx_bool bRead, int fflags, df_history_t *dfhist, FILE *list)
1202 nlambda = dfhist->nlambda;
1205 for (i = 0; (i < edfhNR && ret == 0); i++)
1207 if (fflags & (1<<i))
1211 case edfhBEQUIL: ret = do_cpte_int(xd, cptpEDFH, i, fflags, &dfhist->bEquil, list); break;
1212 case edfhNATLAMBDA: ret = do_cpte_ints(xd, cptpEDFH, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1213 case edfhWLHISTO: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1214 case edfhWLDELTA: ret = do_cpte_real(xd, cptpEDFH, i, fflags, &dfhist->wl_delta, list); break;
1215 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1216 case edfhSUMDG: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1217 case edfhSUMMINVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1218 case edfhSUMVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1219 case edfhACCUMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p, list); break;
1220 case edfhACCUMM: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m, list); break;
1221 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p2, list); break;
1222 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m2, list); break;
1223 case edfhTIJ: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij, list); break;
1224 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1227 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1228 "You are probably reading a new checkpoint file with old code", i);
1237 /* This function stores the last whole configuration of the reference and
1238 * average structure in the .cpt file
1240 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1241 edsamstate_t *EDstate, FILE *list)
1248 EDstate->bFromCpt = bRead;
1250 if (EDstate->nED <= 0)
1255 /* When reading, init_edsam has not been called yet,
1256 * so we have to allocate memory first. */
1259 snew(EDstate->nref, EDstate->nED);
1260 snew(EDstate->old_sref, EDstate->nED);
1261 snew(EDstate->nav, EDstate->nED);
1262 snew(EDstate->old_sav, EDstate->nED);
1265 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1266 for (i = 0; i < EDstate->nED; i++)
1268 /* Reference structure SREF */
1269 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1270 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1271 sprintf(buf, "ED%d x_ref", i+1);
1274 snew(EDstate->old_sref[i], EDstate->nref[i]);
1275 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1279 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1282 /* Average structure SAV */
1283 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1284 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1285 sprintf(buf, "ED%d x_av", i+1);
1288 snew(EDstate->old_sav[i], EDstate->nav[i]);
1289 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1293 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1301 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1302 gmx_file_position_t **p_outputfiles, int *nfiles,
1303 FILE *list, int file_version)
1307 gmx_off_t mask = 0xFFFFFFFFL;
1308 int offset_high, offset_low;
1310 gmx_file_position_t *outputfiles;
1312 if (do_cpt_int(xd, "number of output files", nfiles, list) != 0)
1319 snew(*p_outputfiles, *nfiles);
1322 outputfiles = *p_outputfiles;
1324 for (i = 0; i < *nfiles; i++)
1326 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1329 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1330 strncpy(outputfiles[i].filename, buf, CPTSTRLEN-1);
1336 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1340 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1344 #if (SIZEOF_GMX_OFF_T > 4)
1345 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1347 outputfiles[i].offset = offset_low;
1352 buf = outputfiles[i].filename;
1353 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1355 offset = outputfiles[i].offset;
1363 #if (SIZEOF_GMX_OFF_T > 4)
1364 offset_low = (int) (offset & mask);
1365 offset_high = (int) ((offset >> 32) & mask);
1367 offset_low = offset;
1371 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1375 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1380 if (file_version >= 8)
1382 if (do_cpt_int(xd, "file_checksum_size", &(outputfiles[i].chksum_size),
1387 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfiles[i].chksum, list) != 0)
1394 outputfiles[i].chksum_size = -1;
1401 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1402 FILE *fplog, t_commrec *cr,
1403 int eIntegrator, int simulation_part,
1404 gmx_bool bExpanded, int elamstats,
1405 gmx_large_int_t step, double t, t_state *state)
1415 char *fntemp; /* the temporary checkpoint file name */
1417 char timebuf[STRLEN];
1418 int nppnodes, npmenodes, flag_64bit;
1419 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1420 gmx_file_position_t *outputfiles;
1423 int flags_eks, flags_enh, flags_dfh, i;
1428 if (DOMAINDECOMP(cr))
1430 nppnodes = cr->dd->nnodes;
1431 npmenodes = cr->npmenodes;
1435 nppnodes = cr->nnodes;
1445 #ifndef GMX_NO_RENAME
1446 /* make the new temporary filename */
1447 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1449 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1450 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1451 strcat(fntemp, suffix);
1452 strcat(fntemp, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1454 /* if we can't rename, we just overwrite the cpt file.
1455 * dangerous if interrupted.
1457 snew(fntemp, strlen(fn));
1461 gmx_ctime_r(&now, timebuf, STRLEN);
1465 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1466 gmx_step_str(step, buf), timebuf);
1469 /* Get offsets for open files */
1470 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1472 fp = gmx_fio_open(fntemp, "w");
1474 if (state->ekinstate.bUpToDate)
1477 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1478 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1479 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1487 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1489 flags_enh |= (1<<eenhENERGY_N);
1490 if (state->enerhist.nsum > 0)
1492 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1493 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1495 if (state->enerhist.nsum_sim > 0)
1497 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1498 (1<<eenhENERGY_NSUM_SIM));
1500 if (state->enerhist.dht)
1502 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1503 (1<< eenhENERGY_DELTA_H_LIST) |
1504 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1505 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1511 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1512 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1515 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1517 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1519 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1520 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1528 /* We can check many more things now (CPU, acceleration, etc), but
1529 * it is highly unlikely to have two separate builds with exactly
1530 * the same version, user, time, and build host!
1533 version = gmx_strdup(VERSION);
1534 btime = gmx_strdup(BUILD_TIME);
1535 buser = gmx_strdup(BUILD_USER);
1536 bhost = gmx_strdup(BUILD_HOST);
1538 double_prec = GMX_CPT_BUILD_DP;
1539 fprog = gmx_strdup(Program());
1541 ftime = &(timebuf[0]);
1543 do_cpt_header(gmx_fio_getxdr(fp), FALSE, &file_version,
1544 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1545 &eIntegrator, &simulation_part, &step, &t, &nppnodes,
1546 DOMAINDECOMP(cr) ? cr->dd->nc : NULL, &npmenodes,
1547 &state->natoms, &state->ngtc, &state->nnhpres,
1548 &state->nhchainlength, &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
1549 &state->edsamstate.nED,
1558 if ((do_cpt_state(gmx_fio_getxdr(fp), FALSE, state->flags, state, TRUE, NULL) < 0) ||
1559 (do_cpt_ekinstate(gmx_fio_getxdr(fp), FALSE, flags_eks, &state->ekinstate, NULL) < 0) ||
1560 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, &state->enerhist, NULL) < 0) ||
1561 (do_cpt_df_hist(gmx_fio_getxdr(fp), FALSE, flags_dfh, &state->dfhist, NULL) < 0) ||
1562 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, &state->edsamstate, NULL) < 0) ||
1563 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, &noutputfiles, NULL,
1566 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1569 do_cpt_footer(gmx_fio_getxdr(fp), FALSE, file_version);
1571 /* we really, REALLY, want to make sure to physically write the checkpoint,
1572 and all the files it depends on, out to disk. Because we've
1573 opened the checkpoint with gmx_fio_open(), it's in our list
1575 ret = gmx_fio_all_output_fsync();
1581 "Cannot fsync '%s'; maybe you are out of disk space?",
1582 gmx_fio_getname(ret));
1584 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == NULL)
1594 if (gmx_fio_close(fp) != 0)
1596 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1599 /* we don't move the checkpoint if the user specified they didn't want it,
1600 or if the fsyncs failed */
1601 #ifndef GMX_NO_RENAME
1602 if (!bNumberAndKeep && !ret)
1606 /* Rename the previous checkpoint file */
1608 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1609 strcat(buf, "_prev");
1610 strcat(buf, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1612 /* we copy here so that if something goes wrong between now and
1613 * the rename below, there's always a state.cpt.
1614 * If renames are atomic (such as in POSIX systems),
1615 * this copying should be unneccesary.
1617 gmx_file_copy(fn, buf, FALSE);
1618 /* We don't really care if this fails:
1619 * there's already a new checkpoint.
1622 gmx_file_rename(fn, buf);
1625 if (gmx_file_rename(fntemp, fn) != 0)
1627 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1630 #endif /* GMX_NO_RENAME */
1636 /*code for alternate checkpointing scheme. moved from top of loop over
1638 fcRequestCheckPoint();
1639 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
1641 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
1643 #endif /* end GMX_FAHCORE block */
1646 static void print_flag_mismatch(FILE *fplog, int sflags, int fflags)
1650 fprintf(fplog, "\nState entry mismatch between the simulation and the checkpoint file\n");
1651 fprintf(fplog, "Entries which are not present in the checkpoint file will not be updated\n");
1652 fprintf(fplog, " %24s %11s %11s\n", "", "simulation", "checkpoint");
1653 for (i = 0; i < estNR; i++)
1655 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1657 fprintf(fplog, " %24s %11s %11s\n",
1659 (sflags & (1<<i)) ? " present " : "not present",
1660 (fflags & (1<<i)) ? " present " : "not present");
1665 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
1667 FILE *fp = fplog ? fplog : stderr;
1671 fprintf(fp, " %s mismatch,\n", type);
1672 fprintf(fp, " current program: %d\n", p);
1673 fprintf(fp, " checkpoint file: %d\n", f);
1679 static void check_string(FILE *fplog, const char *type, const char *p,
1680 const char *f, gmx_bool *mm)
1682 FILE *fp = fplog ? fplog : stderr;
1684 if (strcmp(p, f) != 0)
1686 fprintf(fp, " %s mismatch,\n", type);
1687 fprintf(fp, " current program: %s\n", p);
1688 fprintf(fp, " checkpoint file: %s\n", f);
1694 static void check_match(FILE *fplog,
1696 char *btime, char *buser, char *bhost, int double_prec,
1698 t_commrec *cr, gmx_bool bPartDecomp, int npp_f, int npme_f,
1699 ivec dd_nc, ivec dd_nc_f)
1706 check_string(fplog, "Version", VERSION, version, &mm);
1707 check_string(fplog, "Build time", BUILD_TIME, btime, &mm);
1708 check_string(fplog, "Build user", BUILD_USER, buser, &mm);
1709 check_string(fplog, "Build host", BUILD_HOST, bhost, &mm);
1710 check_int (fplog, "Double prec.", GMX_CPT_BUILD_DP, double_prec, &mm);
1711 check_string(fplog, "Program name", Program(), fprog, &mm);
1713 check_int (fplog, "#nodes", cr->nnodes, npp_f+npme_f, &mm);
1722 check_int (fplog, "#PME-nodes", cr->npmenodes, npme_f, &mm);
1725 if (cr->npmenodes >= 0)
1727 npp -= cr->npmenodes;
1731 check_int (fplog, "#DD-cells[x]", dd_nc[XX], dd_nc_f[XX], &mm);
1732 check_int (fplog, "#DD-cells[y]", dd_nc[YY], dd_nc_f[YY], &mm);
1733 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], dd_nc_f[ZZ], &mm);
1740 "Gromacs binary or parallel settings not identical to previous run.\n"
1741 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1742 fplog ? ",\n see the log file for details" : "");
1747 "Gromacs binary or parallel settings not identical to previous run.\n"
1748 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1753 static void read_checkpoint(const char *fn, FILE **pfplog,
1754 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
1755 int eIntegrator, int *init_fep_state, gmx_large_int_t *step, double *t,
1756 t_state *state, gmx_bool *bReadRNG, gmx_bool *bReadEkin,
1757 int *simulation_part,
1758 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend)
1763 char *version, *btime, *buser, *bhost, *fprog, *ftime;
1765 char filename[STRLEN], buf[STEPSTRSIZE];
1766 int nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
1768 int natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
1771 gmx_file_position_t *outputfiles;
1773 t_fileio *chksum_file;
1774 FILE * fplog = *pfplog;
1775 unsigned char digest[16];
1776 #if !defined __native_client__ && !defined GMX_NATIVE_WINDOWS
1777 struct flock fl; /* don't initialize here: the struct order is OS
1781 const char *int_warn =
1782 "WARNING: The checkpoint file was generated with integrator %s,\n"
1783 " while the simulation uses integrator %s\n\n";
1784 const char *sd_note =
1785 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1786 " while the simulation uses %d SD or BD nodes,\n"
1787 " continuation will be exact, except for the random state\n\n";
1789 #if !defined __native_client__ && !defined GMX_NATIVE_WINDOWS
1790 fl.l_type = F_WRLCK;
1791 fl.l_whence = SEEK_SET;
1800 "read_checkpoint not (yet) supported with particle decomposition");
1803 fp = gmx_fio_open(fn, "r");
1804 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
1805 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1806 &eIntegrator_f, simulation_part, step, t,
1807 &nppnodes_f, dd_nc_f, &npmenodes_f,
1808 &natoms, &ngtc, &nnhpres, &nhchainlength, &nlambda,
1809 &fflags, &flags_eks, &flags_enh, &flags_dfh,
1810 &state->edsamstate.nED, NULL);
1812 if (bAppendOutputFiles &&
1813 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1815 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1818 if (cr == NULL || MASTER(cr))
1820 fprintf(stderr, "\nReading checkpoint file %s generated: %s\n\n",
1824 /* This will not be written if we do appending, since fplog is still NULL then */
1827 fprintf(fplog, "\n");
1828 fprintf(fplog, "Reading checkpoint file %s\n", fn);
1829 fprintf(fplog, " file generated by: %s\n", fprog);
1830 fprintf(fplog, " file generated at: %s\n", ftime);
1831 fprintf(fplog, " GROMACS build time: %s\n", btime);
1832 fprintf(fplog, " GROMACS build user: %s\n", buser);
1833 fprintf(fplog, " GROMACS build host: %s\n", bhost);
1834 fprintf(fplog, " GROMACS double prec.: %d\n", double_prec);
1835 fprintf(fplog, " simulation part #: %d\n", *simulation_part);
1836 fprintf(fplog, " step: %s\n", gmx_step_str(*step, buf));
1837 fprintf(fplog, " time: %f\n", *t);
1838 fprintf(fplog, "\n");
1841 if (natoms != state->natoms)
1843 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", natoms, state->natoms);
1845 if (ngtc != state->ngtc)
1847 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", ngtc, state->ngtc);
1849 if (nnhpres != state->nnhpres)
1851 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", nnhpres, state->nnhpres);
1854 if (nlambda != state->dfhist.nlambda)
1856 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", nlambda, state->dfhist.nlambda);
1859 init_gtc_state(state, state->ngtc, state->nnhpres, nhchainlength); /* need to keep this here to keep the tpr format working */
1860 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1862 if (eIntegrator_f != eIntegrator)
1866 fprintf(stderr, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1868 if (bAppendOutputFiles)
1871 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1872 "Stopping the run to prevent you from ruining all your data...\n"
1873 "If you _really_ know what you are doing, try with the -noappend option.\n");
1877 fprintf(fplog, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1886 else if (bPartDecomp)
1888 nppnodes = cr->nnodes;
1891 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1893 if (cr->npmenodes < 0)
1895 cr->npmenodes = npmenodes_f;
1897 nppnodes = cr->nnodes - cr->npmenodes;
1898 if (nppnodes == nppnodes_f)
1900 for (d = 0; d < DIM; d++)
1904 dd_nc[d] = dd_nc_f[d];
1911 /* The number of PP nodes has not been set yet */
1915 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1917 /* Correct the RNG state size for the number of PP nodes.
1918 * Such assignments should all be moved to one central function.
1920 state->nrng = nppnodes*gmx_rng_n();
1921 state->nrngi = nppnodes;
1925 if (fflags != state->flags)
1930 if (bAppendOutputFiles)
1933 "Output file appending requested, but input and checkpoint states are not identical.\n"
1934 "Stopping the run to prevent you from ruining all your data...\n"
1935 "You can try with the -noappend option, and get more info in the log file.\n");
1938 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1940 gmx_fatal(FARGS, "You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1945 "WARNING: The checkpoint state entries do not match the simulation,\n"
1946 " see the log file for details\n\n");
1952 print_flag_mismatch(fplog, state->flags, fflags);
1957 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1958 nppnodes != nppnodes_f)
1963 fprintf(stderr, sd_note, nppnodes_f, nppnodes);
1967 fprintf(fplog, sd_note, nppnodes_f, nppnodes);
1972 check_match(fplog, version, btime, buser, bhost, double_prec, fprog,
1973 cr, bPartDecomp, nppnodes_f, npmenodes_f, dd_nc, dd_nc_f);
1976 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, fflags, state, *bReadRNG, NULL);
1977 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1978 Investigate for 5.0. */
1983 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), TRUE,
1984 flags_eks, &state->ekinstate, NULL);
1989 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1990 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1992 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
1993 flags_enh, &state->enerhist, NULL);
1999 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
2005 if (file_version < 6)
2007 const char *warn = "Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
2009 fprintf(stderr, "\nWARNING: %s\n\n", warn);
2012 fprintf(fplog, "\nWARNING: %s\n\n", warn);
2014 state->enerhist.nsum = *step;
2015 state->enerhist.nsum_sim = *step;
2018 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), TRUE,
2019 flags_dfh, &state->dfhist, NULL);
2025 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, NULL, file_version);
2031 ret = do_cpt_footer(gmx_fio_getxdr(fp), TRUE, file_version);
2036 if (gmx_fio_close(fp) != 0)
2038 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2047 /* If the user wants to append to output files,
2048 * we use the file pointer positions of the output files stored
2049 * in the checkpoint file and truncate the files such that any frames
2050 * written after the checkpoint time are removed.
2051 * All files are md5sum checked such that we can be sure that
2052 * we do not truncate other (maybe imprortant) files.
2054 if (bAppendOutputFiles)
2056 if (fn2ftp(outputfiles[0].filename) != efLOG)
2058 /* make sure first file is log file so that it is OK to use it for
2061 gmx_fatal(FARGS, "The first output file should always be the log "
2062 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2064 for (i = 0; i < nfiles; i++)
2066 if (outputfiles[i].offset < 0)
2068 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2069 "is larger than 2 GB, but mdrun did not support large file"
2070 " offsets. Can not append. Run mdrun with -noappend",
2071 outputfiles[i].filename);
2074 chksum_file = gmx_fio_open(outputfiles[i].filename, "a");
2077 chksum_file = gmx_fio_open(outputfiles[i].filename, "r+");
2082 /* Note that there are systems where the lock operation
2083 * will succeed, but a second process can also lock the file.
2084 * We should probably try to detect this.
2086 #if defined __native_client__
2090 #elif defined GMX_NATIVE_WINDOWS
2091 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2093 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl) == -1)
2096 if (errno == ENOSYS)
2100 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2104 fprintf(stderr, "\nNOTE: File locking is not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2107 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2111 else if (errno == EACCES || errno == EAGAIN)
2113 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2114 "simulation?", outputfiles[i].filename);
2118 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2119 outputfiles[i].filename, strerror(errno));
2124 /* compute md5 chksum */
2125 if (outputfiles[i].chksum_size != -1)
2127 if (gmx_fio_get_file_md5(chksum_file, outputfiles[i].offset,
2128 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2130 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2131 outputfiles[i].chksum_size,
2132 outputfiles[i].filename);
2135 if (i == 0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2137 if (gmx_fio_seek(chksum_file, outputfiles[i].offset))
2139 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", strerror(errno));
2144 if (i == 0) /*open log file here - so that lock is never lifted
2145 after chksum is calculated */
2147 *pfplog = gmx_fio_getfp(chksum_file);
2151 gmx_fio_close(chksum_file);
2154 /* compare md5 chksum */
2155 if (outputfiles[i].chksum_size != -1 &&
2156 memcmp(digest, outputfiles[i].chksum, 16) != 0)
2160 fprintf(debug, "chksum for %s: ", outputfiles[i].filename);
2161 for (j = 0; j < 16; j++)
2163 fprintf(debug, "%02x", digest[j]);
2165 fprintf(debug, "\n");
2167 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2168 outputfiles[i].filename);
2173 if (i != 0) /*log file is already seeked to correct position */
2175 #ifdef GMX_NATIVE_WINDOWS
2176 rc = gmx_wintruncate(outputfiles[i].filename, outputfiles[i].offset);
2178 rc = truncate(outputfiles[i].filename, outputfiles[i].offset);
2182 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfiles[i].filename);
2192 void load_checkpoint(const char *fn, FILE **fplog,
2193 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
2194 t_inputrec *ir, t_state *state,
2195 gmx_bool *bReadRNG, gmx_bool *bReadEkin,
2196 gmx_bool bAppend, gmx_bool bForceAppend)
2198 gmx_large_int_t step;
2203 /* Read the state from the checkpoint file */
2204 read_checkpoint(fn, fplog,
2205 cr, bPartDecomp, dd_nc,
2206 ir->eI, &(ir->fepvals->init_fep_state), &step, &t, state, bReadRNG, bReadEkin,
2207 &ir->simulation_part, bAppend, bForceAppend);
2211 gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
2212 gmx_bcast(DIM*sizeof(dd_nc[0]), dd_nc, cr);
2213 gmx_bcast(sizeof(step), &step, cr);
2214 gmx_bcast(sizeof(*bReadRNG), bReadRNG, cr);
2215 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2217 ir->bContinuation = TRUE;
2218 if (ir->nsteps >= 0)
2220 ir->nsteps += ir->init_step - step;
2222 ir->init_step = step;
2223 ir->simulation_part += 1;
2226 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2227 gmx_large_int_t *step, double *t, t_state *state,
2229 int *nfiles, gmx_file_position_t **outputfiles)
2232 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2237 int flags_eks, flags_enh, flags_dfh;
2239 gmx_file_position_t *files_loc = NULL;
2242 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2243 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2244 &eIntegrator, simulation_part, step, t, &nppnodes, dd_nc, &npme,
2245 &state->natoms, &state->ngtc, &state->nnhpres, &state->nhchainlength,
2246 &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
2247 &state->edsamstate.nED, NULL);
2249 do_cpt_state(gmx_fio_getxdr(fp), TRUE, state->flags, state, bReadRNG, NULL);
2254 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), TRUE,
2255 flags_eks, &state->ekinstate, NULL);
2260 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2261 flags_enh, &state->enerhist, NULL);
2266 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), TRUE,
2267 flags_dfh, &state->dfhist, NULL);
2273 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
2279 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2280 outputfiles != NULL ? outputfiles : &files_loc,
2281 outputfiles != NULL ? nfiles : &nfiles_loc,
2282 NULL, file_version);
2283 if (files_loc != NULL)
2293 ret = do_cpt_footer(gmx_fio_getxdr(fp), TRUE, file_version);
2307 read_checkpoint_state(const char *fn, int *simulation_part,
2308 gmx_large_int_t *step, double *t, t_state *state)
2312 fp = gmx_fio_open(fn, "r");
2313 read_checkpoint_data(fp, simulation_part, step, t, state, FALSE, NULL, NULL);
2314 if (gmx_fio_close(fp) != 0)
2316 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2320 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2322 /* This next line is nasty because the sub-structures of t_state
2323 * cannot be assumed to be zeroed (or even initialized in ways the
2324 * rest of the code might assume). Using snew would be better, but
2325 * this will all go away for 5.0. */
2327 int simulation_part;
2328 gmx_large_int_t step;
2331 init_state(&state, 0, 0, 0, 0, 0);
2333 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, FALSE, NULL, NULL);
2335 fr->natoms = state.natoms;
2338 fr->step = gmx_large_int_to_int(step,
2339 "conversion of checkpoint to trajectory");
2343 fr->lambda = state.lambda[efptFEP];
2344 fr->fep_state = state.fep_state;
2346 fr->bX = (state.flags & (1<<estX));
2352 fr->bV = (state.flags & (1<<estV));
2359 fr->bBox = (state.flags & (1<<estBOX));
2362 copy_mat(state.box, fr->box);
2367 void list_checkpoint(const char *fn, FILE *out)
2371 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2373 int eIntegrator, simulation_part, nppnodes, npme;
2374 gmx_large_int_t step;
2378 int flags_eks, flags_enh, flags_dfh;
2382 gmx_file_position_t *outputfiles;
2385 init_state(&state, -1, -1, -1, -1, 0);
2387 fp = gmx_fio_open(fn, "r");
2388 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2389 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2390 &eIntegrator, &simulation_part, &step, &t, &nppnodes, dd_nc, &npme,
2391 &state.natoms, &state.ngtc, &state.nnhpres, &state.nhchainlength,
2392 &(state.dfhist.nlambda), &state.flags,
2393 &flags_eks, &flags_enh, &flags_dfh, &state.edsamstate.nED, out);
2394 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, state.flags, &state, TRUE, out);
2399 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), TRUE,
2400 flags_eks, &state.ekinstate, out);
2405 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2406 flags_enh, &state.enerhist, out);
2410 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), TRUE,
2411 flags_dfh, &state.dfhist, out);
2416 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state.edsamstate, out);
2421 do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, out, file_version);
2426 ret = do_cpt_footer(gmx_fio_getxdr(fp), TRUE, file_version);
2433 if (gmx_fio_close(fp) != 0)
2435 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2442 static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
2446 /* Check if the output file name stored in the checkpoint file
2447 * is one of the output file names of mdrun.
2451 !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
2456 return (i < nfile && gmx_fexist(fnm_cp));
2459 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2460 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2461 gmx_large_int_t *cpt_step, t_commrec *cr,
2462 gmx_bool bAppendReq,
2463 int nfile, const t_filenm fnm[],
2464 const char *part_suffix, gmx_bool *bAddPart)
2467 gmx_large_int_t step = 0;
2469 /* This next line is nasty because the sub-structures of t_state
2470 * cannot be assumed to be zeroed (or even initialized in ways the
2471 * rest of the code might assume). Using snew would be better, but
2472 * this will all go away for 5.0. */
2475 gmx_file_position_t *outputfiles;
2478 char *fn, suf_up[STRLEN];
2484 if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
2486 *simulation_part = 0;
2490 init_state(&state, 0, 0, 0, 0, 0);
2492 read_checkpoint_data(fp, simulation_part, &step, &t, &state, FALSE,
2493 &nfiles, &outputfiles);
2494 if (gmx_fio_close(fp) != 0)
2496 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2503 for (f = 0; f < nfiles; f++)
2505 if (exist_output_file(outputfiles[f].filename, nfile, fnm))
2510 if (nexist == nfiles)
2512 bAppend = bAppendReq;
2514 else if (nexist > 0)
2517 "Output file appending has been requested,\n"
2518 "but some output files listed in the checkpoint file %s\n"
2519 "are not present or are named differently by the current program:\n",
2521 fprintf(stderr, "output files present:");
2522 for (f = 0; f < nfiles; f++)
2524 if (exist_output_file(outputfiles[f].filename,
2527 fprintf(stderr, " %s", outputfiles[f].filename);
2530 fprintf(stderr, "\n");
2531 fprintf(stderr, "output files not present or named differently:");
2532 for (f = 0; f < nfiles; f++)
2534 if (!exist_output_file(outputfiles[f].filename,
2537 fprintf(stderr, " %s", outputfiles[f].filename);
2540 fprintf(stderr, "\n");
2542 gmx_fatal(FARGS, "File appending requested, but %d of the %d output files are not present or are named differently", nfiles-nexist, nfiles);
2550 gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
2552 fn = outputfiles[0].filename;
2553 if (strlen(fn) < 4 ||
2554 gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
2556 gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
2558 /* Set bAddPart to whether the suffix string '.part' is present
2559 * in the log file name.
2561 strcpy(suf_up, part_suffix);
2563 *bAddPart = (strstr(fn, part_suffix) != NULL ||
2564 strstr(fn, suf_up) != NULL);
2572 gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
2574 if (*simulation_part > 0 && bAppendReq)
2576 gmx_bcast(sizeof(bAppend), &bAppend, cr);
2577 gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
2580 if (NULL != cpt_step)