1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
30 #ifdef HAVE_SYS_TIME_H
38 #ifdef GMX_NATIVE_WINDOWS
41 #include <sys/locking.h>
55 #include "gmx_random.h"
56 #include "checkpoint.h"
61 #include "buildinfo.h"
67 #define CPT_MAGIC1 171817
68 #define CPT_MAGIC2 171819
69 #define CPTSTRLEN 1024
72 #define GMX_CPT_BUILD_DP 1
74 #define GMX_CPT_BUILD_DP 0
77 /* cpt_version should normally only be changed
78 * when the header of footer format changes.
79 * The state data format itself is backward and forward compatible.
80 * But old code can not read a new entry that is present in the file
81 * (but can read a new format when new entries are not present).
83 static const int cpt_version = 15;
86 const char *est_names[estNR] =
89 "box", "box-rel", "box-v", "pres_prev",
90 "nosehoover-xi", "thermostat-integral",
91 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
92 "disre_initf", "disre_rm3tav",
93 "orire_initf", "orire_Dtav",
94 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng", "MC-rng-i"
98 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
101 const char *eeks_names[eeksNR] =
103 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
104 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
108 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
109 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
110 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
111 eenhENERGY_DELTA_H_NN,
112 eenhENERGY_DELTA_H_LIST,
113 eenhENERGY_DELTA_H_STARTTIME,
114 eenhENERGY_DELTA_H_STARTLAMBDA,
118 const char *eenh_names[eenhNR] =
120 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
121 "energy_sum_sim", "energy_nsum_sim",
122 "energy_nsteps", "energy_nsteps_sim",
124 "energy_delta_h_list",
125 "energy_delta_h_start_time",
126 "energy_delta_h_start_lambda"
129 /* free energy history variables -- need to be preserved over checkpoint */
131 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
132 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
134 /* free energy history variable names */
135 const char *edfh_names[edfhNR] =
137 "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
138 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
141 #ifdef GMX_NATIVE_WINDOWS
143 gmx_wintruncate(const char *filename, __int64 size)
146 /*we do this elsewhere*/
152 fp = fopen(filename, "rb+");
159 return _chsize_s( fileno(fp), size);
166 ecprREAL, ecprRVEC, ecprMATRIX
170 cptpEST, cptpEEKS, cptpEENH, cptpEDFH
172 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
173 cptpEST - state variables.
174 cptpEEKS - Kinetic energy state variables.
175 cptpEENH - Energy history state variables.
176 cptpEDFH - free energy history variables.
180 static const char *st_names(int cptp, int ecpt)
184 case cptpEST: return est_names [ecpt]; break;
185 case cptpEEKS: return eeks_names[ecpt]; break;
186 case cptpEENH: return eenh_names[ecpt]; break;
187 case cptpEDFH: return edfh_names[ecpt]; break;
193 static void cp_warning(FILE *fp)
195 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
198 static void cp_error()
200 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
203 static void do_cpt_string_err(XDR *xd, gmx_bool bRead, const char *desc, char **s, FILE *list)
211 res = xdr_string(xd, s, CPTSTRLEN);
218 fprintf(list, "%s = %s\n", desc, *s);
223 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
227 res = xdr_int(xd, i);
234 fprintf(list, "%s = %d\n", desc, *i);
239 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
245 fprintf(list, "%s = ", desc);
247 for (j = 0; j < n && res; j++)
249 res &= xdr_u_char(xd, &i[j]);
252 fprintf(list, "%02x", i[j]);
267 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
269 if (do_cpt_int(xd, desc, i, list) < 0)
275 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_large_int_t *i, FILE *list)
278 char buf[STEPSTRSIZE];
280 res = xdr_gmx_large_int(xd, i);
287 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
291 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
295 res = xdr_double(xd, f);
302 fprintf(list, "%s = %f\n", desc, *f);
306 static void do_cpt_real_err(XDR *xd, real *f)
311 res = xdr_double(xd, f);
313 res = xdr_float(xd, f);
321 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
325 for (i = 0; i < n; i++)
327 for (j = 0; j < DIM; j++)
329 do_cpt_real_err(xd, &f[i][j]);
335 pr_rvecs(list, 0, desc, f, n);
339 /* If nval >= 0, nval is used; on read this should match the passed value.
340 * If nval n<0, *nptr is used; on read the value is stored in nptr
342 static int do_cpte_reals_low(XDR *xd, int cptp, int ecpt, int sflags,
343 int nval, int *nptr, real **v,
344 FILE *list, int erealtype)
348 int dtc = xdr_datatype_float;
350 int dtc = xdr_datatype_double;
352 real *vp, *va = NULL;
367 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
372 res = xdr_int(xd, &nf);
383 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), nval, nf);
392 res = xdr_int(xd, &dt);
399 fprintf(stderr, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
400 st_names(cptp, ecpt), xdr_datatype_names[dtc],
401 xdr_datatype_names[dt]);
403 if (list || !(sflags & (1<<ecpt)))
416 if (dt == xdr_datatype_float)
418 if (dtc == xdr_datatype_float)
426 res = xdr_vector(xd, (char *)vf, nf,
427 (unsigned int)sizeof(float), (xdrproc_t)xdr_float);
432 if (dtc != xdr_datatype_float)
434 for (i = 0; i < nf; i++)
443 if (dtc == xdr_datatype_double)
451 res = xdr_vector(xd, (char *)vd, nf,
452 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
457 if (dtc != xdr_datatype_double)
459 for (i = 0; i < nf; i++)
472 pr_reals(list, 0, st_names(cptp, ecpt), vp, nf);
475 pr_rvecs(list, 0, st_names(cptp, ecpt), (rvec *)vp, nf/3);
478 gmx_incons("Unknown checkpoint real type");
490 /* This function stores n along with the reals for reading,
491 * but on reading it assumes that n matches the value in the checkpoint file,
492 * a fatal error is generated when this is not the case.
494 static int do_cpte_reals(XDR *xd, int cptp, int ecpt, int sflags,
495 int n, real **v, FILE *list)
497 return do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, v, list, ecprREAL);
500 /* This function does the same as do_cpte_reals,
501 * except that on reading it ignores the passed value of *n
502 * and stored the value read from the checkpoint file in *n.
504 static int do_cpte_n_reals(XDR *xd, int cptp, int ecpt, int sflags,
505 int *n, real **v, FILE *list)
507 return do_cpte_reals_low(xd, cptp, ecpt, sflags, -1, n, v, list, ecprREAL);
510 static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
515 return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
518 static int do_cpte_ints(XDR *xd, int cptp, int ecpt, int sflags,
519 int n, int **v, FILE *list)
522 int dtc = xdr_datatype_int;
527 res = xdr_int(xd, &nf);
532 if (list == NULL && v != NULL && nf != n)
534 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
537 res = xdr_int(xd, &dt);
544 gmx_fatal(FARGS, "Type mismatch for state entry %s, code type is %s, file type is %s\n",
545 st_names(cptp, ecpt), xdr_datatype_names[dtc],
546 xdr_datatype_names[dt]);
548 if (list || !(sflags & (1<<ecpt)) || v == NULL)
561 res = xdr_vector(xd, (char *)vp, nf,
562 (unsigned int)sizeof(int), (xdrproc_t)xdr_int);
569 pr_ivec(list, 0, st_names(cptp, ecpt), vp, nf, TRUE);
579 static int do_cpte_int(XDR *xd, int cptp, int ecpt, int sflags,
582 return do_cpte_ints(xd, cptp, ecpt, sflags, 1, &i, list);
585 static int do_cpte_doubles(XDR *xd, int cptp, int ecpt, int sflags,
586 int n, double **v, FILE *list)
589 int dtc = xdr_datatype_double;
590 double *vp, *va = NULL;
594 res = xdr_int(xd, &nf);
599 if (list == NULL && nf != n)
601 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
604 res = xdr_int(xd, &dt);
611 gmx_fatal(FARGS, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
612 st_names(cptp, ecpt), xdr_datatype_names[dtc],
613 xdr_datatype_names[dt]);
615 if (list || !(sflags & (1<<ecpt)))
628 res = xdr_vector(xd, (char *)vp, nf,
629 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
636 pr_doubles(list, 0, st_names(cptp, ecpt), vp, nf);
646 static int do_cpte_double(XDR *xd, int cptp, int ecpt, int sflags,
647 double *r, FILE *list)
649 return do_cpte_doubles(xd, cptp, ecpt, sflags, 1, &r, list);
653 static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
654 int n, rvec **v, FILE *list)
658 return do_cpte_reals_low(xd, cptp, ecpt, sflags,
659 n*DIM, NULL, (real **)v, list, ecprRVEC);
662 static int do_cpte_matrix(XDR *xd, int cptp, int ecpt, int sflags,
663 matrix v, FILE *list)
668 vr = (real *)&(v[0][0]);
669 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
670 DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
672 if (list && ret == 0)
674 pr_rvecs(list, 0, st_names(cptp, ecpt), v, DIM);
681 static int do_cpte_nmatrix(XDR *xd, int cptp, int ecpt, int sflags,
682 int n, real **v, FILE *list)
687 char name[CPTSTRLEN];
694 for (i = 0; i < n; i++)
698 reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
699 if (list && reti == 0)
701 sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
702 pr_reals(list, 0, name, v[i], n);
712 static int do_cpte_matrices(XDR *xd, int cptp, int ecpt, int sflags,
713 int n, matrix **v, FILE *list)
716 matrix *vp, *va = NULL;
722 res = xdr_int(xd, &nf);
727 if (list == NULL && nf != n)
729 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
731 if (list || !(sflags & (1<<ecpt)))
744 snew(vr, nf*DIM*DIM);
745 for (i = 0; i < nf; i++)
747 for (j = 0; j < DIM; j++)
749 for (k = 0; k < DIM; k++)
751 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
755 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
756 nf*DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
757 for (i = 0; i < nf; i++)
759 for (j = 0; j < DIM; j++)
761 for (k = 0; k < DIM; k++)
763 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
769 if (list && ret == 0)
771 for (i = 0; i < nf; i++)
773 pr_rvecs(list, 0, st_names(cptp, ecpt), vp[i], DIM);
784 static void do_cpt_header(XDR *xd, gmx_bool bRead, int *file_version,
785 char **version, char **btime, char **buser, char **bhost,
787 char **fprog, char **ftime,
788 int *eIntegrator, int *simulation_part,
789 gmx_large_int_t *step, double *t,
790 int *nnodes, int *dd_nc, int *npme,
791 int *natoms, int *ngtc, int *nnhpres, int *nhchainlength,
792 int *nlambda, int *flags_state,
793 int *flags_eks, int *flags_enh, int *flags_dfh,
811 res = xdr_int(xd, &magic);
814 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
816 if (magic != CPT_MAGIC1)
818 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
819 "The checkpoint file is corrupted or not a checkpoint file",
826 if (gethostname(fhost, 255) != 0)
828 sprintf(fhost, "unknown");
831 sprintf(fhost, "unknown");
834 do_cpt_string_err(xd, bRead, "GROMACS version", version, list);
835 do_cpt_string_err(xd, bRead, "GROMACS build time", btime, list);
836 do_cpt_string_err(xd, bRead, "GROMACS build user", buser, list);
837 do_cpt_string_err(xd, bRead, "GROMACS build host", bhost, list);
838 do_cpt_string_err(xd, bRead, "generating program", fprog, list);
839 do_cpt_string_err(xd, bRead, "generation time", ftime, list);
840 *file_version = cpt_version;
841 do_cpt_int_err(xd, "checkpoint file version", file_version, list);
842 if (*file_version > cpt_version)
844 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", *file_version, cpt_version);
846 if (*file_version >= 13)
848 do_cpt_int_err(xd, "GROMACS double precision", double_prec, list);
854 if (*file_version >= 12)
856 do_cpt_string_err(xd, bRead, "generating host", &fhost, list);
862 do_cpt_int_err(xd, "#atoms", natoms, list);
863 do_cpt_int_err(xd, "#T-coupling groups", ngtc, list);
864 if (*file_version >= 10)
866 do_cpt_int_err(xd, "#Nose-Hoover T-chains", nhchainlength, list);
872 if (*file_version >= 11)
874 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", nnhpres, list);
880 if (*file_version >= 14)
882 do_cpt_int_err(xd, "# of total lambda states ", nlambda, list);
888 do_cpt_int_err(xd, "integrator", eIntegrator, list);
889 if (*file_version >= 3)
891 do_cpt_int_err(xd, "simulation part #", simulation_part, list);
895 *simulation_part = 1;
897 if (*file_version >= 5)
899 do_cpt_step_err(xd, "step", step, list);
903 do_cpt_int_err(xd, "step", &idum, list);
906 do_cpt_double_err(xd, "t", t, list);
907 do_cpt_int_err(xd, "#PP-nodes", nnodes, list);
909 do_cpt_int_err(xd, "dd_nc[x]", dd_nc ? &(dd_nc[0]) : &idum, list);
910 do_cpt_int_err(xd, "dd_nc[y]", dd_nc ? &(dd_nc[1]) : &idum, list);
911 do_cpt_int_err(xd, "dd_nc[z]", dd_nc ? &(dd_nc[2]) : &idum, list);
912 do_cpt_int_err(xd, "#PME-only nodes", npme, list);
913 do_cpt_int_err(xd, "state flags", flags_state, list);
914 if (*file_version >= 4)
916 do_cpt_int_err(xd, "ekin data flags", flags_eks, list);
917 do_cpt_int_err(xd, "energy history flags", flags_enh, list);
922 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
923 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
924 (1<<(estORIRE_DTAV+2)) |
925 (1<<(estORIRE_DTAV+3))));
927 if (*file_version >= 14)
929 do_cpt_int_err(xd, "df history flags", flags_dfh, list);
936 if (*file_version >= 15)
938 do_cpt_int_err(xd, "ED data sets", nED, list);
946 static int do_cpt_footer(XDR *xd, int file_version)
951 if (file_version >= 2)
954 res = xdr_int(xd, &magic);
959 if (magic != CPT_MAGIC2)
968 static int do_cpt_state(XDR *xd, gmx_bool bRead,
969 int fflags, t_state *state,
970 gmx_bool bReadRNG, FILE *list)
973 int **rng_p, **rngi_p;
980 nnht = state->nhchainlength*state->ngtc;
981 nnhtp = state->nhchainlength*state->nnhpres;
985 rng_p = (int **)&state->ld_rng;
986 rngi_p = &state->ld_rngi;
990 /* Do not read the RNG data */
995 if (bRead) /* we need to allocate space for dfhist if we are reading */
997 init_df_history(&state->dfhist,state->dfhist.nlambda);
1000 /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
1002 sflags = state->flags;
1003 for (i = 0; (i < estNR && ret == 0); i++)
1005 if (fflags & (1<<i))
1009 case estLAMBDA: ret = do_cpte_reals(xd, cptpEST, i, sflags, efptNR, &(state->lambda), list); break;
1010 case estFEPSTATE: ret = do_cpte_int (xd, cptpEST, i, sflags, &state->fep_state, list); break;
1011 case estBOX: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box, list); break;
1012 case estBOX_REL: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box_rel, list); break;
1013 case estBOXV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->boxv, list); break;
1014 case estPRES_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->pres_prev, list); break;
1015 case estSVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->svir_prev, list); break;
1016 case estFVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->fvir_prev, list); break;
1017 case estNH_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_xi, list); break;
1018 case estNH_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_vxi, list); break;
1019 case estNHPRES_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_xi, list); break;
1020 case estNHPRES_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_vxi, list); break;
1021 case estTC_INT: ret = do_cpte_doubles(xd, cptpEST, i, sflags, state->ngtc, &state->therm_integral, list); break;
1022 case estVETA: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->veta, list); break;
1023 case estVOL0: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->vol0, list); break;
1024 case estX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->x, list); break;
1025 case estV: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->v, list); break;
1026 case estSDX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->sd_X, list); break;
1027 case estLD_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrng, rng_p, list); break;
1028 case estLD_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrngi, rngi_p, list); break;
1029 case estMC_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nmcrng, (int **)&state->mc_rng, list); break;
1030 case estMC_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, 1, &state->mc_rngi, list); break;
1031 case estDISRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.disre_initf, list); break;
1032 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1033 case estORIRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.orire_initf, list); break;
1034 case estORIRE_DTAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1036 gmx_fatal(FARGS, "Unknown state entry %d\n"
1037 "You are probably reading a new checkpoint file with old code", i);
1045 static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
1053 for (i = 0; (i < eeksNR && ret == 0); i++)
1055 if (fflags & (1<<i))
1060 case eeksEKIN_N: ret = do_cpte_int(xd, cptpEEKS, i, fflags, &ekins->ekin_n, list); break;
1061 case eeksEKINH: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1062 case eeksEKINF: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1063 case eeksEKINO: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1064 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, cptpEEKS, i, fflags, ekins->ekin_total, list); break;
1065 case eeksEKINSCALEF: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscalef_nhc, list); break;
1066 case eeksVSCALE: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->vscale_nhc, list); break;
1067 case eeksEKINSCALEH: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->ekinscaleh_nhc, list); break;
1068 case eeksDEKINDL: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->dekindl, list); break;
1069 case eeksMVCOS: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->mvcos, list); break;
1071 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1072 "You are probably reading a new checkpoint file with old code", i);
1081 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1082 int fflags, energyhistory_t *enerhist,
1093 enerhist->nsteps = 0;
1095 enerhist->nsteps_sim = 0;
1096 enerhist->nsum_sim = 0;
1097 enerhist->dht = NULL;
1099 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1101 snew(enerhist->dht, 1);
1102 enerhist->dht->ndh = NULL;
1103 enerhist->dht->dh = NULL;
1104 enerhist->dht->start_lambda_set = FALSE;
1108 for (i = 0; (i < eenhNR && ret == 0); i++)
1110 if (fflags & (1<<i))
1114 case eenhENERGY_N: ret = do_cpte_int(xd, cptpEENH, i, fflags, &enerhist->nener, list); break;
1115 case eenhENERGY_AVER: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_ave, list); break;
1116 case eenhENERGY_SUM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum, list); break;
1117 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1118 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum_sim, list); break;
1119 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1120 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1121 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1122 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd, eenh_names[i], &(enerhist->dht->nndh), list);
1123 if (bRead) /* now allocate memory for it */
1125 snew(enerhist->dht->dh, enerhist->dht->nndh);
1126 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1127 for (j = 0; j < enerhist->dht->nndh; j++)
1129 enerhist->dht->ndh[j] = 0;
1130 enerhist->dht->dh[j] = NULL;
1134 case eenhENERGY_DELTA_H_LIST:
1135 for (j = 0; j < enerhist->dht->nndh; j++)
1137 ret = do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1140 case eenhENERGY_DELTA_H_STARTTIME:
1141 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1142 case eenhENERGY_DELTA_H_STARTLAMBDA:
1143 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1145 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1146 "You are probably reading a new checkpoint file with old code", i);
1151 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1153 /* Assume we have an old file format and copy sum to sum_sim */
1154 srenew(enerhist->ener_sum_sim, enerhist->nener);
1155 for (i = 0; i < enerhist->nener; i++)
1157 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1159 fflags |= (1<<eenhENERGY_SUM_SIM);
1162 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1163 !(fflags & (1<<eenhENERGY_NSTEPS)))
1165 /* Assume we have an old file format and copy nsum to nsteps */
1166 enerhist->nsteps = enerhist->nsum;
1167 fflags |= (1<<eenhENERGY_NSTEPS);
1169 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1170 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1172 /* Assume we have an old file format and copy nsum to nsteps */
1173 enerhist->nsteps_sim = enerhist->nsum_sim;
1174 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1180 static int do_cpt_df_hist(XDR *xd, int fflags, df_history_t *dfhist, FILE *list)
1185 nlambda = dfhist->nlambda;
1188 for (i = 0; (i < edfhNR && ret == 0); i++)
1190 if (fflags & (1<<i))
1194 case edfhBEQUIL: ret = do_cpte_int(xd, cptpEDFH, i, fflags, &dfhist->bEquil, list); break;
1195 case edfhNATLAMBDA: ret = do_cpte_ints(xd, cptpEDFH, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1196 case edfhWLHISTO: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1197 case edfhWLDELTA: ret = do_cpte_real(xd, cptpEDFH, i, fflags, &dfhist->wl_delta, list); break;
1198 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1199 case edfhSUMDG: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1200 case edfhSUMMINVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1201 case edfhSUMVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1202 case edfhACCUMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p, list); break;
1203 case edfhACCUMM: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m, list); break;
1204 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p2, list); break;
1205 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m2, list); break;
1206 case edfhTIJ: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij, list); break;
1207 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1210 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1211 "You are probably reading a new checkpoint file with old code", i);
1220 /* This function stores the last whole configuration of the reference and
1221 * average structure in the .cpt file
1223 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1224 edsamstate_t *EDstate, FILE *list)
1231 EDstate->bFromCpt = bRead;
1233 if (EDstate->nED <= 0)
1238 /* When reading, init_edsam has not been called yet,
1239 * so we have to allocate memory first. */
1242 snew(EDstate->nref, EDstate->nED);
1243 snew(EDstate->old_sref, EDstate->nED);
1244 snew(EDstate->nav, EDstate->nED);
1245 snew(EDstate->old_sav, EDstate->nED);
1248 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1249 for (i = 0; i < EDstate->nED; i++)
1251 /* Reference structure SREF */
1252 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1253 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1254 sprintf(buf, "ED%d x_ref", i+1);
1257 snew(EDstate->old_sref[i], EDstate->nref[i]);
1258 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1262 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1265 /* Average structure SAV */
1266 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1267 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1268 sprintf(buf, "ED%d x_av", i+1);
1271 snew(EDstate->old_sav[i], EDstate->nav[i]);
1272 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1276 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1284 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1285 gmx_file_position_t **p_outputfiles, int *nfiles,
1286 FILE *list, int file_version)
1290 gmx_off_t mask = 0xFFFFFFFFL;
1291 int offset_high, offset_low;
1293 gmx_file_position_t *outputfiles;
1295 if (do_cpt_int(xd, "number of output files", nfiles, list) != 0)
1302 snew(*p_outputfiles, *nfiles);
1305 outputfiles = *p_outputfiles;
1307 for (i = 0; i < *nfiles; i++)
1309 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1312 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1313 strncpy(outputfiles[i].filename, buf, CPTSTRLEN-1);
1319 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1323 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1327 #if (SIZEOF_GMX_OFF_T > 4)
1328 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1330 outputfiles[i].offset = offset_low;
1335 buf = outputfiles[i].filename;
1336 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1338 offset = outputfiles[i].offset;
1346 #if (SIZEOF_GMX_OFF_T > 4)
1347 offset_low = (int) (offset & mask);
1348 offset_high = (int) ((offset >> 32) & mask);
1350 offset_low = offset;
1354 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1358 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1363 if (file_version >= 8)
1365 if (do_cpt_int(xd, "file_checksum_size", &(outputfiles[i].chksum_size),
1370 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfiles[i].chksum, list) != 0)
1377 outputfiles[i].chksum_size = -1;
1384 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1385 FILE *fplog, t_commrec *cr,
1386 int eIntegrator, int simulation_part,
1387 gmx_bool bExpanded, int elamstats,
1388 gmx_large_int_t step, double t, t_state *state)
1398 char *fntemp; /* the temporary checkpoint file name */
1400 char timebuf[STRLEN];
1401 int nppnodes, npmenodes, flag_64bit;
1402 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1403 gmx_file_position_t *outputfiles;
1406 int flags_eks, flags_enh, flags_dfh, i;
1411 if (DOMAINDECOMP(cr))
1413 nppnodes = cr->dd->nnodes;
1414 npmenodes = cr->npmenodes;
1418 nppnodes = cr->nnodes;
1428 /* make the new temporary filename */
1429 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1431 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1432 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1433 strcat(fntemp, suffix);
1434 strcat(fntemp, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1437 gmx_ctime_r(&now, timebuf, STRLEN);
1441 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1442 gmx_step_str(step, buf), timebuf);
1445 /* Get offsets for open files */
1446 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1448 fp = gmx_fio_open(fntemp, "w");
1450 if (state->ekinstate.bUpToDate)
1453 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1454 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1455 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1463 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1465 flags_enh |= (1<<eenhENERGY_N);
1466 if (state->enerhist.nsum > 0)
1468 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1469 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1471 if (state->enerhist.nsum_sim > 0)
1473 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1474 (1<<eenhENERGY_NSUM_SIM));
1476 if (state->enerhist.dht)
1478 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1479 (1<< eenhENERGY_DELTA_H_LIST) |
1480 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1481 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1487 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1488 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1491 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1493 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1495 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1496 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1504 /* We can check many more things now (CPU, acceleration, etc), but
1505 * it is highly unlikely to have two separate builds with exactly
1506 * the same version, user, time, and build host!
1509 version = gmx_strdup(VERSION);
1510 btime = gmx_strdup(BUILD_TIME);
1511 buser = gmx_strdup(BUILD_USER);
1512 bhost = gmx_strdup(BUILD_HOST);
1514 double_prec = GMX_CPT_BUILD_DP;
1515 fprog = gmx_strdup(Program());
1517 ftime = &(timebuf[0]);
1519 do_cpt_header(gmx_fio_getxdr(fp), FALSE, &file_version,
1520 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1521 &eIntegrator, &simulation_part, &step, &t, &nppnodes,
1522 DOMAINDECOMP(cr) ? cr->dd->nc : NULL, &npmenodes,
1523 &state->natoms, &state->ngtc, &state->nnhpres,
1524 &state->nhchainlength, &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
1525 &state->edsamstate.nED,
1534 if ((do_cpt_state(gmx_fio_getxdr(fp), FALSE, state->flags, state, TRUE, NULL) < 0) ||
1535 (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL) < 0) ||
1536 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, &state->enerhist, NULL) < 0) ||
1537 (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL) < 0) ||
1538 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, &state->edsamstate, NULL) < 0) ||
1539 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, &noutputfiles, NULL,
1542 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1545 do_cpt_footer(gmx_fio_getxdr(fp), file_version);
1547 /* we really, REALLY, want to make sure to physically write the checkpoint,
1548 and all the files it depends on, out to disk. Because we've
1549 opened the checkpoint with gmx_fio_open(), it's in our list
1551 ret = gmx_fio_all_output_fsync();
1557 "Cannot fsync '%s'; maybe you are out of disk space?",
1558 gmx_fio_getname(ret));
1560 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == NULL)
1570 if (gmx_fio_close(fp) != 0)
1572 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1575 /* we don't move the checkpoint if the user specified they didn't want it,
1576 or if the fsyncs failed */
1577 if (!bNumberAndKeep && !ret)
1581 /* Rename the previous checkpoint file */
1583 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1584 strcat(buf, "_prev");
1585 strcat(buf, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1587 /* we copy here so that if something goes wrong between now and
1588 * the rename below, there's always a state.cpt.
1589 * If renames are atomic (such as in POSIX systems),
1590 * this copying should be unneccesary.
1592 gmx_file_copy(fn, buf, FALSE);
1593 /* We don't really care if this fails:
1594 * there's already a new checkpoint.
1597 gmx_file_rename(fn, buf);
1600 if (gmx_file_rename(fntemp, fn) != 0)
1602 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1610 /*code for alternate checkpointing scheme. moved from top of loop over
1612 fcRequestCheckPoint();
1613 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
1615 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
1617 #endif /* end GMX_FAHCORE block */
1620 static void print_flag_mismatch(FILE *fplog, int sflags, int fflags)
1624 fprintf(fplog, "\nState entry mismatch between the simulation and the checkpoint file\n");
1625 fprintf(fplog, "Entries which are not present in the checkpoint file will not be updated\n");
1626 fprintf(fplog, " %24s %11s %11s\n", "", "simulation", "checkpoint");
1627 for (i = 0; i < estNR; i++)
1629 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1631 fprintf(fplog, " %24s %11s %11s\n",
1633 (sflags & (1<<i)) ? " present " : "not present",
1634 (fflags & (1<<i)) ? " present " : "not present");
1639 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
1641 FILE *fp = fplog ? fplog : stderr;
1645 fprintf(fp, " %s mismatch,\n", type);
1646 fprintf(fp, " current program: %d\n", p);
1647 fprintf(fp, " checkpoint file: %d\n", f);
1653 static void check_string(FILE *fplog, const char *type, const char *p,
1654 const char *f, gmx_bool *mm)
1656 FILE *fp = fplog ? fplog : stderr;
1658 if (strcmp(p, f) != 0)
1660 fprintf(fp, " %s mismatch,\n", type);
1661 fprintf(fp, " current program: %s\n", p);
1662 fprintf(fp, " checkpoint file: %s\n", f);
1668 static void check_match(FILE *fplog,
1670 char *btime, char *buser, char *bhost, int double_prec,
1672 t_commrec *cr, gmx_bool bPartDecomp, int npp_f, int npme_f,
1673 ivec dd_nc, ivec dd_nc_f)
1680 check_string(fplog, "Version", VERSION, version, &mm);
1681 check_string(fplog, "Build time", BUILD_TIME, btime, &mm);
1682 check_string(fplog, "Build user", BUILD_USER, buser, &mm);
1683 check_string(fplog, "Build host", BUILD_HOST, bhost, &mm);
1684 check_int (fplog, "Double prec.", GMX_CPT_BUILD_DP, double_prec, &mm);
1685 check_string(fplog, "Program name", Program(), fprog, &mm);
1687 check_int (fplog, "#nodes", cr->nnodes, npp_f+npme_f, &mm);
1696 check_int (fplog, "#PME-nodes", cr->npmenodes, npme_f, &mm);
1699 if (cr->npmenodes >= 0)
1701 npp -= cr->npmenodes;
1705 check_int (fplog, "#DD-cells[x]", dd_nc[XX], dd_nc_f[XX], &mm);
1706 check_int (fplog, "#DD-cells[y]", dd_nc[YY], dd_nc_f[YY], &mm);
1707 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], dd_nc_f[ZZ], &mm);
1714 "Gromacs binary or parallel settings not identical to previous run.\n"
1715 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1716 fplog ? ",\n see the log file for details" : "");
1721 "Gromacs binary or parallel settings not identical to previous run.\n"
1722 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1727 static void read_checkpoint(const char *fn, FILE **pfplog,
1728 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
1729 int eIntegrator, int *init_fep_state, gmx_large_int_t *step, double *t,
1730 t_state *state, gmx_bool *bReadRNG, gmx_bool *bReadEkin,
1731 int *simulation_part,
1732 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend)
1737 char *version, *btime, *buser, *bhost, *fprog, *ftime;
1739 char filename[STRLEN], buf[STEPSTRSIZE];
1740 int nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
1742 int natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
1745 gmx_file_position_t *outputfiles;
1747 t_fileio *chksum_file;
1748 FILE * fplog = *pfplog;
1749 unsigned char digest[16];
1750 #ifndef GMX_NATIVE_WINDOWS
1751 struct flock fl; /* don't initialize here: the struct order is OS
1755 const char *int_warn =
1756 "WARNING: The checkpoint file was generated with integrator %s,\n"
1757 " while the simulation uses integrator %s\n\n";
1758 const char *sd_note =
1759 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1760 " while the simulation uses %d SD or BD nodes,\n"
1761 " continuation will be exact, except for the random state\n\n";
1763 #ifndef GMX_NATIVE_WINDOWS
1764 fl.l_type = F_WRLCK;
1765 fl.l_whence = SEEK_SET;
1774 "read_checkpoint not (yet) supported with particle decomposition");
1777 fp = gmx_fio_open(fn, "r");
1778 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
1779 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1780 &eIntegrator_f, simulation_part, step, t,
1781 &nppnodes_f, dd_nc_f, &npmenodes_f,
1782 &natoms, &ngtc, &nnhpres, &nhchainlength, &nlambda,
1783 &fflags, &flags_eks, &flags_enh, &flags_dfh,
1784 &state->edsamstate.nED, NULL);
1786 if (bAppendOutputFiles &&
1787 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1789 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1792 if (cr == NULL || MASTER(cr))
1794 fprintf(stderr, "\nReading checkpoint file %s generated: %s\n\n",
1798 /* This will not be written if we do appending, since fplog is still NULL then */
1801 fprintf(fplog, "\n");
1802 fprintf(fplog, "Reading checkpoint file %s\n", fn);
1803 fprintf(fplog, " file generated by: %s\n", fprog);
1804 fprintf(fplog, " file generated at: %s\n", ftime);
1805 fprintf(fplog, " GROMACS build time: %s\n", btime);
1806 fprintf(fplog, " GROMACS build user: %s\n", buser);
1807 fprintf(fplog, " GROMACS build host: %s\n", bhost);
1808 fprintf(fplog, " GROMACS double prec.: %d\n", double_prec);
1809 fprintf(fplog, " simulation part #: %d\n", *simulation_part);
1810 fprintf(fplog, " step: %s\n", gmx_step_str(*step, buf));
1811 fprintf(fplog, " time: %f\n", *t);
1812 fprintf(fplog, "\n");
1815 if (natoms != state->natoms)
1817 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", natoms, state->natoms);
1819 if (ngtc != state->ngtc)
1821 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", ngtc, state->ngtc);
1823 if (nnhpres != state->nnhpres)
1825 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", nnhpres, state->nnhpres);
1828 if (nlambda != state->dfhist.nlambda)
1830 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", nlambda, state->dfhist.nlambda);
1833 init_gtc_state(state, state->ngtc, state->nnhpres, nhchainlength); /* need to keep this here to keep the tpr format working */
1834 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1836 if (eIntegrator_f != eIntegrator)
1840 fprintf(stderr, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1842 if (bAppendOutputFiles)
1845 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1846 "Stopping the run to prevent you from ruining all your data...\n"
1847 "If you _really_ know what you are doing, try with the -noappend option.\n");
1851 fprintf(fplog, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1860 else if (bPartDecomp)
1862 nppnodes = cr->nnodes;
1865 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1867 if (cr->npmenodes < 0)
1869 cr->npmenodes = npmenodes_f;
1871 nppnodes = cr->nnodes - cr->npmenodes;
1872 if (nppnodes == nppnodes_f)
1874 for (d = 0; d < DIM; d++)
1878 dd_nc[d] = dd_nc_f[d];
1885 /* The number of PP nodes has not been set yet */
1889 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1891 /* Correct the RNG state size for the number of PP nodes.
1892 * Such assignments should all be moved to one central function.
1894 state->nrng = nppnodes*gmx_rng_n();
1895 state->nrngi = nppnodes;
1899 if (fflags != state->flags)
1904 if (bAppendOutputFiles)
1907 "Output file appending requested, but input and checkpoint states are not identical.\n"
1908 "Stopping the run to prevent you from ruining all your data...\n"
1909 "You can try with the -noappend option, and get more info in the log file.\n");
1912 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1914 gmx_fatal(FARGS, "You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1919 "WARNING: The checkpoint state entries do not match the simulation,\n"
1920 " see the log file for details\n\n");
1926 print_flag_mismatch(fplog, state->flags, fflags);
1931 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1932 nppnodes != nppnodes_f)
1937 fprintf(stderr, sd_note, nppnodes_f, nppnodes);
1941 fprintf(fplog, sd_note, nppnodes_f, nppnodes);
1946 check_match(fplog, version, btime, buser, bhost, double_prec, fprog,
1947 cr, bPartDecomp, nppnodes_f, npmenodes_f, dd_nc, dd_nc_f);
1950 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, fflags, state, *bReadRNG, NULL);
1951 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1952 Investigate for 5.0. */
1957 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
1962 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1963 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1965 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
1966 flags_enh, &state->enerhist, NULL);
1972 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
1978 if (file_version < 6)
1980 const char *warn = "Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1982 fprintf(stderr, "\nWARNING: %s\n\n", warn);
1985 fprintf(fplog, "\nWARNING: %s\n\n", warn);
1987 state->enerhist.nsum = *step;
1988 state->enerhist.nsum_sim = *step;
1991 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
1997 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, NULL, file_version);
2003 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2008 if (gmx_fio_close(fp) != 0)
2010 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2019 /* If the user wants to append to output files,
2020 * we use the file pointer positions of the output files stored
2021 * in the checkpoint file and truncate the files such that any frames
2022 * written after the checkpoint time are removed.
2023 * All files are md5sum checked such that we can be sure that
2024 * we do not truncate other (maybe imprortant) files.
2026 if (bAppendOutputFiles)
2028 if (fn2ftp(outputfiles[0].filename) != efLOG)
2030 /* make sure first file is log file so that it is OK to use it for
2033 gmx_fatal(FARGS, "The first output file should always be the log "
2034 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2036 for (i = 0; i < nfiles; i++)
2038 if (outputfiles[i].offset < 0)
2040 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2041 "is larger than 2 GB, but mdrun did not support large file"
2042 " offsets. Can not append. Run mdrun with -noappend",
2043 outputfiles[i].filename);
2046 chksum_file = gmx_fio_open(outputfiles[i].filename, "a");
2049 chksum_file = gmx_fio_open(outputfiles[i].filename, "r+");
2054 /* Note that there are systems where the lock operation
2055 * will succeed, but a second process can also lock the file.
2056 * We should probably try to detect this.
2058 #ifndef GMX_NATIVE_WINDOWS
2059 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
2062 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2065 if (errno == ENOSYS)
2069 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2073 fprintf(stderr, "\nNOTE: File locking is not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2076 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2080 else if (errno == EACCES || errno == EAGAIN)
2082 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2083 "simulation?", outputfiles[i].filename);
2087 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2088 outputfiles[i].filename, strerror(errno));
2093 /* compute md5 chksum */
2094 if (outputfiles[i].chksum_size != -1)
2096 if (gmx_fio_get_file_md5(chksum_file, outputfiles[i].offset,
2097 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2099 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2100 outputfiles[i].chksum_size,
2101 outputfiles[i].filename);
2104 if (i == 0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2106 if (gmx_fio_seek(chksum_file, outputfiles[i].offset))
2108 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", strerror(errno));
2113 if (i == 0) /*open log file here - so that lock is never lifted
2114 after chksum is calculated */
2116 *pfplog = gmx_fio_getfp(chksum_file);
2120 gmx_fio_close(chksum_file);
2123 /* compare md5 chksum */
2124 if (outputfiles[i].chksum_size != -1 &&
2125 memcmp(digest, outputfiles[i].chksum, 16) != 0)
2129 fprintf(debug, "chksum for %s: ", outputfiles[i].filename);
2130 for (j = 0; j < 16; j++)
2132 fprintf(debug, "%02x", digest[j]);
2134 fprintf(debug, "\n");
2136 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2137 outputfiles[i].filename);
2142 if (i != 0) /*log file is already seeked to correct position */
2144 #ifdef GMX_NATIVE_WINDOWS
2145 rc = gmx_wintruncate(outputfiles[i].filename, outputfiles[i].offset);
2147 rc = truncate(outputfiles[i].filename, outputfiles[i].offset);
2151 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfiles[i].filename);
2161 void load_checkpoint(const char *fn, FILE **fplog,
2162 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
2163 t_inputrec *ir, t_state *state,
2164 gmx_bool *bReadRNG, gmx_bool *bReadEkin,
2165 gmx_bool bAppend, gmx_bool bForceAppend)
2167 gmx_large_int_t step;
2172 /* Read the state from the checkpoint file */
2173 read_checkpoint(fn, fplog,
2174 cr, bPartDecomp, dd_nc,
2175 ir->eI, &(ir->fepvals->init_fep_state), &step, &t, state, bReadRNG, bReadEkin,
2176 &ir->simulation_part, bAppend, bForceAppend);
2180 gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
2181 gmx_bcast(DIM*sizeof(dd_nc[0]), dd_nc, cr);
2182 gmx_bcast(sizeof(step), &step, cr);
2183 gmx_bcast(sizeof(*bReadRNG), bReadRNG, cr);
2184 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2186 ir->bContinuation = TRUE;
2187 if (ir->nsteps >= 0)
2189 ir->nsteps += ir->init_step - step;
2191 ir->init_step = step;
2192 ir->simulation_part += 1;
2195 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2196 gmx_large_int_t *step, double *t, t_state *state,
2198 int *nfiles, gmx_file_position_t **outputfiles)
2201 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2206 int flags_eks, flags_enh, flags_dfh;
2208 gmx_file_position_t *files_loc = NULL;
2211 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2212 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2213 &eIntegrator, simulation_part, step, t, &nppnodes, dd_nc, &npme,
2214 &state->natoms, &state->ngtc, &state->nnhpres, &state->nhchainlength,
2215 &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
2216 &state->edsamstate.nED, NULL);
2218 do_cpt_state(gmx_fio_getxdr(fp), TRUE, state->flags, state, bReadRNG, NULL);
2223 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
2228 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2229 flags_enh, &state->enerhist, NULL);
2234 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
2240 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
2246 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2247 outputfiles != NULL ? outputfiles : &files_loc,
2248 outputfiles != NULL ? nfiles : &nfiles_loc,
2249 NULL, file_version);
2250 if (files_loc != NULL)
2260 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2274 read_checkpoint_state(const char *fn, int *simulation_part,
2275 gmx_large_int_t *step, double *t, t_state *state)
2279 fp = gmx_fio_open(fn, "r");
2280 read_checkpoint_data(fp, simulation_part, step, t, state, FALSE, NULL, NULL);
2281 if (gmx_fio_close(fp) != 0)
2283 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2287 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2289 /* This next line is nasty because the sub-structures of t_state
2290 * cannot be assumed to be zeroed (or even initialized in ways the
2291 * rest of the code might assume). Using snew would be better, but
2292 * this will all go away for 5.0. */
2294 int simulation_part;
2295 gmx_large_int_t step;
2298 init_state(&state, 0, 0, 0, 0, 0);
2300 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, FALSE, NULL, NULL);
2302 fr->natoms = state.natoms;
2305 fr->step = gmx_large_int_to_int(step,
2306 "conversion of checkpoint to trajectory");
2310 fr->lambda = state.lambda[efptFEP];
2311 fr->fep_state = state.fep_state;
2313 fr->bX = (state.flags & (1<<estX));
2319 fr->bV = (state.flags & (1<<estV));
2326 fr->bBox = (state.flags & (1<<estBOX));
2329 copy_mat(state.box, fr->box);
2334 void list_checkpoint(const char *fn, FILE *out)
2338 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2340 int eIntegrator, simulation_part, nppnodes, npme;
2341 gmx_large_int_t step;
2345 int flags_eks, flags_enh, flags_dfh;
2349 gmx_file_position_t *outputfiles;
2352 init_state(&state, -1, -1, -1, -1, 0);
2354 fp = gmx_fio_open(fn, "r");
2355 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2356 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2357 &eIntegrator, &simulation_part, &step, &t, &nppnodes, dd_nc, &npme,
2358 &state.natoms, &state.ngtc, &state.nnhpres, &state.nhchainlength,
2359 &(state.dfhist.nlambda), &state.flags,
2360 &flags_eks, &flags_enh, &flags_dfh, &state.edsamstate.nED, out);
2361 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, state.flags, &state, TRUE, out);
2366 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state.ekinstate, out);
2371 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2372 flags_enh, &state.enerhist, out);
2376 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
2377 flags_dfh, &state.dfhist, out);
2382 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state.edsamstate, out);
2387 do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, out, file_version);
2392 ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
2399 if (gmx_fio_close(fp) != 0)
2401 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2408 static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
2412 /* Check if the output file name stored in the checkpoint file
2413 * is one of the output file names of mdrun.
2417 !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
2422 return (i < nfile && gmx_fexist(fnm_cp));
2425 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2426 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2427 gmx_large_int_t *cpt_step, t_commrec *cr,
2428 gmx_bool bAppendReq,
2429 int nfile, const t_filenm fnm[],
2430 const char *part_suffix, gmx_bool *bAddPart)
2433 gmx_large_int_t step = 0;
2435 /* This next line is nasty because the sub-structures of t_state
2436 * cannot be assumed to be zeroed (or even initialized in ways the
2437 * rest of the code might assume). Using snew would be better, but
2438 * this will all go away for 5.0. */
2441 gmx_file_position_t *outputfiles;
2444 char *fn, suf_up[STRLEN];
2450 if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
2452 *simulation_part = 0;
2456 init_state(&state, 0, 0, 0, 0, 0);
2458 read_checkpoint_data(fp, simulation_part, &step, &t, &state, FALSE,
2459 &nfiles, &outputfiles);
2460 if (gmx_fio_close(fp) != 0)
2462 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2469 for (f = 0; f < nfiles; f++)
2471 if (exist_output_file(outputfiles[f].filename, nfile, fnm))
2476 if (nexist == nfiles)
2478 bAppend = bAppendReq;
2480 else if (nexist > 0)
2483 "Output file appending has been requested,\n"
2484 "but some output files listed in the checkpoint file %s\n"
2485 "are not present or are named differently by the current program:\n",
2487 fprintf(stderr, "output files present:");
2488 for (f = 0; f < nfiles; f++)
2490 if (exist_output_file(outputfiles[f].filename,
2493 fprintf(stderr, " %s", outputfiles[f].filename);
2496 fprintf(stderr, "\n");
2497 fprintf(stderr, "output files not present or named differently:");
2498 for (f = 0; f < nfiles; f++)
2500 if (!exist_output_file(outputfiles[f].filename,
2503 fprintf(stderr, " %s", outputfiles[f].filename);
2506 fprintf(stderr, "\n");
2508 gmx_fatal(FARGS, "File appending requested, but only %d of the %d output files are present", nexist, nfiles);
2516 gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
2518 fn = outputfiles[0].filename;
2519 if (strlen(fn) < 4 ||
2520 gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
2522 gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
2524 /* Set bAddPart to whether the suffix string '.part' is present
2525 * in the log file name.
2527 strcpy(suf_up, part_suffix);
2529 *bAddPart = (strstr(fn, part_suffix) != NULL ||
2530 strstr(fn, suf_up) != NULL);
2538 gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
2540 if (*simulation_part > 0 && bAppendReq)
2542 gmx_bcast(sizeof(bAppend), &bAppend, cr);
2543 gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
2546 if (NULL != cpt_step)