1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
30 #ifdef HAVE_SYS_TIME_H
38 #ifdef GMX_NATIVE_WINDOWS
41 #include <sys/locking.h>
55 #include "gmx_random.h"
56 #include "checkpoint.h"
61 #include "buildinfo.h"
68 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
70 gmx_ctime_r(const time_t *clock, char *buf, int n);
73 #define CPT_MAGIC1 171817
74 #define CPT_MAGIC2 171819
75 #define CPTSTRLEN 1024
78 #define GMX_CPT_BUILD_DP 1
80 #define GMX_CPT_BUILD_DP 0
83 /* cpt_version should normally only be changed
84 * when the header of footer format changes.
85 * The state data format itself is backward and forward compatible.
86 * But old code can not read a new entry that is present in the file
87 * (but can read a new format when new entries are not present).
89 static const int cpt_version = 15;
92 const char *est_names[estNR] =
95 "box", "box-rel", "box-v", "pres_prev",
96 "nosehoover-xi", "thermostat-integral",
97 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
98 "disre_initf", "disre_rm3tav",
99 "orire_initf", "orire_Dtav",
100 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng", "MC-rng-i"
104 eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
107 const char *eeks_names[eeksNR] =
109 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
110 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
114 eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
115 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
116 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
117 eenhENERGY_DELTA_H_NN,
118 eenhENERGY_DELTA_H_LIST,
119 eenhENERGY_DELTA_H_STARTTIME,
120 eenhENERGY_DELTA_H_STARTLAMBDA,
124 const char *eenh_names[eenhNR] =
126 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
127 "energy_sum_sim", "energy_nsum_sim",
128 "energy_nsteps", "energy_nsteps_sim",
130 "energy_delta_h_list",
131 "energy_delta_h_start_time",
132 "energy_delta_h_start_lambda"
135 /* free energy history variables -- need to be preserved over checkpoint */
137 edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
138 edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
140 /* free energy history variable names */
141 const char *edfh_names[edfhNR] =
143 "bEquilibrated", "N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar", "variance",
144 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
147 #ifdef GMX_NATIVE_WINDOWS
149 gmx_wintruncate(const char *filename, __int64 size)
152 /*we do this elsewhere*/
158 fp = fopen(filename, "rb+");
165 return _chsize_s( fileno(fp), size);
172 ecprREAL, ecprRVEC, ecprMATRIX
176 cptpEST, cptpEEKS, cptpEENH, cptpEDFH
178 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
179 cptpEST - state variables.
180 cptpEEKS - Kinetic energy state variables.
181 cptpEENH - Energy history state variables.
182 cptpEDFH - free energy history variables.
186 static const char *st_names(int cptp, int ecpt)
190 case cptpEST: return est_names [ecpt]; break;
191 case cptpEEKS: return eeks_names[ecpt]; break;
192 case cptpEENH: return eenh_names[ecpt]; break;
193 case cptpEDFH: return edfh_names[ecpt]; break;
199 static void cp_warning(FILE *fp)
201 fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
204 static void cp_error()
206 gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
209 static void do_cpt_string_err(XDR *xd, gmx_bool bRead, const char *desc, char **s, FILE *list)
217 res = xdr_string(xd, s, CPTSTRLEN);
224 fprintf(list, "%s = %s\n", desc, *s);
229 static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
233 res = xdr_int(xd, i);
240 fprintf(list, "%s = %d\n", desc, *i);
245 static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
251 fprintf(list, "%s = ", desc);
253 for (j = 0; j < n && res; j++)
255 res &= xdr_u_char(xd, &i[j]);
258 fprintf(list, "%02x", i[j]);
273 static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
275 if (do_cpt_int(xd, desc, i, list) < 0)
281 static void do_cpt_step_err(XDR *xd, const char *desc, gmx_large_int_t *i, FILE *list)
284 char buf[STEPSTRSIZE];
286 res = xdr_gmx_large_int(xd, i, "reading checkpoint file");
293 fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
297 static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
301 res = xdr_double(xd, f);
308 fprintf(list, "%s = %f\n", desc, *f);
312 static void do_cpt_real_err(XDR *xd, const char *desc, real *f)
317 res = xdr_double(xd, f);
319 res = xdr_float(xd, f);
327 static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
331 for (i = 0; i < n; i++)
333 for (j = 0; j < DIM; j++)
335 do_cpt_real_err(xd, desc, &f[i][j]);
341 pr_rvecs(list, 0, desc, f, n);
345 /* If nval >= 0, nval is used; on read this should match the passed value.
346 * If nval n<0, *nptr is used; on read the value is stored in nptr
348 static int do_cpte_reals_low(XDR *xd, int cptp, int ecpt, int sflags,
349 int nval, int *nptr, real **v,
350 FILE *list, int erealtype)
354 int dtc = xdr_datatype_float;
356 int dtc = xdr_datatype_double;
358 real *vp, *va = NULL;
373 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
378 res = xdr_int(xd, &nf);
389 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), nval, nf);
398 res = xdr_int(xd, &dt);
405 fprintf(stderr, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
406 st_names(cptp, ecpt), xdr_datatype_names[dtc],
407 xdr_datatype_names[dt]);
409 if (list || !(sflags & (1<<ecpt)))
422 if (dt == xdr_datatype_float)
424 if (dtc == xdr_datatype_float)
432 res = xdr_vector(xd, (char *)vf, nf,
433 (unsigned int)sizeof(float), (xdrproc_t)xdr_float);
438 if (dtc != xdr_datatype_float)
440 for (i = 0; i < nf; i++)
449 if (dtc == xdr_datatype_double)
457 res = xdr_vector(xd, (char *)vd, nf,
458 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
463 if (dtc != xdr_datatype_double)
465 for (i = 0; i < nf; i++)
478 pr_reals(list, 0, st_names(cptp, ecpt), vp, nf);
481 pr_rvecs(list, 0, st_names(cptp, ecpt), (rvec *)vp, nf/3);
484 gmx_incons("Unknown checkpoint real type");
496 /* This function stores n along with the reals for reading,
497 * but on reading it assumes that n matches the value in the checkpoint file,
498 * a fatal error is generated when this is not the case.
500 static int do_cpte_reals(XDR *xd, int cptp, int ecpt, int sflags,
501 int n, real **v, FILE *list)
503 return do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, v, list, ecprREAL);
506 /* This function does the same as do_cpte_reals,
507 * except that on reading it ignores the passed value of *n
508 * and stored the value read from the checkpoint file in *n.
510 static int do_cpte_n_reals(XDR *xd, int cptp, int ecpt, int sflags,
511 int *n, real **v, FILE *list)
513 return do_cpte_reals_low(xd, cptp, ecpt, sflags, -1, n, v, list, ecprREAL);
516 static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
521 return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
524 static int do_cpte_ints(XDR *xd, int cptp, int ecpt, int sflags,
525 int n, int **v, FILE *list)
528 int dtc = xdr_datatype_int;
533 res = xdr_int(xd, &nf);
538 if (list == NULL && v != NULL && nf != n)
540 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
543 res = xdr_int(xd, &dt);
550 gmx_fatal(FARGS, "Type mismatch for state entry %s, code type is %s, file type is %s\n",
551 st_names(cptp, ecpt), xdr_datatype_names[dtc],
552 xdr_datatype_names[dt]);
554 if (list || !(sflags & (1<<ecpt)) || v == NULL)
567 res = xdr_vector(xd, (char *)vp, nf,
568 (unsigned int)sizeof(int), (xdrproc_t)xdr_int);
575 pr_ivec(list, 0, st_names(cptp, ecpt), vp, nf, TRUE);
585 static int do_cpte_int(XDR *xd, int cptp, int ecpt, int sflags,
588 return do_cpte_ints(xd, cptp, ecpt, sflags, 1, &i, list);
591 static int do_cpte_doubles(XDR *xd, int cptp, int ecpt, int sflags,
592 int n, double **v, FILE *list)
595 int dtc = xdr_datatype_double;
596 double *vp, *va = NULL;
600 res = xdr_int(xd, &nf);
605 if (list == NULL && nf != n)
607 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
610 res = xdr_int(xd, &dt);
617 gmx_fatal(FARGS, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
618 st_names(cptp, ecpt), xdr_datatype_names[dtc],
619 xdr_datatype_names[dt]);
621 if (list || !(sflags & (1<<ecpt)))
634 res = xdr_vector(xd, (char *)vp, nf,
635 (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
642 pr_doubles(list, 0, st_names(cptp, ecpt), vp, nf);
652 static int do_cpte_double(XDR *xd, int cptp, int ecpt, int sflags,
653 double *r, FILE *list)
655 return do_cpte_doubles(xd, cptp, ecpt, sflags, 1, &r, list);
659 static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
660 int n, rvec **v, FILE *list)
664 return do_cpte_reals_low(xd, cptp, ecpt, sflags,
665 n*DIM, NULL, (real **)v, list, ecprRVEC);
668 static int do_cpte_matrix(XDR *xd, int cptp, int ecpt, int sflags,
669 matrix v, FILE *list)
674 vr = (real *)&(v[0][0]);
675 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
676 DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
678 if (list && ret == 0)
680 pr_rvecs(list, 0, st_names(cptp, ecpt), v, DIM);
687 static int do_cpte_nmatrix(XDR *xd, int cptp, int ecpt, int sflags,
688 int n, real **v, FILE *list)
693 char name[CPTSTRLEN];
700 for (i = 0; i < n; i++)
704 reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
705 if (list && reti == 0)
707 sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
708 pr_reals(list, 0, name, v[i], n);
718 static int do_cpte_matrices(XDR *xd, int cptp, int ecpt, int sflags,
719 int n, matrix **v, FILE *list)
722 matrix *vp, *va = NULL;
728 res = xdr_int(xd, &nf);
733 if (list == NULL && nf != n)
735 gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
737 if (list || !(sflags & (1<<ecpt)))
750 snew(vr, nf*DIM*DIM);
751 for (i = 0; i < nf; i++)
753 for (j = 0; j < DIM; j++)
755 for (k = 0; k < DIM; k++)
757 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
761 ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
762 nf*DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
763 for (i = 0; i < nf; i++)
765 for (j = 0; j < DIM; j++)
767 for (k = 0; k < DIM; k++)
769 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
775 if (list && ret == 0)
777 for (i = 0; i < nf; i++)
779 pr_rvecs(list, 0, st_names(cptp, ecpt), vp[i], DIM);
790 static void do_cpt_header(XDR *xd, gmx_bool bRead, int *file_version,
791 char **version, char **btime, char **buser, char **bhost,
793 char **fprog, char **ftime,
794 int *eIntegrator, int *simulation_part,
795 gmx_large_int_t *step, double *t,
796 int *nnodes, int *dd_nc, int *npme,
797 int *natoms, int *ngtc, int *nnhpres, int *nhchainlength,
798 int *nlambda, int *flags_state,
799 int *flags_eks, int *flags_enh, int *flags_dfh,
817 res = xdr_int(xd, &magic);
820 gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
822 if (magic != CPT_MAGIC1)
824 gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
825 "The checkpoint file is corrupted or not a checkpoint file",
832 if (gethostname(fhost, 255) != 0)
834 sprintf(fhost, "unknown");
837 sprintf(fhost, "unknown");
840 do_cpt_string_err(xd, bRead, "GROMACS version", version, list);
841 do_cpt_string_err(xd, bRead, "GROMACS build time", btime, list);
842 do_cpt_string_err(xd, bRead, "GROMACS build user", buser, list);
843 do_cpt_string_err(xd, bRead, "GROMACS build host", bhost, list);
844 do_cpt_string_err(xd, bRead, "generating program", fprog, list);
845 do_cpt_string_err(xd, bRead, "generation time", ftime, list);
846 *file_version = cpt_version;
847 do_cpt_int_err(xd, "checkpoint file version", file_version, list);
848 if (*file_version > cpt_version)
850 gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", *file_version, cpt_version);
852 if (*file_version >= 13)
854 do_cpt_int_err(xd, "GROMACS double precision", double_prec, list);
860 if (*file_version >= 12)
862 do_cpt_string_err(xd, bRead, "generating host", &fhost, list);
868 do_cpt_int_err(xd, "#atoms", natoms, list);
869 do_cpt_int_err(xd, "#T-coupling groups", ngtc, list);
870 if (*file_version >= 10)
872 do_cpt_int_err(xd, "#Nose-Hoover T-chains", nhchainlength, list);
878 if (*file_version >= 11)
880 do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", nnhpres, list);
886 if (*file_version >= 14)
888 do_cpt_int_err(xd, "# of total lambda states ", nlambda, list);
894 do_cpt_int_err(xd, "integrator", eIntegrator, list);
895 if (*file_version >= 3)
897 do_cpt_int_err(xd, "simulation part #", simulation_part, list);
901 *simulation_part = 1;
903 if (*file_version >= 5)
905 do_cpt_step_err(xd, "step", step, list);
909 do_cpt_int_err(xd, "step", &idum, list);
912 do_cpt_double_err(xd, "t", t, list);
913 do_cpt_int_err(xd, "#PP-nodes", nnodes, list);
915 do_cpt_int_err(xd, "dd_nc[x]", dd_nc ? &(dd_nc[0]) : &idum, list);
916 do_cpt_int_err(xd, "dd_nc[y]", dd_nc ? &(dd_nc[1]) : &idum, list);
917 do_cpt_int_err(xd, "dd_nc[z]", dd_nc ? &(dd_nc[2]) : &idum, list);
918 do_cpt_int_err(xd, "#PME-only nodes", npme, list);
919 do_cpt_int_err(xd, "state flags", flags_state, list);
920 if (*file_version >= 4)
922 do_cpt_int_err(xd, "ekin data flags", flags_eks, list);
923 do_cpt_int_err(xd, "energy history flags", flags_enh, list);
928 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
929 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
930 (1<<(estORIRE_DTAV+2)) |
931 (1<<(estORIRE_DTAV+3))));
933 if (*file_version >= 14)
935 do_cpt_int_err(xd, "df history flags", flags_dfh, list);
942 if (*file_version >= 15)
944 do_cpt_int_err(xd, "ED data sets", nED, list);
952 static int do_cpt_footer(XDR *xd, gmx_bool bRead, int file_version)
957 if (file_version >= 2)
960 res = xdr_int(xd, &magic);
965 if (magic != CPT_MAGIC2)
974 static int do_cpt_state(XDR *xd, gmx_bool bRead,
975 int fflags, t_state *state,
976 gmx_bool bReadRNG, FILE *list)
979 int **rng_p, **rngi_p;
986 nnht = state->nhchainlength*state->ngtc;
987 nnhtp = state->nhchainlength*state->nnhpres;
991 rng_p = (int **)&state->ld_rng;
992 rngi_p = &state->ld_rngi;
996 /* Do not read the RNG data */
1000 /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
1002 sflags = state->flags;
1003 for (i = 0; (i < estNR && ret == 0); i++)
1005 if (fflags & (1<<i))
1009 case estLAMBDA: ret = do_cpte_reals(xd, cptpEST, i, sflags, efptNR, &(state->lambda), list); break;
1010 case estFEPSTATE: ret = do_cpte_int (xd, cptpEST, i, sflags, &state->fep_state, list); break;
1011 case estBOX: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box, list); break;
1012 case estBOX_REL: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->box_rel, list); break;
1013 case estBOXV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->boxv, list); break;
1014 case estPRES_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->pres_prev, list); break;
1015 case estSVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->svir_prev, list); break;
1016 case estFVIR_PREV: ret = do_cpte_matrix(xd, cptpEST, i, sflags, state->fvir_prev, list); break;
1017 case estNH_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_xi, list); break;
1018 case estNH_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_vxi, list); break;
1019 case estNHPRES_XI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_xi, list); break;
1020 case estNHPRES_VXI: ret = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_vxi, list); break;
1021 case estTC_INT: ret = do_cpte_doubles(xd, cptpEST, i, sflags, state->ngtc, &state->therm_integral, list); break;
1022 case estVETA: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->veta, list); break;
1023 case estVOL0: ret = do_cpte_real(xd, cptpEST, i, sflags, &state->vol0, list); break;
1024 case estX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->x, list); break;
1025 case estV: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->v, list); break;
1026 case estSDX: ret = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->sd_X, list); break;
1027 case estLD_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrng, rng_p, list); break;
1028 case estLD_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nrngi, rngi_p, list); break;
1029 case estMC_RNG: ret = do_cpte_ints(xd, cptpEST, i, sflags, state->nmcrng, (int **)&state->mc_rng, list); break;
1030 case estMC_RNGI: ret = do_cpte_ints(xd, cptpEST, i, sflags, 1, &state->mc_rngi, list); break;
1031 case estDISRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.disre_initf, list); break;
1032 case estDISRE_RM3TAV: ret = do_cpte_reals(xd, cptpEST, i, sflags, state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
1033 case estORIRE_INITF: ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.orire_initf, list); break;
1034 case estORIRE_DTAV: ret = do_cpte_reals(xd, cptpEST, i, sflags, state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
1036 gmx_fatal(FARGS, "Unknown state entry %d\n"
1037 "You are probably reading a new checkpoint file with old code", i);
1045 static int do_cpt_ekinstate(XDR *xd, gmx_bool bRead,
1046 int fflags, ekinstate_t *ekins,
1054 for (i = 0; (i < eeksNR && ret == 0); i++)
1056 if (fflags & (1<<i))
1061 case eeksEKIN_N: ret = do_cpte_int(xd, cptpEEKS, i, fflags, &ekins->ekin_n, list); break;
1062 case eeksEKINH: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
1063 case eeksEKINF: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
1064 case eeksEKINO: ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
1065 case eeksEKINTOTAL: ret = do_cpte_matrix(xd, cptpEEKS, i, fflags, ekins->ekin_total, list); break;
1066 case eeksEKINSCALEF: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscalef_nhc, list); break;
1067 case eeksVSCALE: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->vscale_nhc, list); break;
1068 case eeksEKINSCALEH: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->ekinscaleh_nhc, list); break;
1069 case eeksDEKINDL: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->dekindl, list); break;
1070 case eeksMVCOS: ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->mvcos, list); break;
1072 gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
1073 "You are probably reading a new checkpoint file with old code", i);
1082 static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
1083 int fflags, energyhistory_t *enerhist,
1094 enerhist->nsteps = 0;
1096 enerhist->nsteps_sim = 0;
1097 enerhist->nsum_sim = 0;
1098 enerhist->dht = NULL;
1100 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1102 snew(enerhist->dht, 1);
1103 enerhist->dht->ndh = NULL;
1104 enerhist->dht->dh = NULL;
1105 enerhist->dht->start_lambda_set = FALSE;
1109 for (i = 0; (i < eenhNR && ret == 0); i++)
1111 if (fflags & (1<<i))
1115 case eenhENERGY_N: ret = do_cpte_int(xd, cptpEENH, i, fflags, &enerhist->nener, list); break;
1116 case eenhENERGY_AVER: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_ave, list); break;
1117 case eenhENERGY_SUM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum, list); break;
1118 case eenhENERGY_NSUM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
1119 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum_sim, list); break;
1120 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
1121 case eenhENERGY_NSTEPS: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
1122 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
1123 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd, eenh_names[i], &(enerhist->dht->nndh), list);
1124 if (bRead) /* now allocate memory for it */
1126 snew(enerhist->dht->dh, enerhist->dht->nndh);
1127 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1128 for (j = 0; j < enerhist->dht->nndh; j++)
1130 enerhist->dht->ndh[j] = 0;
1131 enerhist->dht->dh[j] = NULL;
1135 case eenhENERGY_DELTA_H_LIST:
1136 for (j = 0; j < enerhist->dht->nndh; j++)
1138 ret = do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1141 case eenhENERGY_DELTA_H_STARTTIME:
1142 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1143 case eenhENERGY_DELTA_H_STARTLAMBDA:
1144 ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1146 gmx_fatal(FARGS, "Unknown energy history entry %d\n"
1147 "You are probably reading a new checkpoint file with old code", i);
1152 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1154 /* Assume we have an old file format and copy sum to sum_sim */
1155 srenew(enerhist->ener_sum_sim, enerhist->nener);
1156 for (i = 0; i < enerhist->nener; i++)
1158 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1160 fflags |= (1<<eenhENERGY_SUM_SIM);
1163 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1164 !(fflags & (1<<eenhENERGY_NSTEPS)))
1166 /* Assume we have an old file format and copy nsum to nsteps */
1167 enerhist->nsteps = enerhist->nsum;
1168 fflags |= (1<<eenhENERGY_NSTEPS);
1170 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1171 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1173 /* Assume we have an old file format and copy nsum to nsteps */
1174 enerhist->nsteps_sim = enerhist->nsum_sim;
1175 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1181 static int do_cpt_df_hist(XDR *xd, gmx_bool bRead, int fflags, df_history_t *dfhist, FILE *list)
1186 nlambda = dfhist->nlambda;
1189 for (i = 0; (i < edfhNR && ret == 0); i++)
1191 if (fflags & (1<<i))
1195 case edfhBEQUIL: ret = do_cpte_int(xd, cptpEDFH, i, fflags, &dfhist->bEquil, list); break;
1196 case edfhNATLAMBDA: ret = do_cpte_ints(xd, cptpEDFH, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
1197 case edfhWLHISTO: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->wl_histo, list); break;
1198 case edfhWLDELTA: ret = do_cpte_real(xd, cptpEDFH, i, fflags, &dfhist->wl_delta, list); break;
1199 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_weights, list); break;
1200 case edfhSUMDG: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_dg, list); break;
1201 case edfhSUMMINVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
1202 case edfhSUMVAR: ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_variance, list); break;
1203 case edfhACCUMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p, list); break;
1204 case edfhACCUMM: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m, list); break;
1205 case edfhACCUMP2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p2, list); break;
1206 case edfhACCUMM2: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m2, list); break;
1207 case edfhTIJ: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij, list); break;
1208 case edfhTIJEMP: ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
1211 gmx_fatal(FARGS, "Unknown df history entry %d\n"
1212 "You are probably reading a new checkpoint file with old code", i);
1221 /* This function stores the last whole configuration of the reference and
1222 * average structure in the .cpt file
1224 static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
1225 edsamstate_t *EDstate, FILE *list)
1232 EDstate->bFromCpt = bRead;
1234 if (EDstate->nED <= 0)
1239 /* When reading, init_edsam has not been called yet,
1240 * so we have to allocate memory first. */
1243 snew(EDstate->nref, EDstate->nED);
1244 snew(EDstate->old_sref, EDstate->nED);
1245 snew(EDstate->nav, EDstate->nED);
1246 snew(EDstate->old_sav, EDstate->nED);
1249 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1250 for (i = 0; i < EDstate->nED; i++)
1252 /* Reference structure SREF */
1253 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1254 do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
1255 sprintf(buf, "ED%d x_ref", i+1);
1258 snew(EDstate->old_sref[i], EDstate->nref[i]);
1259 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1263 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1266 /* Average structure SAV */
1267 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1268 do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
1269 sprintf(buf, "ED%d x_av", i+1);
1272 snew(EDstate->old_sav[i], EDstate->nav[i]);
1273 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1277 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1285 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1286 gmx_file_position_t **p_outputfiles, int *nfiles,
1287 FILE *list, int file_version)
1291 gmx_off_t mask = 0xFFFFFFFFL;
1292 int offset_high, offset_low;
1294 gmx_file_position_t *outputfiles;
1296 if (do_cpt_int(xd, "number of output files", nfiles, list) != 0)
1303 snew(*p_outputfiles, *nfiles);
1306 outputfiles = *p_outputfiles;
1308 for (i = 0; i < *nfiles; i++)
1310 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1313 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1314 strncpy(outputfiles[i].filename, buf, CPTSTRLEN-1);
1320 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1324 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1328 #if (SIZEOF_GMX_OFF_T > 4)
1329 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1331 outputfiles[i].offset = offset_low;
1336 buf = outputfiles[i].filename;
1337 do_cpt_string_err(xd, bRead, "output filename", &buf, list);
1339 offset = outputfiles[i].offset;
1347 #if (SIZEOF_GMX_OFF_T > 4)
1348 offset_low = (int) (offset & mask);
1349 offset_high = (int) ((offset >> 32) & mask);
1351 offset_low = offset;
1355 if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
1359 if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
1364 if (file_version >= 8)
1366 if (do_cpt_int(xd, "file_checksum_size", &(outputfiles[i].chksum_size),
1371 if (do_cpt_u_chars(xd, "file_checksum", 16, outputfiles[i].chksum, list) != 0)
1378 outputfiles[i].chksum_size = -1;
1385 void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
1386 FILE *fplog, t_commrec *cr,
1387 int eIntegrator, int simulation_part,
1388 gmx_bool bExpanded, int elamstats,
1389 gmx_large_int_t step, double t, t_state *state)
1399 char *fntemp; /* the temporary checkpoint file name */
1401 char timebuf[STRLEN];
1402 int nppnodes, npmenodes, flag_64bit;
1403 char buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
1404 gmx_file_position_t *outputfiles;
1407 int flags_eks, flags_enh, flags_dfh, i;
1412 if (DOMAINDECOMP(cr))
1414 nppnodes = cr->dd->nnodes;
1415 npmenodes = cr->npmenodes;
1419 nppnodes = cr->nnodes;
1429 /* make the new temporary filename */
1430 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1432 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1433 sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
1434 strcat(fntemp, suffix);
1435 strcat(fntemp, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1438 gmx_ctime_r(&now, timebuf, STRLEN);
1442 fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
1443 gmx_step_str(step, buf), timebuf);
1446 /* Get offsets for open files */
1447 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1449 fp = gmx_fio_open(fntemp, "w");
1451 if (state->ekinstate.bUpToDate)
1454 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1455 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1456 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1464 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1466 flags_enh |= (1<<eenhENERGY_N);
1467 if (state->enerhist.nsum > 0)
1469 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1470 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1472 if (state->enerhist.nsum_sim > 0)
1474 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1475 (1<<eenhENERGY_NSUM_SIM));
1477 if (state->enerhist.dht)
1479 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1480 (1<< eenhENERGY_DELTA_H_LIST) |
1481 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1482 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1488 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1489 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1492 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1494 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1496 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1497 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1505 /* We can check many more things now (CPU, acceleration, etc), but
1506 * it is highly unlikely to have two separate builds with exactly
1507 * the same version, user, time, and build host!
1510 version = gmx_strdup(VERSION);
1511 btime = gmx_strdup(BUILD_TIME);
1512 buser = gmx_strdup(BUILD_USER);
1513 bhost = gmx_strdup(BUILD_HOST);
1515 double_prec = GMX_CPT_BUILD_DP;
1516 fprog = gmx_strdup(Program());
1518 ftime = &(timebuf[0]);
1520 do_cpt_header(gmx_fio_getxdr(fp), FALSE, &file_version,
1521 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1522 &eIntegrator, &simulation_part, &step, &t, &nppnodes,
1523 DOMAINDECOMP(cr) ? cr->dd->nc : NULL, &npmenodes,
1524 &state->natoms, &state->ngtc, &state->nnhpres,
1525 &state->nhchainlength, &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
1526 &state->edsamstate.nED,
1535 if ((do_cpt_state(gmx_fio_getxdr(fp), FALSE, state->flags, state, TRUE, NULL) < 0) ||
1536 (do_cpt_ekinstate(gmx_fio_getxdr(fp), FALSE, flags_eks, &state->ekinstate, NULL) < 0) ||
1537 (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, &state->enerhist, NULL) < 0) ||
1538 (do_cpt_df_hist(gmx_fio_getxdr(fp), FALSE, flags_dfh, &state->dfhist, NULL) < 0) ||
1539 (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, &state->edsamstate, NULL) < 0) ||
1540 (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, &noutputfiles, NULL,
1543 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1546 do_cpt_footer(gmx_fio_getxdr(fp), FALSE, file_version);
1548 /* we really, REALLY, want to make sure to physically write the checkpoint,
1549 and all the files it depends on, out to disk. Because we've
1550 opened the checkpoint with gmx_fio_open(), it's in our list
1552 ret = gmx_fio_all_output_fsync();
1558 "Cannot fsync '%s'; maybe you are out of disk space?",
1559 gmx_fio_getname(ret));
1561 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == NULL)
1571 if (gmx_fio_close(fp) != 0)
1573 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1576 /* we don't move the checkpoint if the user specified they didn't want it,
1577 or if the fsyncs failed */
1578 if (!bNumberAndKeep && !ret)
1582 /* Rename the previous checkpoint file */
1584 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1585 strcat(buf, "_prev");
1586 strcat(buf, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1588 /* we copy here so that if something goes wrong between now and
1589 * the rename below, there's always a state.cpt.
1590 * If renames are atomic (such as in POSIX systems),
1591 * this copying should be unneccesary.
1593 gmx_file_copy(fn, buf, FALSE);
1594 /* We don't really care if this fails:
1595 * there's already a new checkpoint.
1598 gmx_file_rename(fn, buf);
1601 if (gmx_file_rename(fntemp, fn) != 0)
1603 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1611 /*code for alternate checkpointing scheme. moved from top of loop over
1613 fcRequestCheckPoint();
1614 if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
1616 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
1618 #endif /* end GMX_FAHCORE block */
1621 static void print_flag_mismatch(FILE *fplog, int sflags, int fflags)
1625 fprintf(fplog, "\nState entry mismatch between the simulation and the checkpoint file\n");
1626 fprintf(fplog, "Entries which are not present in the checkpoint file will not be updated\n");
1627 fprintf(fplog, " %24s %11s %11s\n", "", "simulation", "checkpoint");
1628 for (i = 0; i < estNR; i++)
1630 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1632 fprintf(fplog, " %24s %11s %11s\n",
1634 (sflags & (1<<i)) ? " present " : "not present",
1635 (fflags & (1<<i)) ? " present " : "not present");
1640 static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
1642 FILE *fp = fplog ? fplog : stderr;
1646 fprintf(fp, " %s mismatch,\n", type);
1647 fprintf(fp, " current program: %d\n", p);
1648 fprintf(fp, " checkpoint file: %d\n", f);
1654 static void check_string(FILE *fplog, const char *type, const char *p,
1655 const char *f, gmx_bool *mm)
1657 FILE *fp = fplog ? fplog : stderr;
1659 if (strcmp(p, f) != 0)
1661 fprintf(fp, " %s mismatch,\n", type);
1662 fprintf(fp, " current program: %s\n", p);
1663 fprintf(fp, " checkpoint file: %s\n", f);
1669 static void check_match(FILE *fplog,
1671 char *btime, char *buser, char *bhost, int double_prec,
1673 t_commrec *cr, gmx_bool bPartDecomp, int npp_f, int npme_f,
1674 ivec dd_nc, ivec dd_nc_f)
1681 check_string(fplog, "Version", VERSION, version, &mm);
1682 check_string(fplog, "Build time", BUILD_TIME, btime, &mm);
1683 check_string(fplog, "Build user", BUILD_USER, buser, &mm);
1684 check_string(fplog, "Build host", BUILD_HOST, bhost, &mm);
1685 check_int (fplog, "Double prec.", GMX_CPT_BUILD_DP, double_prec, &mm);
1686 check_string(fplog, "Program name", Program(), fprog, &mm);
1688 check_int (fplog, "#nodes", cr->nnodes, npp_f+npme_f, &mm);
1697 check_int (fplog, "#PME-nodes", cr->npmenodes, npme_f, &mm);
1700 if (cr->npmenodes >= 0)
1702 npp -= cr->npmenodes;
1706 check_int (fplog, "#DD-cells[x]", dd_nc[XX], dd_nc_f[XX], &mm);
1707 check_int (fplog, "#DD-cells[y]", dd_nc[YY], dd_nc_f[YY], &mm);
1708 check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], dd_nc_f[ZZ], &mm);
1715 "Gromacs binary or parallel settings not identical to previous run.\n"
1716 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1717 fplog ? ",\n see the log file for details" : "");
1722 "Gromacs binary or parallel settings not identical to previous run.\n"
1723 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1728 static void read_checkpoint(const char *fn, FILE **pfplog,
1729 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
1730 int eIntegrator, int *init_fep_state, gmx_large_int_t *step, double *t,
1731 t_state *state, gmx_bool *bReadRNG, gmx_bool *bReadEkin,
1732 int *simulation_part,
1733 gmx_bool bAppendOutputFiles, gmx_bool bForceAppend)
1738 char *version, *btime, *buser, *bhost, *fprog, *ftime;
1740 char filename[STRLEN], buf[STEPSTRSIZE];
1741 int nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
1743 int natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
1746 gmx_file_position_t *outputfiles;
1748 t_fileio *chksum_file;
1749 FILE * fplog = *pfplog;
1750 unsigned char digest[16];
1751 #ifndef GMX_NATIVE_WINDOWS
1752 struct flock fl; /* don't initialize here: the struct order is OS
1756 const char *int_warn =
1757 "WARNING: The checkpoint file was generated with integrator %s,\n"
1758 " while the simulation uses integrator %s\n\n";
1759 const char *sd_note =
1760 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1761 " while the simulation uses %d SD or BD nodes,\n"
1762 " continuation will be exact, except for the random state\n\n";
1764 #ifndef GMX_NATIVE_WINDOWS
1765 fl.l_type = F_WRLCK;
1766 fl.l_whence = SEEK_SET;
1775 "read_checkpoint not (yet) supported with particle decomposition");
1778 fp = gmx_fio_open(fn, "r");
1779 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
1780 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
1781 &eIntegrator_f, simulation_part, step, t,
1782 &nppnodes_f, dd_nc_f, &npmenodes_f,
1783 &natoms, &ngtc, &nnhpres, &nhchainlength, &nlambda,
1784 &fflags, &flags_eks, &flags_enh, &flags_dfh,
1785 &state->edsamstate.nED, NULL);
1787 if (bAppendOutputFiles &&
1788 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1790 gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1793 if (cr == NULL || MASTER(cr))
1795 fprintf(stderr, "\nReading checkpoint file %s generated: %s\n\n",
1799 /* This will not be written if we do appending, since fplog is still NULL then */
1802 fprintf(fplog, "\n");
1803 fprintf(fplog, "Reading checkpoint file %s\n", fn);
1804 fprintf(fplog, " file generated by: %s\n", fprog);
1805 fprintf(fplog, " file generated at: %s\n", ftime);
1806 fprintf(fplog, " GROMACS build time: %s\n", btime);
1807 fprintf(fplog, " GROMACS build user: %s\n", buser);
1808 fprintf(fplog, " GROMACS build host: %s\n", bhost);
1809 fprintf(fplog, " GROMACS double prec.: %d\n", double_prec);
1810 fprintf(fplog, " simulation part #: %d\n", *simulation_part);
1811 fprintf(fplog, " step: %s\n", gmx_step_str(*step, buf));
1812 fprintf(fplog, " time: %f\n", *t);
1813 fprintf(fplog, "\n");
1816 if (natoms != state->natoms)
1818 gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", natoms, state->natoms);
1820 if (ngtc != state->ngtc)
1822 gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", ngtc, state->ngtc);
1824 if (nnhpres != state->nnhpres)
1826 gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", nnhpres, state->nnhpres);
1829 if (nlambda != state->dfhist.nlambda)
1831 gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", nlambda, state->dfhist.nlambda);
1834 init_gtc_state(state, state->ngtc, state->nnhpres, nhchainlength); /* need to keep this here to keep the tpr format working */
1835 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1837 if (eIntegrator_f != eIntegrator)
1841 fprintf(stderr, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1843 if (bAppendOutputFiles)
1846 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1847 "Stopping the run to prevent you from ruining all your data...\n"
1848 "If you _really_ know what you are doing, try with the -noappend option.\n");
1852 fprintf(fplog, int_warn, EI(eIntegrator_f), EI(eIntegrator));
1861 else if (bPartDecomp)
1863 nppnodes = cr->nnodes;
1866 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1868 if (cr->npmenodes < 0)
1870 cr->npmenodes = npmenodes_f;
1872 nppnodes = cr->nnodes - cr->npmenodes;
1873 if (nppnodes == nppnodes_f)
1875 for (d = 0; d < DIM; d++)
1879 dd_nc[d] = dd_nc_f[d];
1886 /* The number of PP nodes has not been set yet */
1890 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1892 /* Correct the RNG state size for the number of PP nodes.
1893 * Such assignments should all be moved to one central function.
1895 state->nrng = nppnodes*gmx_rng_n();
1896 state->nrngi = nppnodes;
1900 if (fflags != state->flags)
1905 if (bAppendOutputFiles)
1908 "Output file appending requested, but input and checkpoint states are not identical.\n"
1909 "Stopping the run to prevent you from ruining all your data...\n"
1910 "You can try with the -noappend option, and get more info in the log file.\n");
1913 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1915 gmx_fatal(FARGS, "You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1920 "WARNING: The checkpoint state entries do not match the simulation,\n"
1921 " see the log file for details\n\n");
1927 print_flag_mismatch(fplog, state->flags, fflags);
1932 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1933 nppnodes != nppnodes_f)
1938 fprintf(stderr, sd_note, nppnodes_f, nppnodes);
1942 fprintf(fplog, sd_note, nppnodes_f, nppnodes);
1947 check_match(fplog, version, btime, buser, bhost, double_prec, fprog,
1948 cr, bPartDecomp, nppnodes_f, npmenodes_f, dd_nc, dd_nc_f);
1951 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, fflags, state, *bReadRNG, NULL);
1952 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1953 Investigate for 5.0. */
1958 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), TRUE,
1959 flags_eks, &state->ekinstate, NULL);
1964 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1965 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1967 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
1968 flags_enh, &state->enerhist, NULL);
1974 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
1980 if (file_version < 6)
1982 const char *warn = "Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1984 fprintf(stderr, "\nWARNING: %s\n\n", warn);
1987 fprintf(fplog, "\nWARNING: %s\n\n", warn);
1989 state->enerhist.nsum = *step;
1990 state->enerhist.nsum_sim = *step;
1993 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), TRUE,
1994 flags_dfh, &state->dfhist, NULL);
2000 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, NULL, file_version);
2006 ret = do_cpt_footer(gmx_fio_getxdr(fp), TRUE, file_version);
2011 if (gmx_fio_close(fp) != 0)
2013 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2022 /* If the user wants to append to output files,
2023 * we use the file pointer positions of the output files stored
2024 * in the checkpoint file and truncate the files such that any frames
2025 * written after the checkpoint time are removed.
2026 * All files are md5sum checked such that we can be sure that
2027 * we do not truncate other (maybe imprortant) files.
2029 if (bAppendOutputFiles)
2031 if (fn2ftp(outputfiles[0].filename) != efLOG)
2033 /* make sure first file is log file so that it is OK to use it for
2036 gmx_fatal(FARGS, "The first output file should always be the log "
2037 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2039 for (i = 0; i < nfiles; i++)
2041 if (outputfiles[i].offset < 0)
2043 gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
2044 "is larger than 2 GB, but mdrun did not support large file"
2045 " offsets. Can not append. Run mdrun with -noappend",
2046 outputfiles[i].filename);
2049 chksum_file = gmx_fio_open(outputfiles[i].filename, "a");
2052 chksum_file = gmx_fio_open(outputfiles[i].filename, "r+");
2057 /* Note that there are systems where the lock operation
2058 * will succeed, but a second process can also lock the file.
2059 * We should probably try to detect this.
2061 #ifndef GMX_NATIVE_WINDOWS
2062 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
2065 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
2068 if (errno == ENOSYS)
2072 gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2076 fprintf(stderr, "\nNOTE: File locking is not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2079 fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
2083 else if (errno == EACCES || errno == EAGAIN)
2085 gmx_fatal(FARGS, "Failed to lock: %s. Already running "
2086 "simulation?", outputfiles[i].filename);
2090 gmx_fatal(FARGS, "Failed to lock: %s. %s.",
2091 outputfiles[i].filename, strerror(errno));
2096 /* compute md5 chksum */
2097 if (outputfiles[i].chksum_size != -1)
2099 if (gmx_fio_get_file_md5(chksum_file, outputfiles[i].offset,
2100 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2102 gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2103 outputfiles[i].chksum_size,
2104 outputfiles[i].filename);
2107 if (i == 0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2109 if (gmx_fio_seek(chksum_file, outputfiles[i].offset))
2111 gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", strerror(errno));
2116 if (i == 0) /*open log file here - so that lock is never lifted
2117 after chksum is calculated */
2119 *pfplog = gmx_fio_getfp(chksum_file);
2123 gmx_fio_close(chksum_file);
2126 /* compare md5 chksum */
2127 if (outputfiles[i].chksum_size != -1 &&
2128 memcmp(digest, outputfiles[i].chksum, 16) != 0)
2132 fprintf(debug, "chksum for %s: ", outputfiles[i].filename);
2133 for (j = 0; j < 16; j++)
2135 fprintf(debug, "%02x", digest[j]);
2137 fprintf(debug, "\n");
2139 gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2140 outputfiles[i].filename);
2145 if (i != 0) /*log file is already seeked to correct position */
2147 #ifdef GMX_NATIVE_WINDOWS
2148 rc = gmx_wintruncate(outputfiles[i].filename, outputfiles[i].offset);
2150 rc = truncate(outputfiles[i].filename, outputfiles[i].offset);
2154 gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfiles[i].filename);
2164 void load_checkpoint(const char *fn, FILE **fplog,
2165 t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
2166 t_inputrec *ir, t_state *state,
2167 gmx_bool *bReadRNG, gmx_bool *bReadEkin,
2168 gmx_bool bAppend, gmx_bool bForceAppend)
2170 gmx_large_int_t step;
2175 /* Read the state from the checkpoint file */
2176 read_checkpoint(fn, fplog,
2177 cr, bPartDecomp, dd_nc,
2178 ir->eI, &(ir->fepvals->init_fep_state), &step, &t, state, bReadRNG, bReadEkin,
2179 &ir->simulation_part, bAppend, bForceAppend);
2183 gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
2184 gmx_bcast(DIM*sizeof(dd_nc[0]), dd_nc, cr);
2185 gmx_bcast(sizeof(step), &step, cr);
2186 gmx_bcast(sizeof(*bReadRNG), bReadRNG, cr);
2187 gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
2189 ir->bContinuation = TRUE;
2190 if (ir->nsteps >= 0)
2192 ir->nsteps += ir->init_step - step;
2194 ir->init_step = step;
2195 ir->simulation_part += 1;
2198 static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
2199 gmx_large_int_t *step, double *t, t_state *state,
2201 int *nfiles, gmx_file_position_t **outputfiles)
2204 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2209 int flags_eks, flags_enh, flags_dfh;
2211 gmx_file_position_t *files_loc = NULL;
2214 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2215 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2216 &eIntegrator, simulation_part, step, t, &nppnodes, dd_nc, &npme,
2217 &state->natoms, &state->ngtc, &state->nnhpres, &state->nhchainlength,
2218 &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
2219 &state->edsamstate.nED, NULL);
2221 do_cpt_state(gmx_fio_getxdr(fp), TRUE, state->flags, state, bReadRNG, NULL);
2226 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), TRUE,
2227 flags_eks, &state->ekinstate, NULL);
2232 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2233 flags_enh, &state->enerhist, NULL);
2238 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), TRUE,
2239 flags_dfh, &state->dfhist, NULL);
2245 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
2251 ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
2252 outputfiles != NULL ? outputfiles : &files_loc,
2253 outputfiles != NULL ? nfiles : &nfiles_loc,
2254 NULL, file_version);
2255 if (files_loc != NULL)
2265 ret = do_cpt_footer(gmx_fio_getxdr(fp), TRUE, file_version);
2279 read_checkpoint_state(const char *fn, int *simulation_part,
2280 gmx_large_int_t *step, double *t, t_state *state)
2284 fp = gmx_fio_open(fn, "r");
2285 read_checkpoint_data(fp, simulation_part, step, t, state, FALSE, NULL, NULL);
2286 if (gmx_fio_close(fp) != 0)
2288 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2292 void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
2295 int simulation_part;
2296 gmx_large_int_t step;
2299 init_state(&state, 0, 0, 0, 0, 0);
2301 read_checkpoint_data(fp, &simulation_part, &step, &t, &state, FALSE, NULL, NULL);
2303 fr->natoms = state.natoms;
2306 fr->step = gmx_large_int_to_int(step,
2307 "conversion of checkpoint to trajectory");
2311 fr->lambda = state.lambda[efptFEP];
2312 fr->fep_state = state.fep_state;
2314 fr->bX = (state.flags & (1<<estX));
2320 fr->bV = (state.flags & (1<<estV));
2327 fr->bBox = (state.flags & (1<<estBOX));
2330 copy_mat(state.box, fr->box);
2335 void list_checkpoint(const char *fn, FILE *out)
2339 char *version, *btime, *buser, *bhost, *fprog, *ftime;
2341 int eIntegrator, simulation_part, nppnodes, npme;
2342 gmx_large_int_t step;
2346 int flags_eks, flags_enh, flags_dfh;
2350 gmx_file_position_t *outputfiles;
2353 init_state(&state, -1, -1, -1, -1, 0);
2355 fp = gmx_fio_open(fn, "r");
2356 do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
2357 &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
2358 &eIntegrator, &simulation_part, &step, &t, &nppnodes, dd_nc, &npme,
2359 &state.natoms, &state.ngtc, &state.nnhpres, &state.nhchainlength,
2360 &(state.dfhist.nlambda), &state.flags,
2361 &flags_eks, &flags_enh, &flags_dfh, &state.edsamstate.nED, out);
2362 ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, state.flags, &state, TRUE, out);
2367 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), TRUE,
2368 flags_eks, &state.ekinstate, out);
2373 ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
2374 flags_enh, &state.enerhist, out);
2378 init_df_history(&state.dfhist, state.dfhist.nlambda, 0); /* reinitialize state with correct sizes */
2379 ret = do_cpt_df_hist(gmx_fio_getxdr(fp), TRUE,
2380 flags_dfh, &state.dfhist, out);
2385 ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state.edsamstate, out);
2390 do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, out, file_version);
2395 ret = do_cpt_footer(gmx_fio_getxdr(fp), TRUE, file_version);
2402 if (gmx_fio_close(fp) != 0)
2404 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2411 static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
2415 /* Check if the output file name stored in the checkpoint file
2416 * is one of the output file names of mdrun.
2420 !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
2425 return (i < nfile && gmx_fexist(fnm_cp));
2428 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2429 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2430 gmx_large_int_t *cpt_step, t_commrec *cr,
2431 gmx_bool bAppendReq,
2432 int nfile, const t_filenm fnm[],
2433 const char *part_suffix, gmx_bool *bAddPart)
2436 gmx_large_int_t step = 0;
2440 gmx_file_position_t *outputfiles;
2443 char *fn, suf_up[STRLEN];
2449 if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
2451 *simulation_part = 0;
2455 init_state(&state, 0, 0, 0, 0, 0);
2457 read_checkpoint_data(fp, simulation_part, &step, &t, &state, FALSE,
2458 &nfiles, &outputfiles);
2459 if (gmx_fio_close(fp) != 0)
2461 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2468 for (f = 0; f < nfiles; f++)
2470 if (exist_output_file(outputfiles[f].filename, nfile, fnm))
2475 if (nexist == nfiles)
2477 bAppend = bAppendReq;
2479 else if (nexist > 0)
2482 "Output file appending has been requested,\n"
2483 "but some output files listed in the checkpoint file %s\n"
2484 "are not present or are named differently by the current program:\n",
2486 fprintf(stderr, "output files present:");
2487 for (f = 0; f < nfiles; f++)
2489 if (exist_output_file(outputfiles[f].filename,
2492 fprintf(stderr, " %s", outputfiles[f].filename);
2495 fprintf(stderr, "\n");
2496 fprintf(stderr, "output files not present or named differently:");
2497 for (f = 0; f < nfiles; f++)
2499 if (!exist_output_file(outputfiles[f].filename,
2502 fprintf(stderr, " %s", outputfiles[f].filename);
2505 fprintf(stderr, "\n");
2507 gmx_fatal(FARGS, "File appending requested, but only %d of the %d output files are present", nexist, nfiles);
2515 gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
2517 fn = outputfiles[0].filename;
2518 if (strlen(fn) < 4 ||
2519 gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
2521 gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
2523 /* Set bAddPart to whether the suffix string '.part' is present
2524 * in the log file name.
2526 strcpy(suf_up, part_suffix);
2528 *bAddPart = (strstr(fn, part_suffix) != NULL ||
2529 strstr(fn, suf_up) != NULL);
2537 gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
2539 if (*simulation_part > 0 && bAppendReq)
2541 gmx_bcast(sizeof(bAppend), &bAppend, cr);
2542 gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
2545 if (NULL != cpt_step)