1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
30 #ifdef HAVE_SYS_TIME_H
38 #ifdef GMX_NATIVE_WINDOWS
41 #include <sys/locking.h>
55 #include "gmx_random.h"
56 #include "checkpoint.h"
61 #include "buildinfo.h"
68 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
70 gmx_ctime_r(const time_t *clock,char *buf, int n);
73 #define CPT_MAGIC1 171817
74 #define CPT_MAGIC2 171819
75 #define CPTSTRLEN 1024
78 #define GMX_CPT_BUILD_DP 1
80 #define GMX_CPT_BUILD_DP 0
83 /* cpt_version should normally only be changed
84 * when the header of footer format changes.
85 * The state data format itself is backward and forward compatible.
86 * But old code can not read a new entry that is present in the file
87 * (but can read a new format when new entries are not present).
89 static const int cpt_version = 14;
92 const char *est_names[estNR]=
95 "box", "box-rel", "box-v", "pres_prev",
96 "nosehoover-xi", "thermostat-integral",
97 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
98 "disre_initf", "disre_rm3tav",
99 "orire_initf", "orire_Dtav",
100 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
103 enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
105 const char *eeks_names[eeksNR]=
107 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
108 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
111 enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
112 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
113 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
114 eenhENERGY_DELTA_H_NN,
115 eenhENERGY_DELTA_H_LIST,
116 eenhENERGY_DELTA_H_STARTTIME,
117 eenhENERGY_DELTA_H_STARTLAMBDA,
120 const char *eenh_names[eenhNR]=
122 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
123 "energy_sum_sim", "energy_nsum_sim",
124 "energy_nsteps", "energy_nsteps_sim",
126 "energy_delta_h_list",
127 "energy_delta_h_start_time",
128 "energy_delta_h_start_lambda"
131 /* free energy history variables -- need to be preserved over checkpoint */
132 enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
133 edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
134 /* free energy history variable names */
135 const char *edfh_names[edfhNR]=
137 "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
138 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
141 #ifdef GMX_NATIVE_WINDOWS
143 gmx_wintruncate(const char *filename, __int64 size)
146 /*we do this elsewhere*/
152 fp=fopen(filename,"rb+");
159 return _chsize_s( fileno(fp), size);
165 enum { ecprREAL, ecprRVEC, ecprMATRIX };
167 enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
168 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
169 cptpEST - state variables.
170 cptpEEKS - Kinetic energy state variables.
171 cptpEENH - Energy history state variables.
172 cptpEDFH - free energy history variables.
176 static const char *st_names(int cptp,int ecpt)
180 case cptpEST: return est_names [ecpt]; break;
181 case cptpEEKS: return eeks_names[ecpt]; break;
182 case cptpEENH: return eenh_names[ecpt]; break;
183 case cptpEDFH: return edfh_names[ecpt]; break;
189 static void cp_warning(FILE *fp)
191 fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
194 static void cp_error()
196 gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
199 static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
207 res = xdr_string(xd,s,CPTSTRLEN);
214 fprintf(list,"%s = %s\n",desc,*s);
219 static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
230 fprintf(list,"%s = %d\n",desc,*i);
235 static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
241 fprintf(list,"%s = ",desc);
243 for (j=0; j<n && res; j++)
245 res &= xdr_u_char(xd,&i[j]);
248 fprintf(list,"%02x",i[j]);
263 static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
265 if (do_cpt_int(xd,desc,i,list) < 0)
271 static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
274 char buf[STEPSTRSIZE];
276 res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
283 fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
287 static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
291 res = xdr_double(xd,f);
298 fprintf(list,"%s = %f\n",desc,*f);
302 /* If nval >= 0, nval is used; on read this should match the passed value.
303 * If nval n<0, *nptr is used; on read the value is stored in nptr
305 static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
306 int nval,int *nptr,real **v,
307 FILE *list,int erealtype)
311 int dtc=xdr_datatype_float;
313 int dtc=xdr_datatype_double;
330 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
335 res = xdr_int(xd,&nf);
346 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
355 res = xdr_int(xd,&dt);
362 fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
363 st_names(cptp,ecpt),xdr_datatype_names[dtc],
364 xdr_datatype_names[dt]);
366 if (list || !(sflags & (1<<ecpt)))
379 if (dt == xdr_datatype_float)
381 if (dtc == xdr_datatype_float)
389 res = xdr_vector(xd,(char *)vf,nf,
390 (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
395 if (dtc != xdr_datatype_float)
406 if (dtc == xdr_datatype_double)
414 res = xdr_vector(xd,(char *)vd,nf,
415 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
420 if (dtc != xdr_datatype_double)
435 pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
438 pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
441 gmx_incons("Unknown checkpoint real type");
453 /* This function stores n along with the reals for reading,
454 * but on reading it assumes that n matches the value in the checkpoint file,
455 * a fatal error is generated when this is not the case.
457 static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
458 int n,real **v,FILE *list)
460 return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
463 /* This function does the same as do_cpte_reals,
464 * except that on reading it ignores the passed value of *n
465 * and stored the value read from the checkpoint file in *n.
467 static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
468 int *n,real **v,FILE *list)
470 return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
473 static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
478 return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
481 static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
482 int n,int **v,FILE *list)
485 int dtc=xdr_datatype_int;
490 res = xdr_int(xd,&nf);
495 if (list == NULL && v != NULL && nf != n)
497 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
500 res = xdr_int(xd,&dt);
507 gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
508 st_names(cptp,ecpt),xdr_datatype_names[dtc],
509 xdr_datatype_names[dt]);
511 if (list || !(sflags & (1<<ecpt)) || v == NULL)
524 res = xdr_vector(xd,(char *)vp,nf,
525 (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
532 pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
542 static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
545 return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
548 static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
549 int n,double **v,FILE *list)
552 int dtc=xdr_datatype_double;
557 res = xdr_int(xd,&nf);
562 if (list == NULL && nf != n)
564 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
567 res = xdr_int(xd,&dt);
574 gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
575 st_names(cptp,ecpt),xdr_datatype_names[dtc],
576 xdr_datatype_names[dt]);
578 if (list || !(sflags & (1<<ecpt)))
591 res = xdr_vector(xd,(char *)vp,nf,
592 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
599 pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
609 static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
610 double *r,FILE *list)
612 return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
616 static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
617 int n,rvec **v,FILE *list)
621 return do_cpte_reals_low(xd,cptp,ecpt,sflags,
622 n*DIM,NULL,(real **)v,list,ecprRVEC);
625 static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
631 vr = (real *)&(v[0][0]);
632 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
633 DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
635 if (list && ret == 0)
637 pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
644 static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
645 int n, real **v,FILE *list)
650 char name[CPTSTRLEN];
661 reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
662 if (list && reti == 0)
664 sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
665 pr_reals(list,0,name,v[i],n);
675 static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
676 int n,matrix **v,FILE *list)
685 res = xdr_int(xd,&nf);
690 if (list == NULL && nf != n)
692 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
694 if (list || !(sflags & (1<<ecpt)))
714 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
718 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
719 nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
726 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
732 if (list && ret == 0)
736 pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
747 static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
748 char **version,char **btime,char **buser,char **bhost,
750 char **fprog,char **ftime,
751 int *eIntegrator,int *simulation_part,
752 gmx_large_int_t *step,double *t,
753 int *nnodes,int *dd_nc,int *npme,
754 int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
755 int *nlambda, int *flags_state,
756 int *flags_eks,int *flags_enh, int *flags_dfh,
773 res = xdr_int(xd,&magic);
776 gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
778 if (magic != CPT_MAGIC1)
780 gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
781 "The checkpoint file is corrupted or not a checkpoint file",
788 if (gethostname(fhost,255) != 0)
790 sprintf(fhost,"unknown");
793 sprintf(fhost,"unknown");
796 do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
797 do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
798 do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
799 do_cpt_string_err(xd,bRead,"GROMACS build host" ,bhost,list);
800 do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
801 do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
802 *file_version = cpt_version;
803 do_cpt_int_err(xd,"checkpoint file version",file_version,list);
804 if (*file_version > cpt_version)
806 gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
808 if (*file_version >= 13)
810 do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
816 if (*file_version >= 12)
818 do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
824 do_cpt_int_err(xd,"#atoms" ,natoms ,list);
825 do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
826 if (*file_version >= 10)
828 do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
834 if (*file_version >= 11)
836 do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
842 if (*file_version >= 14)
844 do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
850 do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
851 if (*file_version >= 3)
853 do_cpt_int_err(xd,"simulation part #", simulation_part,list);
857 *simulation_part = 1;
859 if (*file_version >= 5)
861 do_cpt_step_err(xd,"step" ,step ,list);
865 do_cpt_int_err(xd,"step" ,&idum ,list);
868 do_cpt_double_err(xd,"t" ,t ,list);
869 do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
871 do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
872 do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
873 do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
874 do_cpt_int_err(xd,"#PME-only nodes",npme,list);
875 do_cpt_int_err(xd,"state flags",flags_state,list);
876 if (*file_version >= 4)
878 do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
879 do_cpt_int_err(xd,"energy history flags",flags_enh,list);
884 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
885 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
886 (1<<(estORIRE_DTAV+2)) |
887 (1<<(estORIRE_DTAV+3))));
889 if (*file_version >= 14)
891 do_cpt_int_err(xd,"df history flags",flags_dfh,list);
897 static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
902 if (file_version >= 2)
905 res = xdr_int(xd,&magic);
910 if (magic != CPT_MAGIC2)
919 static int do_cpt_state(XDR *xd,gmx_bool bRead,
920 int fflags,t_state *state,
921 gmx_bool bReadRNG,FILE *list)
924 int **rng_p,**rngi_p;
931 nnht = state->nhchainlength*state->ngtc;
932 nnhtp = state->nhchainlength*state->nnhpres;
936 rng_p = (int **)&state->ld_rng;
937 rngi_p = &state->ld_rngi;
941 /* Do not read the RNG data */
945 /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
947 sflags = state->flags;
948 for(i=0; (i<estNR && ret == 0); i++)
954 case estLAMBDA: ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
955 case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
956 case estBOX: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
957 case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
958 case estBOXV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
959 case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
960 case estSVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
961 case estFVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
962 case estNH_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
963 case estNH_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
964 case estNHPRES_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
965 case estNHPRES_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
966 case estTC_INT: ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
967 case estVETA: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
968 case estVOL0: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
969 case estX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
970 case estV: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
971 case estSDX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
972 case estLD_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
973 case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
974 case estMC_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
975 case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
976 case estDISRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
977 case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
978 case estORIRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
979 case estORIRE_DTAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
981 gmx_fatal(FARGS,"Unknown state entry %d\n"
982 "You are probably reading a new checkpoint file with old code",i);
990 static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
991 int fflags,ekinstate_t *ekins,
999 for(i=0; (i<eeksNR && ret == 0); i++)
1001 if (fflags & (1<<i))
1006 case eeksEKIN_N: ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
1007 case eeksEKINH : ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
1008 case eeksEKINF: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
1009 case eeksEKINO: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
1010 case eeksEKINTOTAL: ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
1011 case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
1012 case eeksVSCALE: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
1013 case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
1014 case eeksDEKINDL : ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->dekindl,list); break;
1015 case eeksMVCOS: ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->mvcos,list); break;
1017 gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
1018 "You are probably reading a new checkpoint file with old code",i);
1027 static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
1028 int fflags,energyhistory_t *enerhist,
1039 enerhist->nsteps = 0;
1041 enerhist->nsteps_sim = 0;
1042 enerhist->nsum_sim = 0;
1043 enerhist->dht = NULL;
1045 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1047 snew(enerhist->dht,1);
1048 enerhist->dht->ndh = NULL;
1049 enerhist->dht->dh = NULL;
1050 enerhist->dht->start_lambda_set=FALSE;
1054 for(i=0; (i<eenhNR && ret == 0); i++)
1056 if (fflags & (1<<i))
1060 case eenhENERGY_N: ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
1061 case eenhENERGY_AVER: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
1062 case eenhENERGY_SUM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
1063 case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
1064 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
1065 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
1066 case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
1067 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
1068 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
1069 if (bRead) /* now allocate memory for it */
1071 snew(enerhist->dht->dh, enerhist->dht->nndh);
1072 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1073 for(j=0;j<enerhist->dht->nndh;j++)
1075 enerhist->dht->ndh[j] = 0;
1076 enerhist->dht->dh[j] = NULL;
1080 case eenhENERGY_DELTA_H_LIST:
1081 for(j=0;j<enerhist->dht->nndh;j++)
1083 ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1086 case eenhENERGY_DELTA_H_STARTTIME:
1087 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1088 case eenhENERGY_DELTA_H_STARTLAMBDA:
1089 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1091 gmx_fatal(FARGS,"Unknown energy history entry %d\n"
1092 "You are probably reading a new checkpoint file with old code",i);
1097 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1099 /* Assume we have an old file format and copy sum to sum_sim */
1100 srenew(enerhist->ener_sum_sim,enerhist->nener);
1101 for(i=0; i<enerhist->nener; i++)
1103 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1105 fflags |= (1<<eenhENERGY_SUM_SIM);
1108 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1109 !(fflags & (1<<eenhENERGY_NSTEPS)))
1111 /* Assume we have an old file format and copy nsum to nsteps */
1112 enerhist->nsteps = enerhist->nsum;
1113 fflags |= (1<<eenhENERGY_NSTEPS);
1115 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1116 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1118 /* Assume we have an old file format and copy nsum to nsteps */
1119 enerhist->nsteps_sim = enerhist->nsum_sim;
1120 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1126 static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
1131 nlambda = dfhist->nlambda;
1134 for(i=0; (i<edfhNR && ret == 0); i++)
1136 if (fflags & (1<<i))
1140 case edfhBEQUIL: ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
1141 case edfhNATLAMBDA: ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
1142 case edfhWLHISTO: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
1143 case edfhWLDELTA: ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
1144 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
1145 case edfhSUMDG: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
1146 case edfhSUMMINVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
1147 case edfhSUMVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
1148 case edfhACCUMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
1149 case edfhACCUMM: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
1150 case edfhACCUMP2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
1151 case edfhACCUMM2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
1152 case edfhTIJ: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
1153 case edfhTIJEMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
1156 gmx_fatal(FARGS,"Unknown df history entry %d\n"
1157 "You are probably reading a new checkpoint file with old code",i);
1165 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1166 gmx_file_position_t **p_outputfiles, int *nfiles,
1167 FILE *list, int file_version)
1171 gmx_off_t mask = 0xFFFFFFFFL;
1172 int offset_high,offset_low;
1174 gmx_file_position_t *outputfiles;
1176 if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
1183 snew(*p_outputfiles,*nfiles);
1186 outputfiles = *p_outputfiles;
1188 for(i=0;i<*nfiles;i++)
1190 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1193 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1194 strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
1200 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1204 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1208 #if (SIZEOF_GMX_OFF_T > 4)
1209 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1211 outputfiles[i].offset = offset_low;
1216 buf = outputfiles[i].filename;
1217 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1219 offset = outputfiles[i].offset;
1227 #if (SIZEOF_GMX_OFF_T > 4)
1228 offset_low = (int) (offset & mask);
1229 offset_high = (int) ((offset >> 32) & mask);
1231 offset_low = offset;
1235 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1239 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1244 if (file_version >= 8)
1246 if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
1251 if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
1258 outputfiles[i].chksum_size = -1;
1265 void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
1266 FILE *fplog,t_commrec *cr,
1267 int eIntegrator,int simulation_part,
1268 gmx_bool bExpanded, int elamstats,
1269 gmx_large_int_t step,double t,t_state *state)
1279 char *fntemp; /* the temporary checkpoint file name */
1281 char timebuf[STRLEN];
1282 int nppnodes,npmenodes,flag_64bit;
1283 char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
1284 gmx_file_position_t *outputfiles;
1287 int flags_eks,flags_enh,flags_dfh,i;
1292 if (DOMAINDECOMP(cr))
1294 nppnodes = cr->dd->nnodes;
1295 npmenodes = cr->npmenodes;
1299 nppnodes = cr->nnodes;
1309 /* make the new temporary filename */
1310 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1312 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1313 sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
1314 strcat(fntemp,suffix);
1315 strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1318 gmx_ctime_r(&now,timebuf,STRLEN);
1322 fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
1323 gmx_step_str(step,buf),timebuf);
1326 /* Get offsets for open files */
1327 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1329 fp = gmx_fio_open(fntemp,"w");
1331 if (state->ekinstate.bUpToDate)
1334 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1335 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1336 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1344 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1346 flags_enh |= (1<<eenhENERGY_N);
1347 if (state->enerhist.nsum > 0)
1349 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1350 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1352 if (state->enerhist.nsum_sim > 0)
1354 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1355 (1<<eenhENERGY_NSUM_SIM));
1357 if (state->enerhist.dht)
1359 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1360 (1<< eenhENERGY_DELTA_H_LIST) |
1361 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1362 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1368 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1369 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1372 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1374 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1376 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1377 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1383 /* We can check many more things now (CPU, acceleration, etc), but
1384 * it is highly unlikely to have two separate builds with exactly
1385 * the same version, user, time, and build host!
1388 version = gmx_strdup(VERSION);
1389 btime = gmx_strdup(BUILD_TIME);
1390 buser = gmx_strdup(BUILD_USER);
1391 bhost = gmx_strdup(BUILD_HOST);
1393 double_prec = GMX_CPT_BUILD_DP;
1394 fprog = gmx_strdup(Program());
1396 ftime = &(timebuf[0]);
1398 do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
1399 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1400 &eIntegrator,&simulation_part,&step,&t,&nppnodes,
1401 DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
1402 &state->natoms,&state->ngtc,&state->nnhpres,
1403 &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
1412 if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
1413 (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
1414 (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
1415 (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0) ||
1416 (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
1419 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1422 do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
1424 /* we really, REALLY, want to make sure to physically write the checkpoint,
1425 and all the files it depends on, out to disk. Because we've
1426 opened the checkpoint with gmx_fio_open(), it's in our list
1428 ret=gmx_fio_all_output_fsync();
1434 "Cannot fsync '%s'; maybe you are out of disk space?",
1435 gmx_fio_getname(ret));
1437 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
1447 if( gmx_fio_close(fp) != 0)
1449 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1452 /* we don't move the checkpoint if the user specified they didn't want it,
1453 or if the fsyncs failed */
1454 if (!bNumberAndKeep && !ret)
1458 /* Rename the previous checkpoint file */
1460 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1461 strcat(buf,"_prev");
1462 strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1464 /* we copy here so that if something goes wrong between now and
1465 * the rename below, there's always a state.cpt.
1466 * If renames are atomic (such as in POSIX systems),
1467 * this copying should be unneccesary.
1469 gmx_file_copy(fn, buf, FALSE);
1470 /* We don't really care if this fails:
1471 * there's already a new checkpoint.
1474 gmx_file_rename(fn, buf);
1477 if (gmx_file_rename(fntemp, fn) != 0)
1479 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1487 /*code for alternate checkpointing scheme. moved from top of loop over
1489 fcRequestCheckPoint();
1490 if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
1491 gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
1493 #endif /* end GMX_FAHCORE block */
1496 static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
1500 fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
1501 fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
1502 fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
1503 for(i=0; i<estNR; i++)
1505 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1507 fprintf(fplog," %24s %11s %11s\n",
1509 (sflags & (1<<i)) ? " present " : "not present",
1510 (fflags & (1<<i)) ? " present " : "not present");
1515 static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
1517 FILE *fp = fplog ? fplog : stderr;
1521 fprintf(fp," %s mismatch,\n",type);
1522 fprintf(fp," current program: %d\n",p);
1523 fprintf(fp," checkpoint file: %d\n",f);
1529 static void check_string(FILE *fplog,const char *type,const char *p,
1530 const char *f,gmx_bool *mm)
1532 FILE *fp = fplog ? fplog : stderr;
1534 if (strcmp(p,f) != 0)
1536 fprintf(fp," %s mismatch,\n",type);
1537 fprintf(fp," current program: %s\n",p);
1538 fprintf(fp," checkpoint file: %s\n",f);
1544 static void check_match(FILE *fplog,
1546 char *btime,char *buser,char *bhost,int double_prec,
1548 t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
1549 ivec dd_nc,ivec dd_nc_f)
1556 check_string(fplog,"Version" ,VERSION ,version,&mm);
1557 check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
1558 check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
1559 check_string(fplog,"Build host" ,BUILD_HOST ,bhost ,&mm);
1560 check_int (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
1561 check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
1563 check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
1572 check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
1575 if (cr->npmenodes >= 0)
1577 npp -= cr->npmenodes;
1581 check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
1582 check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
1583 check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
1590 "Gromacs binary or parallel settings not identical to previous run.\n"
1591 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1592 fplog ? ",\n see the log file for details" : "");
1597 "Gromacs binary or parallel settings not identical to previous run.\n"
1598 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1603 static void read_checkpoint(const char *fn,FILE **pfplog,
1604 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
1605 int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
1606 t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
1607 int *simulation_part,
1608 gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
1613 char *version,*btime,*buser,*bhost,*fprog,*ftime;
1615 char filename[STRLEN],buf[STEPSTRSIZE];
1616 int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
1618 int natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
1621 gmx_file_position_t *outputfiles;
1623 t_fileio *chksum_file;
1624 FILE* fplog = *pfplog;
1625 unsigned char digest[16];
1626 #ifndef GMX_NATIVE_WINDOWS
1627 struct flock fl; /* don't initialize here: the struct order is OS
1631 const char *int_warn=
1632 "WARNING: The checkpoint file was generated with integrator %s,\n"
1633 " while the simulation uses integrator %s\n\n";
1634 const char *sd_note=
1635 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1636 " while the simulation uses %d SD or BD nodes,\n"
1637 " continuation will be exact, except for the random state\n\n";
1639 #ifndef GMX_NATIVE_WINDOWS
1641 fl.l_whence=SEEK_SET;
1650 "read_checkpoint not (yet) supported with particle decomposition");
1653 fp = gmx_fio_open(fn,"r");
1654 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1655 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1656 &eIntegrator_f,simulation_part,step,t,
1657 &nppnodes_f,dd_nc_f,&npmenodes_f,
1658 &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
1659 &fflags,&flags_eks,&flags_enh,&flags_dfh,NULL);
1661 if (bAppendOutputFiles &&
1662 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1664 gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1667 if (cr == NULL || MASTER(cr))
1669 fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
1673 /* This will not be written if we do appending, since fplog is still NULL then */
1676 fprintf(fplog,"\n");
1677 fprintf(fplog,"Reading checkpoint file %s\n",fn);
1678 fprintf(fplog," file generated by: %s\n",fprog);
1679 fprintf(fplog," file generated at: %s\n",ftime);
1680 fprintf(fplog," GROMACS build time: %s\n",btime);
1681 fprintf(fplog," GROMACS build user: %s\n",buser);
1682 fprintf(fplog," GROMACS build host: %s\n",bhost);
1683 fprintf(fplog," GROMACS double prec.: %d\n",double_prec);
1684 fprintf(fplog," simulation part #: %d\n",*simulation_part);
1685 fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
1686 fprintf(fplog," time: %f\n",*t);
1687 fprintf(fplog,"\n");
1690 if (natoms != state->natoms)
1692 gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
1694 if (ngtc != state->ngtc)
1696 gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
1698 if (nnhpres != state->nnhpres)
1700 gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
1703 if (nlambda != state->dfhist.nlambda)
1705 gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
1708 init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
1709 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1711 if (eIntegrator_f != eIntegrator)
1715 fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1717 if(bAppendOutputFiles)
1720 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1721 "Stopping the run to prevent you from ruining all your data...\n"
1722 "If you _really_ know what you are doing, try with the -noappend option.\n");
1726 fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1735 else if (bPartDecomp)
1737 nppnodes = cr->nnodes;
1740 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1742 if (cr->npmenodes < 0)
1744 cr->npmenodes = npmenodes_f;
1746 nppnodes = cr->nnodes - cr->npmenodes;
1747 if (nppnodes == nppnodes_f)
1749 for(d=0; d<DIM; d++)
1753 dd_nc[d] = dd_nc_f[d];
1760 /* The number of PP nodes has not been set yet */
1764 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1766 /* Correct the RNG state size for the number of PP nodes.
1767 * Such assignments should all be moved to one central function.
1769 state->nrng = nppnodes*gmx_rng_n();
1770 state->nrngi = nppnodes;
1774 if (fflags != state->flags)
1779 if(bAppendOutputFiles)
1782 "Output file appending requested, but input and checkpoint states are not identical.\n"
1783 "Stopping the run to prevent you from ruining all your data...\n"
1784 "You can try with the -noappend option, and get more info in the log file.\n");
1787 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1789 gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1794 "WARNING: The checkpoint state entries do not match the simulation,\n"
1795 " see the log file for details\n\n");
1801 print_flag_mismatch(fplog,state->flags,fflags);
1806 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1807 nppnodes != nppnodes_f)
1812 fprintf(stderr,sd_note,nppnodes_f,nppnodes);
1816 fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
1821 check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
1822 cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
1825 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
1826 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1827 Investigate for 5.0. */
1832 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1833 flags_eks,&state->ekinstate,NULL);
1838 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1839 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1841 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1842 flags_enh,&state->enerhist,NULL);
1848 if (file_version < 6)
1850 const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1852 fprintf(stderr,"\nWARNING: %s\n\n",warn);
1855 fprintf(fplog,"\nWARNING: %s\n\n",warn);
1857 state->enerhist.nsum = *step;
1858 state->enerhist.nsum_sim = *step;
1861 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
1862 flags_dfh,&state->dfhist,NULL);
1868 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
1874 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1879 if( gmx_fio_close(fp) != 0)
1881 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1890 /* If the user wants to append to output files,
1891 * we use the file pointer positions of the output files stored
1892 * in the checkpoint file and truncate the files such that any frames
1893 * written after the checkpoint time are removed.
1894 * All files are md5sum checked such that we can be sure that
1895 * we do not truncate other (maybe imprortant) files.
1897 if (bAppendOutputFiles)
1899 if (fn2ftp(outputfiles[0].filename)!=efLOG)
1901 /* make sure first file is log file so that it is OK to use it for
1904 gmx_fatal(FARGS,"The first output file should always be the log "
1905 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
1907 for(i=0;i<nfiles;i++)
1909 if (outputfiles[i].offset < 0)
1911 gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
1912 "is larger than 2 GB, but mdrun did not support large file"
1913 " offsets. Can not append. Run mdrun with -noappend",
1914 outputfiles[i].filename);
1917 chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
1920 chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
1925 /* Note that there are systems where the lock operation
1926 * will succeed, but a second process can also lock the file.
1927 * We should probably try to detect this.
1929 #ifndef GMX_NATIVE_WINDOWS
1930 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
1933 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
1936 if (errno == ENOSYS)
1940 gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
1944 fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1947 fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1951 else if (errno == EACCES || errno == EAGAIN)
1953 gmx_fatal(FARGS,"Failed to lock: %s. Already running "
1954 "simulation?", outputfiles[i].filename);
1958 gmx_fatal(FARGS,"Failed to lock: %s. %s.",
1959 outputfiles[i].filename, strerror(errno));
1964 /* compute md5 chksum */
1965 if (outputfiles[i].chksum_size != -1)
1967 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
1968 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
1970 gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
1971 outputfiles[i].chksum_size,
1972 outputfiles[i].filename);
1975 if (i==0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
1977 if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
1979 gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
1984 if (i==0) /*open log file here - so that lock is never lifted
1985 after chksum is calculated */
1987 *pfplog = gmx_fio_getfp(chksum_file);
1991 gmx_fio_close(chksum_file);
1994 /* compare md5 chksum */
1995 if (outputfiles[i].chksum_size != -1 &&
1996 memcmp(digest,outputfiles[i].chksum,16)!=0)
2000 fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
2001 for (j=0; j<16; j++)
2003 fprintf(debug,"%02x",digest[j]);
2005 fprintf(debug,"\n");
2007 gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2008 outputfiles[i].filename);
2013 if (i!=0) /*log file is already seeked to correct position */
2015 #ifdef GMX_NATIVE_WINDOWS
2016 rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
2018 rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
2022 gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
2032 void load_checkpoint(const char *fn,FILE **fplog,
2033 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
2034 t_inputrec *ir,t_state *state,
2035 gmx_bool *bReadRNG,gmx_bool *bReadEkin,
2036 gmx_bool bAppend,gmx_bool bForceAppend)
2038 gmx_large_int_t step;
2041 if (SIMMASTER(cr)) {
2042 /* Read the state from the checkpoint file */
2043 read_checkpoint(fn,fplog,
2044 cr,bPartDecomp,dd_nc,
2045 ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
2046 &ir->simulation_part,bAppend,bForceAppend);
2049 gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
2050 gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
2051 gmx_bcast(sizeof(step),&step,cr);
2052 gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
2053 gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
2055 ir->bContinuation = TRUE;
2056 if (ir->nsteps >= 0)
2058 ir->nsteps += ir->init_step - step;
2060 ir->init_step = step;
2061 ir->simulation_part += 1;
2064 static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
2065 gmx_large_int_t *step,double *t,t_state *state,
2067 int *nfiles,gmx_file_position_t **outputfiles)
2070 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2075 int flags_eks,flags_enh,flags_dfh;
2077 gmx_file_position_t *files_loc=NULL;
2080 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2081 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2082 &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
2083 &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
2084 &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,NULL);
2086 do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
2091 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2092 flags_eks,&state->ekinstate,NULL);
2097 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2098 flags_enh,&state->enerhist,NULL);
2103 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2104 flags_dfh,&state->dfhist,NULL);
2110 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
2111 outputfiles != NULL ? outputfiles : &files_loc,
2112 outputfiles != NULL ? nfiles : &nfiles_loc,
2114 if (files_loc != NULL)
2124 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2138 read_checkpoint_state(const char *fn,int *simulation_part,
2139 gmx_large_int_t *step,double *t,t_state *state)
2143 fp = gmx_fio_open(fn,"r");
2144 read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
2145 if( gmx_fio_close(fp) != 0)
2147 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2151 void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
2154 int simulation_part;
2155 gmx_large_int_t step;
2158 init_state(&state,0,0,0,0,0);
2160 read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
2162 fr->natoms = state.natoms;
2165 fr->step = gmx_large_int_to_int(step,
2166 "conversion of checkpoint to trajectory");
2170 fr->lambda = state.lambda[efptFEP];
2171 fr->fep_state = state.fep_state;
2173 fr->bX = (state.flags & (1<<estX));
2179 fr->bV = (state.flags & (1<<estV));
2186 fr->bBox = (state.flags & (1<<estBOX));
2189 copy_mat(state.box,fr->box);
2194 void list_checkpoint(const char *fn,FILE *out)
2198 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2200 int eIntegrator,simulation_part,nppnodes,npme;
2201 gmx_large_int_t step;
2205 int flags_eks,flags_enh,flags_dfh;
2209 gmx_file_position_t *outputfiles;
2212 init_state(&state,-1,-1,-1,-1,0);
2214 fp = gmx_fio_open(fn,"r");
2215 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2216 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2217 &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
2218 &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
2219 &(state.dfhist.nlambda),&state.flags,
2220 &flags_eks,&flags_enh,&flags_dfh,out);
2221 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
2226 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2227 flags_eks,&state.ekinstate,out);
2232 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2233 flags_enh,&state.enerhist,out);
2237 init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
2238 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2239 flags_dfh,&state.dfhist,out);
2243 do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
2248 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2255 if( gmx_fio_close(fp) != 0)
2257 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2264 static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
2268 /* Check if the output file name stored in the checkpoint file
2269 * is one of the output file names of mdrun.
2273 !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
2278 return (i < nfile && gmx_fexist(fnm_cp));
2281 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2282 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2283 gmx_large_int_t *cpt_step,t_commrec *cr,
2284 gmx_bool bAppendReq,
2285 int nfile,const t_filenm fnm[],
2286 const char *part_suffix,gmx_bool *bAddPart)
2289 gmx_large_int_t step=0;
2293 gmx_file_position_t *outputfiles;
2296 char *fn,suf_up[STRLEN];
2300 if (SIMMASTER(cr)) {
2301 if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
2303 *simulation_part = 0;
2307 init_state(&state,0,0,0,0,0);
2309 read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
2310 &nfiles,&outputfiles);
2311 if( gmx_fio_close(fp) != 0)
2313 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2320 for(f=0; f<nfiles; f++)
2322 if (exist_output_file(outputfiles[f].filename,nfile,fnm))
2327 if (nexist == nfiles)
2329 bAppend = bAppendReq;
2331 else if (nexist > 0)
2334 "Output file appending has been requested,\n"
2335 "but some output files listed in the checkpoint file %s\n"
2336 "are not present or are named differently by the current program:\n",
2338 fprintf(stderr,"output files present:");
2339 for(f=0; f<nfiles; f++)
2341 if (exist_output_file(outputfiles[f].filename,
2344 fprintf(stderr," %s",outputfiles[f].filename);
2347 fprintf(stderr,"\n");
2348 fprintf(stderr,"output files not present or named differently:");
2349 for(f=0; f<nfiles; f++)
2351 if (!exist_output_file(outputfiles[f].filename,
2354 fprintf(stderr," %s",outputfiles[f].filename);
2357 fprintf(stderr,"\n");
2359 gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
2367 gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
2369 fn = outputfiles[0].filename;
2370 if (strlen(fn) < 4 ||
2371 gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
2373 gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
2375 /* Set bAddPart to whether the suffix string '.part' is present
2376 * in the log file name.
2378 strcpy(suf_up,part_suffix);
2380 *bAddPart = (strstr(fn,part_suffix) != NULL ||
2381 strstr(fn,suf_up) != NULL);
2389 gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
2391 if (*simulation_part > 0 && bAppendReq)
2393 gmx_bcast(sizeof(bAppend),&bAppend,cr);
2394 gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
2397 if (NULL != cpt_step)