1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
26 #include "gromacs/utility/gmx_header_config.h"
31 #ifdef HAVE_SYS_TIME_H
39 #ifdef GMX_NATIVE_WINDOWS
42 #include <sys/locking.h>
57 #include "gmx_random.h"
58 #include "checkpoint.h"
69 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
71 gmx_ctime_r(const time_t *clock,char *buf, int n);
74 #define CPT_MAGIC1 171817
75 #define CPT_MAGIC2 171819
76 #define CPTSTRLEN 1024
79 #define GMX_CPT_BUILD_DP 1
81 #define GMX_CPT_BUILD_DP 0
84 /* cpt_version should normally only be changed
85 * when the header or footer format changes.
86 * The state data format itself is backward and forward compatible.
87 * But old code can not read a new entry that is present in the file
88 * (but can read a new format when new entries are not present).
90 static const int cpt_version = 14;
/* Printable names of the state checkpoint entries, sized by estNR.
 * st_names() indexes this table with the entry number for cptpEST.
 * NOTE(review): the order presumably mirrors the est* enum, which is
 * declared outside this listing -- confirm before adding entries. */
93 const char *est_names[estNR]=
96 "box", "box-rel", "box-v", "pres_prev",
97 "nosehoover-xi", "thermostat-integral",
98 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
99 "disre_initf", "disre_rm3tav",
100 "orire_initf", "orire_Dtav",
101 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
/* Kinetic-energy state entries: entry indices (eeksNR is the count). */
104 enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
/* Printable names for the eeks* entries, listed in the same order as the
 * enum above; st_names() indexes this table for cptpEEKS. */
106 const char *eeks_names[eeksNR]=
108 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
109 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
/* Energy-history entries: entry indices followed by their printable names.
 * st_names() indexes eenh_names for cptpEENH.
 * NOTE(review): this listing omits some lines of both the enum and the name
 * table; the two must stay in the same order -- confirm against the full
 * file when adding an entry. */
112 enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
113 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
114 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
115 eenhENERGY_DELTA_H_NN,
116 eenhENERGY_DELTA_H_LIST,
117 eenhENERGY_DELTA_H_STARTTIME,
118 eenhENERGY_DELTA_H_STARTLAMBDA,
121 const char *eenh_names[eenhNR]=
123 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
124 "energy_sum_sim", "energy_nsum_sim",
125 "energy_nsteps", "energy_nsteps_sim",
127 "energy_delta_h_list",
128 "energy_delta_h_start_time",
129 "energy_delta_h_start_lambda"
132 /* free energy history variables -- need to be preserved over checkpoint */
/* Entry indices; edfhNR is the number of entries. */
133 enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
134 edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
135 /* free energy history variable names */
/* One name per edfh* entry, in enum order; st_names() indexes this table
 * for cptpEDFH. */
136 const char *edfh_names[edfhNR]=
138 "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
139 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
142 #ifdef GMX_NATIVE_WINDOWS
/* Windows-only helper (compiled under GMX_NATIVE_WINDOWS): opens filename
 * in "rb+" mode and resizes it to size bytes with _chsize_s(), returning
 * that call's result.
 * NOTE(review): the return type, the handling of a failed fopen() and the
 * closing of fp are on lines not shown in this listing -- confirm that fp
 * is closed on every path. */
144 gmx_wintruncate(const char *filename, __int64 size)
147 /*we do this elsewhere*/
153 fp=fopen(filename,"rb+");
160 return _chsize_s( fileno(fp), size);
/* Layout tags for real-valued entries; passed as erealtype to
 * do_cpte_reals_low(), which uses them to pick the print routine. */
166 enum { ecprREAL, ecprRVEC, ecprMATRIX };
/* Checkpoint component selectors; st_names() maps each one to its name
 * table. */
168 enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
169 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
170 cptpEST - state variables.
171 cptpEEKS - Kinetic energy state variables.
172 cptpEENH - Energy history state variables.
173 cptpEDFH - free energy history variables.
/* Return the printable name of entry ecpt within checkpoint component cptp
 * (one of cptpEST, cptpEEKS, cptpEENH, cptpEDFH).
 * NOTE(review): ecpt indexes the selected table directly; no range check is
 * visible, and the handling of an unknown cptp is on lines not shown. */
177 static const char *st_names(int cptp,int ecpt)
181 case cptpEST: return est_names [ecpt]; break;
182 case cptpEEKS: return eeks_names[ecpt]; break;
183 case cptpEENH: return eenh_names[ecpt]; break;
184 case cptpEDFH: return edfh_names[ecpt]; break;
/* Write a "corrupted or truncated" checkpoint warning to fp. */
190 static void cp_warning(FILE *fp)
192 fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
/* Report a corrupted/truncated checkpoint (or a full disk) through
 * gmx_fatal(). */
195 static void cp_error()
197 gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
/* Transfer one string through XDR with xdr_string(), limited to CPTSTRLEN
 * characters, and print "desc = value" to list.
 * NOTE(review): the use of bRead, the test of res and the condition guarding
 * the fprintf are on lines not shown in this listing; the "_err" suffix
 * suggests a failure ends in cp_error() -- confirm. */
200 static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
208 res = xdr_string(xd,s,CPTSTRLEN);
215 fprintf(list,"%s = %s\n",desc,*s);
/* Transfer one int through XDR and print "desc = value" to list.
 * NOTE(review): the XDR call, the return statements and the condition
 * guarding the fprintf are not shown here. Callers in this file compare the
 * result against both "< 0" and "!= 0", so failure is presumably reported as
 * a negative value -- confirm. */
220 static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
231 fprintf(list,"%s = %d\n",desc,*i);
/* Transfer n unsigned chars through XDR, one xdr_u_char() call per byte,
 * and print them to list as two-digit hex values after "desc = ".
 * The loop stops at the first failed XDR call because res is part of the
 * loop condition.
 * NOTE(review): the conditions guarding the prints and the return value are
 * on lines not shown in this listing. */
236 static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
242 fprintf(list,"%s = ",desc);
244 for (j=0; j<n && res; j++)
246 res &= xdr_u_char(xd,&i[j]);
249 fprintf(list,"%02x",i[j]);
/* Wrapper around do_cpt_int() that acts on a negative return value
 * (presumably by calling cp_error(); that line is not shown).
 * NOTE(review): do_cpt_files() tests the same function with "!= 0"; confirm
 * that "< 0" catches every failure value do_cpt_int() can return. */
264 static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
266 if (do_cpt_int(xd,desc,i,list) < 0)
/* Transfer one gmx_large_int_t (a step count) through XDR with
 * xdr_gmx_large_int() and print "desc = value" to list, formatting the
 * value with gmx_step_str() into a local STEPSTRSIZE buffer.
 * NOTE(review): the test of res and the condition guarding the fprintf are
 * on lines not shown in this listing. */
272 static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
275 char buf[STEPSTRSIZE];
277 res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
284 fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
/* Transfer one double through XDR with xdr_double() and print
 * "desc = value" to list.
 * NOTE(review): the test of res and the condition guarding the fprintf are
 * on lines not shown in this listing. */
288 static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
292 res = xdr_double(xd,f);
299 fprintf(list,"%s = %f\n",desc,*f);
303 /* If nval >= 0, nval is used; on read this should match the passed value.
304 * If nval < 0, *nptr is used; on read the value is stored in *nptr
/* Low-level transfer of an array of reals for entry ecpt of component cptp.
 * The element count (nf) and an XDR datatype code (dt) travel with the
 * data. The array is moved with xdr_vector() as float or double, and the
 * result is printed to list according to erealtype.
 * NOTE(review): many lines of this function are missing from the listing
 * (allocation, the bRead paths, precision conversion, return values); the
 * inline notes describe only what the visible lines show. */
306 static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
307 int nval,int *nptr,real **v,
308 FILE *list,int erealtype)
/* dtc is the datatype code of this build; the two initializers below are
 * presumably alternatives chosen by a preprocessor conditional that is not
 * shown. */
312 int dtc=xdr_datatype_float;
314 int dtc=xdr_datatype_double;
/* NOTE(review): the message spells the parameter "ntpr"; it is named nptr. */
331 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
336 res = xdr_int(xd,&nf);
347 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
356 res = xdr_int(xd,&dt);
/* Unlike a count mismatch, a precision mismatch is only reported on
 * stderr. */
363 fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
364 st_names(cptp,ecpt),xdr_datatype_names[dtc],
365 xdr_datatype_names[dt]);
/* Taken when listing or when the caller's flags do not contain this entry;
 * the body of this branch is not shown. */
367 if (list || !(sflags & (1<<ecpt)))
/* The data are transferred in the precision recorded in dt; the tests on dtc
 * presumably select between a direct transfer and a converted one. */
380 if (dt == xdr_datatype_float)
382 if (dtc == xdr_datatype_float)
390 res = xdr_vector(xd,(char *)vf,nf,
391 (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
396 if (dtc != xdr_datatype_float)
407 if (dtc == xdr_datatype_double)
415 res = xdr_vector(xd,(char *)vd,nf,
416 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
421 if (dtc != xdr_datatype_double)
/* Print as plain reals or as nf/3 rvecs; any other type is an internal
 * error. */
436 pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
439 pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
442 gmx_incons("Unknown checkpoint real type");
454 /* This function stores n along with the reals for reading,
455 * but on reading it assumes that n matches the value in the checkpoint file,
456 * a fatal error is generated when this is not the case.
/* Transfer n reals for entry ecpt: forwards to do_cpte_reals_low() with a
 * fixed count (nval = n, nptr = NULL) and the ecprREAL layout. */
458 static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
459 int n,real **v,FILE *list)
461 return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
464 /* This function does the same as do_cpte_reals,
465 * except that on reading it ignores the passed value of *n
466 * and stores the value read from the checkpoint file in *n.
/* Variable-count variant: forwards to do_cpte_reals_low() with nval = -1 so
 * that the count is taken from, or stored to, *n. */
468 static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
469 int *n,real **v,FILE *list)
471 return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
/* Transfer a single real: forwards to do_cpte_reals_low() with a count of 1
 * and the address of r as the array pointer.
 * NOTE(review): the rest of the parameter list (r and list) is on a line
 * not shown in this listing. */
474 static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
479 return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
/* Transfer an array of n ints for entry ecpt of component cptp.
 * The element count (nf) and a datatype code (dt) travel with the data, the
 * array itself is moved with xdr_vector(), and pr_ivec() prints it to list.
 * NOTE(review): allocation, the condition on the type check and the return
 * values are on lines not shown in this listing. */
482 static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
483 int n,int **v,FILE *list)
486 int dtc=xdr_datatype_int;
491 res = xdr_int(xd,&nf);
/* The count is only enforced when not listing and when a destination was
 * supplied. */
496 if (list == NULL && v != NULL && nf != n)
498 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
501 res = xdr_int(xd,&dt);
508 gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
509 st_names(cptp,ecpt),xdr_datatype_names[dtc],
510 xdr_datatype_names[dt]);
/* Taken when listing, when the entry is absent from sflags, or when the
 * caller passed no destination; the body of this branch is not shown. */
512 if (list || !(sflags & (1<<ecpt)) || v == NULL)
525 res = xdr_vector(xd,(char *)vp,nf,
526 (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
533 pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
/* Transfer a single int: forwards to do_cpte_ints() with a count of 1.
 * NOTE(review): the rest of the parameter list (i and list) is on a line
 * not shown in this listing. */
543 static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
546 return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
/* Transfer an array of n doubles for entry ecpt of component cptp.
 * The element count (nf) and a datatype code (dt) travel with the data, the
 * array itself is moved with xdr_vector(), and pr_doubles() prints it to
 * list.
 * NOTE(review): allocation, the condition on the precision check and the
 * return values are on lines not shown in this listing. */
549 static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
550 int n,double **v,FILE *list)
553 int dtc=xdr_datatype_double;
558 res = xdr_int(xd,&nf);
/* The count is only enforced when not listing. */
563 if (list == NULL && nf != n)
565 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
568 res = xdr_int(xd,&dt);
575 gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
576 st_names(cptp,ecpt),xdr_datatype_names[dtc],
577 xdr_datatype_names[dt]);
/* Taken when listing or when the entry is absent from sflags; the body of
 * this branch is not shown. */
579 if (list || !(sflags & (1<<ecpt)))
592 res = xdr_vector(xd,(char *)vp,nf,
593 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
600 pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
/* Transfer a single double: forwards to do_cpte_doubles() with a count of 1
 * and the address of r as the array pointer. */
610 static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
611 double *r,FILE *list)
613 return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
/* Transfer n rvecs as n*DIM reals through do_cpte_reals_low(), using the
 * ecprRVEC layout so that a listing prints them as vectors. */
617 static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
618 int n,rvec **v,FILE *list)
622 return do_cpte_reals_low(xd,cptp,ecpt,sflags,
623 n*DIM,NULL,(real **)v,list,ecprRVEC);
/* Transfer one matrix as DIM*DIM reals starting at v[0][0].
 * The low-level routine is called with list = NULL so it prints nothing;
 * on success this function prints the matrix itself with pr_rvecs().
 * NOTE(review): the rest of the parameter list (v and list) and the return
 * statement are on lines not shown in this listing. */
626 static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
632 vr = (real *)&(v[0][0]);
633 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
634 DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
636 if (list && ret == 0)
638 pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
/* Transfer an array of rows v[i], each holding n reals, one
 * do_cpte_reals_low() call per row. When listing, each successfully
 * transferred row is printed under the name "<entry>[i]".
 * NOTE(review): the loop over i and the accumulation of the return value
 * are on lines not shown. sprintf() into name[CPTSTRLEN] is unbounded; the
 * entry names visible in this file are short, but snprintf would be safer. */
645 static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
646 int n, real **v,FILE *list)
651 char name[CPTSTRLEN];
662 reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
663 if (list && reti == 0)
665 sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
666 pr_reals(list,0,name,v[i],n);
/* Transfer an array of n matrices for entry ecpt of component cptp.
 * The count nf travels with the data. The matrices are flattened into a
 * real buffer vr of nf*DIM*DIM values, moved with do_cpte_reals_low(), and
 * copied back into vp. When listing, each matrix is printed with
 * pr_rvecs().
 * NOTE(review): allocation and release of vr and vp, the loop headers and
 * the return statement are on lines not shown in this listing. */
676 static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
677 int n,matrix **v,FILE *list)
686 res = xdr_int(xd,&nf);
/* The count is only enforced when not listing. */
691 if (list == NULL && nf != n)
693 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
695 if (list || !(sflags & (1<<ecpt)))
/* Flatten: element (j,k) of matrix i goes to index (i*DIM+j)*DIM+k. */
715 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
719 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
720 nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
/* Unflatten with the same index mapping. */
727 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
733 if (list && ret == 0)
737 pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
/* Transfer the checkpoint header through XDR.
 * It covers the start magic number, build identification strings, the
 * checkpoint file version, system sizes, integrator, simulation part, step
 * and time, the node and domain-decomposition layout, and the flag words
 * saying which state, ekin, energy-history and df-history entries follow.
 * Fields added in later format versions are gated on *file_version.
 * NOTE(review): parts of the parameter list (including double_prec and
 * list), the bRead branches and every "else" branch are on lines not shown
 * in this listing. */
748 static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
749 char **version,char **btime,char **buser,char **bhost,
751 char **fprog,char **ftime,
752 int *eIntegrator,int *simulation_part,
753 gmx_large_int_t *step,double *t,
754 int *nnodes,int *dd_nc,int *npme,
755 int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
756 int *nlambda, int *flags_state,
757 int *flags_eks,int *flags_enh, int *flags_dfh,
/* The file starts with CPT_MAGIC1; a failed transfer or a different value
 * is fatal. */
774 res = xdr_int(xd,&magic);
777 gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
779 if (magic != CPT_MAGIC1)
781 gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
782 "The checkpoint file is corrupted or not a checkpoint file",
/* Host name of the generating machine, with "unknown" as fallback.
 * NOTE(review): the declaration of fhost is not shown; gethostname() is
 * given a length of 255, so the buffer must hold at least that much. */
789 if (gethostname(fhost,255) != 0)
791 sprintf(fhost,"unknown");
794 sprintf(fhost,"unknown");
797 do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
798 do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
799 do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
800 do_cpt_string_err(xd,bRead,"GROMACS build host" ,bhost,list);
801 do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
802 do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
/* Preset to the current format version before the transfer; presumably a
 * write emits this value and a read replaces it with the file's. */
803 *file_version = cpt_version;
804 do_cpt_int_err(xd,"checkpoint file version",file_version,list);
/* Files newer than this code cannot be read. */
805 if (*file_version > cpt_version)
807 gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
809 if (*file_version >= 13)
811 do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
817 if (*file_version >= 12)
819 do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
825 do_cpt_int_err(xd,"#atoms" ,natoms ,list);
826 do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
827 if (*file_version >= 10)
829 do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
835 if (*file_version >= 11)
837 do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
843 if (*file_version >= 14)
845 do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
851 do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
852 if (*file_version >= 3)
854 do_cpt_int_err(xd,"simulation part #", simulation_part,list);
/* Fallback for files without a stored simulation part. */
858 *simulation_part = 1;
/* From version 5 the step is a gmx_large_int_t; the other transfer goes
 * through the plain int idum (its copy into *step is not shown). */
860 if (*file_version >= 5)
862 do_cpt_step_err(xd,"step" ,step ,list);
866 do_cpt_int_err(xd,"step" ,&idum ,list);
869 do_cpt_double_err(xd,"t" ,t ,list);
870 do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
/* dd_nc may be NULL; the three values are then transferred through idum. */
872 do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
873 do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
874 do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
875 do_cpt_int_err(xd,"#PME-only nodes",npme,list);
876 do_cpt_int_err(xd,"state flags",flags_state,list);
877 if (*file_version >= 4)
879 do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
880 do_cpt_int_err(xd,"energy history flags",flags_enh,list);
/* The alternative to the version-4 transfers above (presumably the branch
 * for older files): the energy-history flags are taken from the bits of
 * flags_state above estORIRE_DTAV, and three of those bits are then cleared
 * from flags_state. */
885 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
886 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
887 (1<<(estORIRE_DTAV+2)) |
888 (1<<(estORIRE_DTAV+3))));
890 if (*file_version >= 14)
892 do_cpt_int_err(xd,"df history flags",flags_dfh,list);
/* Transfer the end-of-file magic number for file versions 2 and later and
 * compare it with CPT_MAGIC2.
 * NOTE(review): the use of bRead, the handling of a failed transfer or a
 * mismatch, and the return values are on lines not shown in this listing. */
898 static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
903 if (file_version >= 2)
906 res = xdr_int(xd,&magic);
911 if (magic != CPT_MAGIC2)
/* Transfer the t_state entries through XDR.
 * The function loops over all estNR entry indices, stops at the first
 * non-zero return value, and dispatches each entry to the typed transfer
 * helper for its field. An index with no matching case is fatal.
 * NOTE(review): the rest of the parameter handling, the test that selects
 * which entries are processed (presumably fflags & (1<<i)) and the
 * alternative branch of the bReadRNG handling are on lines not shown. */
920 static int do_cpt_state(XDR *xd,gmx_bool bRead,
921 int fflags,t_state *state,
922 gmx_bool bReadRNG,FILE *list)
925 int **rng_p,**rngi_p;
/* Element counts of the Nose-Hoover chain arrays for the thermostat and the
 * barostat. */
932 nnht = state->nhchainlength*state->ngtc;
933 nnhtp = state->nhchainlength*state->nnhpres;
/* Destinations for the LD random-number state.
 * NOTE(review): ld_rng is cast to int **; confirm its element type has the
 * size of int. */
937 rng_p = (int **)&state->ld_rng;
938 rngi_p = &state->ld_rngi;
942 /* Do not read the RNG data */
946 /* We want the MC_RNG the same across all the nodes for now -- lambda MC is global */
948 sflags = state->flags;
949 for(i=0; (i<estNR && ret == 0); i++)
955 case estLAMBDA: ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
956 case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
957 case estBOX: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
958 case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
959 case estBOXV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
960 case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
961 case estSVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
962 case estFVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
963 case estNH_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
964 case estNH_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
965 case estNHPRES_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
966 case estNHPRES_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
967 case estTC_INT: ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
968 case estVETA: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
969 case estVOL0: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
970 case estX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
971 case estV: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
972 case estSDX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
973 case estLD_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
974 case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
975 case estMC_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
976 case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
977 case estDISRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
978 case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
979 case estORIRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
980 case estORIRE_DTAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
982 gmx_fatal(FARGS,"Unknown state entry %d\n"
983 "You are probably reading a new checkpoint file with old code",i);
/* Transfer the kinetic-energy state entries selected by fflags.
 * The function loops over all eeksNR entry indices, stops at the first
 * non-zero return value, and dispatches each flagged entry to the typed
 * transfer helper for its ekinstate_t field. An index with no matching case
 * is fatal.
 * NOTE(review): the end of the parameter list (list) and the return
 * statement are on lines not shown in this listing. */
991 static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
992 int fflags,ekinstate_t *ekins,
1000 for(i=0; (i<eeksNR && ret == 0); i++)
1002 if (fflags & (1<<i))
1007 case eeksEKIN_N: ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
1008 case eeksEKINH : ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
1009 case eeksEKINF: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
1010 case eeksEKINO: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
1011 case eeksEKINTOTAL: ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
1012 case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
/* NOTE(review): the next four calls pass "1,cptpEEKS" in the (cptp,ecpt)
 * positions where every other case passes "cptpEEKS,i". cptpEEKS has the
 * value 1 in the cptp enum, so the component is still right, but ecpt is 1
 * rather than i. The callee therefore tests flag bit 1 (eeksEKINH) and
 * reports the name of entry 1 in its messages and listings. This looks like
 * swapped or mistyped arguments; "cptpEEKS,i" is presumably intended. */
1013 case eeksVSCALE: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
1014 case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
1015 case eeksDEKINDL : ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->dekindl,list); break;
1016 case eeksMVCOS: ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->mvcos,list); break;
1018 gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
1019 "You are probably reading a new checkpoint file with old code",i);
/* Transfer the energy-history entries selected by fflags.
 * The visible steps are: reset the step and sum counters and the delta-H
 * block; allocate the delta-H block when its entry is flagged; loop over
 * all eenhNR entry indices and dispatch each flagged entry; finally fill in
 * fields that the flags show are not present, copying them from their
 * older counterparts.
 * NOTE(review): the end of the parameter list (list), the conditions around
 * the initial reset (presumably bRead) and the return statement are on
 * lines not shown in this listing. */
1028 static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
1029 int fflags,energyhistory_t *enerhist,
1040 enerhist->nsteps = 0;
1042 enerhist->nsteps_sim = 0;
1043 enerhist->nsum_sim = 0;
1044 enerhist->dht = NULL;
/* The delta-H block exists only when its count entry is flagged. */
1046 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1048 snew(enerhist->dht,1);
1049 enerhist->dht->ndh = NULL;
1050 enerhist->dht->dh = NULL;
1051 enerhist->dht->start_lambda_set=FALSE;
1055 for(i=0; (i<eenhNR && ret == 0); i++)
1057 if (fflags & (1<<i))
/* The step-count entries go through do_cpt_step_err(), which returns
 * nothing, so those cases leave ret unchanged. */
1061 case eenhENERGY_N: ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
1062 case eenhENERGY_AVER: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
1063 case eenhENERGY_SUM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
1064 case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
1065 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
1066 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
1067 case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
1068 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
/* NOTE(review): the delta-H cases below dereference enerhist->dht with no
 * visible NULL check. In the lines shown, dht is allocated only when the
 * eenhENERGY_DELTA_H_NN flag is set, so a flag word carrying the later
 * delta-H entries without that one would dereference NULL -- confirm. */
1069 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
1070 if (bRead) /* now allocate memory for it */
1072 snew(enerhist->dht->dh, enerhist->dht->nndh);
1073 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1074 for(j=0;j<enerhist->dht->nndh;j++)
1076 enerhist->dht->ndh[j] = 0;
1077 enerhist->dht->dh[j] = NULL;
1081 case eenhENERGY_DELTA_H_LIST:
/* NOTE(review): ret is overwritten on every iteration and the loop
 * condition shown does not test it, so an error from an earlier j may be
 * lost -- confirm. */
1082 for(j=0;j<enerhist->dht->nndh;j++)
1084 ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1087 case eenhENERGY_DELTA_H_STARTTIME:
1088 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1089 case eenhENERGY_DELTA_H_STARTLAMBDA:
1090 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1092 gmx_fatal(FARGS,"Unknown energy history entry %d\n"
1093 "You are probably reading a new checkpoint file with old code",i);
/* NOTE(review): fflags is passed by value, so the "fflags |= ..." updates
 * in the three blocks below change only the local copy and are not seen by
 * the caller. */
1098 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1100 /* Assume we have an old file format and copy sum to sum_sim */
1101 srenew(enerhist->ener_sum_sim,enerhist->nener);
1102 for(i=0; i<enerhist->nener; i++)
1104 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1106 fflags |= (1<<eenhENERGY_SUM_SIM);
1109 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1110 !(fflags & (1<<eenhENERGY_NSTEPS)))
1112 /* Assume we have an old file format and copy nsum to nsteps */
1113 enerhist->nsteps = enerhist->nsum;
1114 fflags |= (1<<eenhENERGY_NSTEPS);
1116 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1117 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1119 /* Assume we have an old file format and copy nsum_sim to nsteps_sim */
1120 enerhist->nsteps_sim = enerhist->nsum_sim;
1121 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
/* Transfer the free-energy history entries selected by fflags.
 * The function loops over all edfhNR entry indices, stops at the first
 * non-zero return value, and dispatches each flagged entry to the typed
 * transfer helper for its df_history_t field. Per-lambda arrays use
 * dfhist->nlambda as their length, and the *_nmatrix entries pass it as the
 * row length. An index with no matching case is fatal.
 * NOTE(review): local declarations and the return statement are on lines
 * not shown in this listing. */
1127 static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
1132 nlambda = dfhist->nlambda;
1135 for(i=0; (i<edfhNR && ret == 0); i++)
1137 if (fflags & (1<<i))
1141 case edfhBEQUIL: ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
1142 case edfhNATLAMBDA: ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
1143 case edfhWLHISTO: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
1144 case edfhWLDELTA: ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
1145 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
1146 case edfhSUMDG: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
1147 case edfhSUMMINVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
1148 case edfhSUMVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
1149 case edfhACCUMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
1150 case edfhACCUMM: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
1151 case edfhACCUMP2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
1152 case edfhACCUMM2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
1153 case edfhTIJ: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
1154 case edfhTIJEMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
1157 gmx_fatal(FARGS,"Unknown df history entry %d\n"
1158 "You are probably reading a new checkpoint file with old code",i);
/* Transfer the list of output files recorded in the checkpoint.
 * For each file it handles the file name, the file offset as two 32-bit
 * halves and, for file versions 8 and later, a checksum size plus 16
 * checksum bytes. The *p_outputfiles array is allocated with *nfiles
 * elements (presumably on the read path only).
 * NOTE(review): the bRead branching, the early returns after failed
 * transfers, and the allocation and release of buf are on lines not shown
 * in this listing. */
1166 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1167 gmx_file_position_t **p_outputfiles, int *nfiles,
1168 FILE *list, int file_version)
/* Selects the low 32 bits of an offset. */
1172 gmx_off_t mask = 0xFFFFFFFFL;
1173 int offset_high,offset_low;
1175 gmx_file_position_t *outputfiles;
1177 if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
1184 snew(*p_outputfiles,*nfiles);
1187 outputfiles = *p_outputfiles;
1189 for(i=0;i<*nfiles;i++)
1191 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1194 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
/* NOTE(review): strncpy() does not NUL-terminate when buf holds CPTSTRLEN-1
 * or more characters; termination then relies on the filename field being
 * at least CPTSTRLEN bytes and already zeroed -- confirm. */
1195 strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
1201 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1205 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
/* With an offset type wider than 4 bytes the two halves are recombined;
 * the other branch keeps only the low half. */
1209 #if (SIZEOF_GMX_OFF_T > 4)
1210 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1212 outputfiles[i].offset = offset_low;
/* The file name and offset are taken from the existing outputfiles entry
 * here (presumably the write path). */
1217 buf = outputfiles[i].filename;
1218 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1220 offset = outputfiles[i].offset;
1228 #if (SIZEOF_GMX_OFF_T > 4)
1229 offset_low = (int) (offset & mask);
1230 offset_high = (int) ((offset >> 32) & mask);
1232 offset_low = offset;
1236 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1240 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1245 if (file_version >= 8)
1247 if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
1252 if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
/* Marks the entry as having no checksum (presumably the branch for file
 * versions before 8). */
1259 outputfiles[i].chksum_size = -1;
/* Write a checkpoint of the current simulation state to fn.
 * The visible steps are:
 *   - determine the PP and PME node counts from cr;
 *   - build a temporary file name by inserting a "_step<step>" suffix before
 *     the extension of fn;
 *   - collect the positions of the open output files;
 *   - compute the ekin, energy-history and df-history flag words from the
 *     state;
 *   - write header, state, ekin state, energy history, df history, output
 *     file list and footer through XDR, with bRead = FALSE;
 *   - fsync all output files and close the checkpoint;
 *   - unless bNumberAndKeep is set or the fsync failed, preserve the old fn
 *     under a "_prev" name and rename the temporary file to fn.
 * NOTE(review): many lines are missing from this listing, and the points
 * below are based only on what is visible:
 *   - buf is a fixed char[1024] that receives the "_prev" file name through
 *     strcat(); no length check on fn is visible;
 *   - the return value of do_cpt_footer() is not used;
 *   - the strings obtained with gmx_strdup() and the fntemp buffer need to
 *     be released; no release is visible;
 *   - in the GMX_FAHCORE block the gmx_fatal() format uses "%d" for step,
 *     which is a gmx_large_int_t -- a format/argument mismatch. */
1266 void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
1267 FILE *fplog,t_commrec *cr,
1268 int eIntegrator,int simulation_part,
1269 gmx_bool bExpanded, int elamstats,
1270 gmx_large_int_t step,double t,t_state *state)
1280 char *fntemp; /* the temporary checkpoint file name */
1282 char timebuf[STRLEN];
1283 int nppnodes,npmenodes,flag_64bit;
1284 char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
1285 gmx_file_position_t *outputfiles;
1288 int flags_eks,flags_enh,flags_dfh,i;
1293 if (DOMAINDECOMP(cr))
1295 nppnodes = cr->dd->nnodes;
1296 npmenodes = cr->npmenodes;
1300 nppnodes = cr->nnodes;
1310 /* make the new temporary filename */
1311 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1313 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1314 sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
1315 strcat(fntemp,suffix);
1316 strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1319 gmx_ctime_r(&now,timebuf,STRLEN);
1323 fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
1324 gmx_step_str(step,buf),timebuf);
1327 /* Get offsets for open files */
1328 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1330 fp = gmx_fio_open(fntemp,"w");
1332 if (state->ekinstate.bUpToDate)
1335 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1336 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1337 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1345 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1347 flags_enh |= (1<<eenhENERGY_N);
1348 if (state->enerhist.nsum > 0)
1350 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1351 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1353 if (state->enerhist.nsum_sim > 0)
1355 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1356 (1<<eenhENERGY_NSUM_SIM));
1358 if (state->enerhist.dht)
1360 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1361 (1<< eenhENERGY_DELTA_H_LIST) |
1362 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1363 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1369 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1370 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1373 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1375 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1377 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1378 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1384 /* We can check many more things now (CPU, acceleration, etc), but
1385 * it is highly unlikely to have two separate builds with exactly
1386 * the same version, user, time, and build host!
1389 version = gmx_strdup(VERSION);
1390 btime = gmx_strdup(BUILD_TIME);
1391 buser = gmx_strdup(BUILD_USER);
1392 bhost = gmx_strdup(BUILD_HOST);
1394 double_prec = GMX_CPT_BUILD_DP;
1395 fprog = gmx_strdup(Program());
1397 ftime = &(timebuf[0]);
1399 do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
1400 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1401 &eIntegrator,&simulation_part,&step,&t,&nppnodes,
1402 DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
1403 &state->natoms,&state->ngtc,&state->nnhpres,
1404 &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
1413 if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
1414 (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
1415 (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
1416 (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0) ||
1417 (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
1420 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1423 do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
1425 /* we really, REALLY, want to make sure to physically write the checkpoint,
1426 and all the files it depends on, out to disk. Because we've
1427 opened the checkpoint with gmx_fio_open(), it's in our list
1429 ret=gmx_fio_all_output_fsync();
1435 "Cannot fsync '%s'; maybe you are out of disk space?",
1436 gmx_fio_getname(ret));
1438 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
1448 if( gmx_fio_close(fp) != 0)
1450 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1453 /* we don't move the checkpoint if the user specified they didn't want it,
1454 or if the fsyncs failed */
1455 if (!bNumberAndKeep && !ret)
1459 /* Rename the previous checkpoint file */
1461 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1462 strcat(buf,"_prev");
1463 strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1465 /* we copy here so that if something goes wrong between now and
1466 * the rename below, there's always a state.cpt.
1467 * If renames are atomic (such as in POSIX systems),
1468 * this copying should be unnecessary.
1470 gmx_file_copy(fn, buf, FALSE);
1471 /* We don't really care if this fails:
1472 * there's already a new checkpoint.
1475 gmx_file_rename(fn, buf);
1478 if (gmx_file_rename(fntemp, fn) != 0)
1480 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1488 /*code for alternate checkpointing scheme. moved from top of loop over
1490 fcRequestCheckPoint();
1491 if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
1492 gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
1494 #endif /* end GMX_FAHCORE block */
/* Print a table to fplog comparing the state entries requested by the
 * simulation (sflags) with those present in the checkpoint file (fflags).
 * One row is printed for every entry index below estNR that is set in
 * either flag word.
 * NOTE(review): the first argument of the row fprintf (the entry name) is
 * on a line not shown in this listing; fplog is used without a NULL check. */
1497 static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
1501 fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
1502 fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
1503 fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
1504 for(i=0; i<estNR; i++)
1506 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1508 fprintf(fplog," %24s %11s %11s\n",
1510 (sflags & (1<<i)) ? " present " : "not present",
1511 (fflags & (1<<i)) ? " present " : "not present");
/* Report a mismatch between the current program's value p and the
 * checkpoint file's value f for the setting named type. Output goes to
 * fplog, or to stderr when fplog is NULL.
 * NOTE(review): the comparison of p with f and the update of *mm are on
 * lines not shown in this listing. */
1516 static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
1518 FILE *fp = fplog ? fplog : stderr;
1522 fprintf(fp," %s mismatch,\n",type);
1523 fprintf(fp," current program: %d\n",p);
1524 fprintf(fp," checkpoint file: %d\n",f);
/* String counterpart of check_int(): when p and f differ according to
 * strcmp(), report both values for the setting named type. Output goes to
 * fplog, or to stderr when fplog is NULL.
 * NOTE(review): the update of *mm is on a line not shown; p and f are
 * passed to strcmp() with no visible NULL check. */
1530 static void check_string(FILE *fplog,const char *type,const char *p,
1531 const char *f,gmx_bool *mm)
1533 FILE *fp = fplog ? fplog : stderr;
1535 if (strcmp(p,f) != 0)
1537 fprintf(fp," %s mismatch,\n",type);
1538 fprintf(fp," current program: %s\n",p);
1539 fprintf(fp," checkpoint file: %s\n",f);
/* Compare the build identification and the parallel layout of the running
 * program with the values stored in the checkpoint file, using
 * check_string() and check_int() with a shared mismatch flag mm, and print
 * a notice that the continuation is not guaranteed to be binary identical.
 * NOTE(review): parts of the parameter list (including version and fprog),
 * the conditions around the node and DD-cell checks, the test of mm and the
 * destinations of the two notices are on lines not shown in this listing. */
1545 static void check_match(FILE *fplog,
1547 char *btime,char *buser,char *bhost,int double_prec,
1549 t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
1550 ivec dd_nc,ivec dd_nc_f)
1557 check_string(fplog,"Version" ,VERSION ,version,&mm);
1558 check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
1559 check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
1560 check_string(fplog,"Build host" ,BUILD_HOST ,bhost ,&mm);
1561 check_int (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
1562 check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
/* The file stores PP and PME node counts separately; their sum is compared
 * with the current total. */
1564 check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
1573 check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
1576 if (cr->npmenodes >= 0)
1578 npp -= cr->npmenodes;
1582 check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
1583 check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
1584 check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
1591 "Gromacs binary or parallel settings not identical to previous run.\n"
1592 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1593 fplog ? ",\n see the log file for details" : "");
1598 "Gromacs binary or parallel settings not identical to previous run.\n"
1599 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
/* Read the checkpoint file fn into state and check that it fits the
 * current simulation.
 *
 * The sections are read in this order: header, state, kinetic energy
 * state, energy history, free-energy (df) history, output file list and
 * footer. The run is aborted with gmx_fatal when the number of atoms,
 * T-coupling groups, NH-pressure-coupling variables or lambda states in
 * the file differs from the values in state.
 *
 * When bAppendOutputFiles is set, every output file listed in the
 * checkpoint is opened, locked, verified with an md5 checksum and, except
 * for the first (log) file, truncated to the offset stored in the
 * checkpoint. The log file is seeked to its stored offset instead and its
 * FILE pointer is handed back through pfplog.
 *
 * fn                  name of the checkpoint file
 * pfplog              log file; *pfplog is replaced by the re-opened log
 *                     file when appending
 * cr                  communication record; cr->npmenodes is taken from the
 *                     file when it was negative and the total node count
 *                     matches the file
 * bPartDecomp         passed on to check_match; also selects
 *                     nppnodes = cr->nnodes
 * dd_nc               domain decomposition grid; entries can be overwritten
 *                     with the values stored in the file
 * eIntegrator         integrator of the current simulation, compared with
 *                     the one stored in the file
 * init_fep_state      receives state->fep_state as read from the file
 * step, t             receive the step and time stored in the header
 * state               state to fill; must already carry the sizes of the
 *                     current system
 * bReadRNG            its value is passed to do_cpt_state
 * bReadEkin           set according to the kinetic energy entries found
 * simulation_part     receives the simulation part stored in the header
 * bAppendOutputFiles  request appending to the existing output files
 * bForceAppend        NOTE(review): presumably selects between the fatal
 *                     error and the note when file locking is not
 *                     supported - TODO confirm
 */
1604 static void read_checkpoint(const char *fn,FILE **pfplog,
1605 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
1606 int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
1607 t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
1608 int *simulation_part,
1609 gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
1614 char *version,*btime,*buser,*bhost,*fprog,*ftime;
1616 char filename[STRLEN],buf[STEPSTRSIZE];
1617 int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
1619 int natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
1622 gmx_file_position_t *outputfiles;
1624 t_fileio *chksum_file;
1625 FILE* fplog = *pfplog;
1626 unsigned char digest[16];
1627 #ifndef GMX_NATIVE_WINDOWS
1628 struct flock fl; /* don't initialize here: the struct order is OS
1632 const char *int_warn=
1633 "WARNING: The checkpoint file was generated with integrator %s,\n"
1634 " while the simulation uses integrator %s\n\n";
1635 const char *sd_note=
1636 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1637 " while the simulation uses %d SD or BD nodes,\n"
1638 " continuation will be exact, except for the random state\n\n";
1640 #ifndef GMX_NATIVE_WINDOWS
1642 fl.l_whence=SEEK_SET;
1651 "read_checkpoint not (yet) supported with particle decomposition");
1654 fp = gmx_fio_open(fn,"r");
1655 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1656 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1657 &eIntegrator_f,simulation_part,step,t,
1658 &nppnodes_f,dd_nc_f,&npmenodes_f,
1659 &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
1660 &fflags,&flags_eks,&flags_enh,&flags_dfh,NULL);
1662 if (bAppendOutputFiles &&
1663 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1665 gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1668 if (cr == NULL || MASTER(cr))
1670 fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
1674 /* This will not be written if we do appending, since fplog is still NULL then */
1677 fprintf(fplog,"\n");
1678 fprintf(fplog,"Reading checkpoint file %s\n",fn);
1679 fprintf(fplog," file generated by: %s\n",fprog);
1680 fprintf(fplog," file generated at: %s\n",ftime);
1681 fprintf(fplog," GROMACS build time: %s\n",btime);
1682 fprintf(fplog," GROMACS build user: %s\n",buser);
1683 fprintf(fplog," GROMACS build host: %s\n",bhost);
1684 fprintf(fplog," GROMACS double prec.: %d\n",double_prec);
1685 fprintf(fplog," simulation part #: %d\n",*simulation_part);
1686 fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
1687 fprintf(fplog," time: %f\n",*t);
1688 fprintf(fplog,"\n");
/* The sizes stored in the checkpoint must agree with the current system;
 * any difference is fatal.
 */
1691 if (natoms != state->natoms)
1693 gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
1695 if (ngtc != state->ngtc)
1697 gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
1699 if (nnhpres != state->nnhpres)
1701 gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
1704 if (nlambda != state->dfhist.nlambda)
1706 gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
1709 init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
1710 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
/* An integrator that differs from the one stored in the file is reported
 * with int_warn on stderr and in the log file; with appending requested
 * the message announces that the run is stopped.
 */
1712 if (eIntegrator_f != eIntegrator)
1716 fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1718 if(bAppendOutputFiles)
1721 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1722 "Stopping the run to prevent you from ruining all your data...\n"
1723 "If you _really_ know what you are doing, try with the -noappend option.\n");
1727 fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1736 else if (bPartDecomp)
1738 nppnodes = cr->nnodes;
1741 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1743 if (cr->npmenodes < 0)
1745 cr->npmenodes = npmenodes_f;
1747 nppnodes = cr->nnodes - cr->npmenodes;
1748 if (nppnodes == nppnodes_f)
1750 for(d=0; d<DIM; d++)
1754 dd_nc[d] = dd_nc_f[d];
1761 /* The number of PP nodes has not been set yet */
1765 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1767 /* Correct the RNG state size for the number of PP nodes.
1768 * Such assignments should all be moved to one central function.
1770 state->nrng = nppnodes*gmx_rng_n();
1771 state->nrngi = nppnodes;
1775 if (fflags != state->flags)
1780 if(bAppendOutputFiles)
1783 "Output file appending requested, but input and checkpoint states are not identical.\n"
1784 "Stopping the run to prevent you from ruining all your data...\n"
1785 "You can try with the -noappend option, and get more info in the log file.\n");
1788 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1790 gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1795 "WARNING: The checkpoint state entries do not match the simulation,\n"
1796 " see the log file for details\n\n");
1802 print_flag_mismatch(fplog,state->flags,fflags);
1807 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1808 nppnodes != nppnodes_f)
1813 fprintf(stderr,sd_note,nppnodes_f,nppnodes);
1817 fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
1822 check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
1823 cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
1826 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
1827 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1828 Investigate for 5.0. */
1833 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1834 flags_eks,&state->ekinstate,NULL);
/* bReadEkin becomes true when the file flags any of the EKINH, EKINF,
 * EKINO, EKINSCALEF, EKINSCALEH or VSCALE kinetic energy entries.
 */
1839 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1840 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1842 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1843 flags_enh,&state->enerhist,NULL);
/* For file versions before 6 the energy history sums are set from the
 * checkpoint step, after warning that this assumes a start at step 0.
 */
1849 if (file_version < 6)
1851 const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1853 fprintf(stderr,"\nWARNING: %s\n\n",warn);
1856 fprintf(fplog,"\nWARNING: %s\n\n",warn);
1858 state->enerhist.nsum = *step;
1859 state->enerhist.nsum_sim = *step;
1862 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
1863 flags_dfh,&state->dfhist,NULL);
1869 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
1875 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1880 if( gmx_fio_close(fp) != 0)
1882 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1891 /* If the user wants to append to output files,
1892 * we use the file pointer positions of the output files stored
1893 * in the checkpoint file and truncate the files such that any frames
1894 * written after the checkpoint time are removed.
1895 * All files are md5sum checked such that we can be sure that
1896 * we do not truncate other (maybe important) files.
1898 if (bAppendOutputFiles)
1900 if (fn2ftp(outputfiles[0].filename)!=efLOG)
1902 /* make sure first file is log file so that it is OK to use it for
1905 gmx_fatal(FARGS,"The first output file should always be the log "
1906 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
1908 for(i=0;i<nfiles;i++)
1910 if (outputfiles[i].offset < 0)
1912 gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
1913 "is larger than 2 GB, but mdrun did not support large file"
1914 " offsets. Can not append. Run mdrun with -noappend",
1915 outputfiles[i].filename);
1918 chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
1921 chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
1926 /* Note that there are systems where the lock operation
1927 * will succeed, but a second process can also lock the file.
1928 * We should probably try to detect this.
1930 #ifndef GMX_NATIVE_WINDOWS
1931 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
1934 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
1937 if (errno == ENOSYS)
1941 gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
1945 fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1948 fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1952 else if (errno == EACCES || errno == EAGAIN)
1954 gmx_fatal(FARGS,"Failed to lock: %s. Already running "
1955 "simulation?", outputfiles[i].filename);
1959 gmx_fatal(FARGS,"Failed to lock: %s. %s.",
1960 outputfiles[i].filename, strerror(errno));
1965 /* compute md5 chksum */
1966 if (outputfiles[i].chksum_size != -1)
1968 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
1969 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
1971 gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
1972 outputfiles[i].chksum_size,
1973 outputfiles[i].filename);
1976 if (i==0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
1978 if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
1980 gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
1985 if (i==0) /*open log file here - so that lock is never lifted
1986 after chksum is calculated */
1988 *pfplog = gmx_fio_getfp(chksum_file);
1992 gmx_fio_close(chksum_file);
1995 /* compare md5 chksum */
1996 if (outputfiles[i].chksum_size != -1 &&
1997 memcmp(digest,outputfiles[i].chksum,16)!=0)
2001 fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
2002 for (j=0; j<16; j++)
2004 fprintf(debug,"%02x",digest[j]);
2006 fprintf(debug,"\n");
2008 gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2009 outputfiles[i].filename);
2014 if (i!=0) /*log file is already seeked to correct position */
2016 #ifdef GMX_NATIVE_WINDOWS
2017 rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
2019 rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
2023 gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
/* Load the checkpoint file fn for a continuation run.
 *
 * read_checkpoint is called under the SIMMASTER(cr) condition; it fills
 * state and can modify cr->npmenodes, dd_nc, ir->fepvals->init_fep_state
 * and ir->simulation_part. cr->npmenodes, dd_nc, the checkpoint step,
 * *bReadRNG and *bReadEkin are passed to gmx_bcast.
 *
 * Afterwards ir is prepared for the continuation: bContinuation is set,
 * init_step becomes the checkpoint step and simulation_part is
 * incremented.
 *
 * fplog, bPartDecomp, bAppend and bForceAppend are handed on to
 * read_checkpoint unchanged.
 */
2033 void load_checkpoint(const char *fn,FILE **fplog,
2034 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
2035 t_inputrec *ir,t_state *state,
2036 gmx_bool *bReadRNG,gmx_bool *bReadEkin,
2037 gmx_bool bAppend,gmx_bool bForceAppend)
2039 gmx_large_int_t step;
2042 if (SIMMASTER(cr)) {
2043 /* Read the state from the checkpoint file */
2044 read_checkpoint(fn,fplog,
2045 cr,bPartDecomp,dd_nc,
2046 ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
2047 &ir->simulation_part,bAppend,bForceAppend);
/* Broadcast the values that read_checkpoint may have set. */
2050 gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
2051 gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
2052 gmx_bcast(sizeof(step),&step,cr);
2053 gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
2054 gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
2056 ir->bContinuation = TRUE;
/* Shorten the run by the steps that lie between the old initial step and
 * the checkpoint step; a negative nsteps is not adjusted.
 */
2057 if (ir->nsteps >= 0)
2059 ir->nsteps += ir->init_step - step;
2061 ir->init_step = step;
2062 ir->simulation_part += 1;
/* Read all sections of a checkpoint from the already opened file fp:
 * header, state, kinetic energy state, energy history, free-energy (df)
 * history, output file list and footer.
 *
 * The sizes and flags stored in the header are written directly into state
 * (natoms, ngtc, nnhpres, nhchainlength, dfhist.nlambda and flags), so no
 * consistency check against an existing system is done here.
 *
 * simulation_part, step and t receive the values from the header.
 * When outputfiles is not NULL, the file list and its length are returned
 * through outputfiles and nfiles; otherwise they are read into local
 * variables.
 * NOTE(review): presumably the local file list is freed again when it is
 * not handed back to the caller - TODO confirm.
 */
2065 static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
2066 gmx_large_int_t *step,double *t,t_state *state,
2068 int *nfiles,gmx_file_position_t **outputfiles)
2071 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2076 int flags_eks,flags_enh,flags_dfh;
2078 gmx_file_position_t *files_loc=NULL;
2081 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2082 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2083 &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
2084 &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
2085 &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,NULL);
2087 do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
2092 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2093 flags_eks,&state->ekinstate,NULL);
2098 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2099 flags_enh,&state->enerhist,NULL);
2104 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2105 flags_dfh,&state->dfhist,NULL);
/* Without a caller-supplied destination the file list goes into the local
 * files_loc and nfiles_loc.
 */
2111 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
2112 outputfiles != NULL ? outputfiles : &files_loc,
2113 outputfiles != NULL ? nfiles : &nfiles_loc,
2115 if (files_loc != NULL)
2125 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
/* Open the checkpoint file fn, read it into state with
 * read_checkpoint_data and close it again.
 * simulation_part, step and t receive the values stored in the header.
 * FALSE is passed for bReadRNG and the output file list is not requested.
 * gmx_file is called with an error message when closing the file fails.
 */
2139 read_checkpoint_state(const char *fn,int *simulation_part,
2140 gmx_large_int_t *step,double *t,t_state *state)
2144 fp = gmx_fio_open(fn,"r");
2145 read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
2146 if( gmx_fio_close(fp) != 0)
2148 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Read a checkpoint from the open file fp into a local state and copy its
 * contents into the trajectory frame fr.
 *
 * Set in fr: natoms, step (the checkpoint step converted to int by
 * gmx_large_int_to_int), lambda (the efptFEP component), fep_state, the
 * presence flags bX, bV and bBox (taken from the estX, estV and estBOX
 * bits of the state flags) and the box.
 * NOTE(review): presumably coordinates and velocities are handed over to
 * fr as well when bX and bV are set - TODO confirm.
 */
2152 void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
2155 int simulation_part;
2156 gmx_large_int_t step;
2159 init_state(&state,0,0,0,0,0);
2161 read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
2163 fr->natoms = state.natoms;
2166 fr->step = gmx_large_int_to_int(step,
2167 "conversion of checkpoint to trajectory");
2171 fr->lambda = state.lambda[efptFEP];
2172 fr->fep_state = state.fep_state;
/* The presence flags hold the masked bit itself, not a normalized 0/1. */
2174 fr->bX = (state.flags & (1<<estX));
2180 fr->bV = (state.flags & (1<<estV));
2187 fr->bBox = (state.flags & (1<<estBOX));
2190 copy_mat(state.box,fr->box);
/* Read the checkpoint file fn section by section and pass out to every
 * section reader (header, state, kinetic energy state, energy history,
 * df history and output file list).
 * NOTE(review): presumably the readers print the contents to out when it
 * is not NULL - TODO confirm.
 *
 * A local state is initialized with init_state and filled from the file;
 * the RNG entries are read as well (TRUE is passed for the RNG argument of
 * do_cpt_state). The footer is read without out.
 * gmx_file is called with an error message when closing the file fails.
 */
2195 void list_checkpoint(const char *fn,FILE *out)
2199 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2201 int eIntegrator,simulation_part,nppnodes,npme;
2202 gmx_large_int_t step;
2206 int flags_eks,flags_enh,flags_dfh;
2210 gmx_file_position_t *outputfiles;
2213 init_state(&state,-1,-1,-1,-1,0);
2215 fp = gmx_fio_open(fn,"r");
2216 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2217 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2218 &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
2219 &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
2220 &(state.dfhist.nlambda),&state.flags,
2221 &flags_eks,&flags_enh,&flags_dfh,out);
2222 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
2227 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2228 flags_eks,&state.ekinstate,out);
2233 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2234 flags_enh,&state.enerhist,out);
2238 init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
2239 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2240 flags_dfh,&state.dfhist,out);
/* The return value of do_cpt_files is not stored here, unlike that of the
 * other section readers.
 */
2244 do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
2249 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2256 if( gmx_fio_close(fp) != 0)
2258 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Tell whether the file name fnm_cp, as stored in a checkpoint file, is
 * one of the output files of the current program and exists.
 *
 * The nfile entries of fnm are searched for an entry for which is_output
 * holds and whose first file name (fns[0]) equals fnm_cp. The result is
 * true only when such an entry was found before the end of the list and
 * gmx_fexist reports that fnm_cp exists.
 */
2265 static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
2269 /* Check if the output file name stored in the checkpoint file
2270 * is one of the output file names of mdrun.
2274 !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
2279 return (i < nfile && gmx_fexist(fnm_cp));
2282 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2283 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2284 gmx_large_int_t *cpt_step,t_commrec *cr,
2285 gmx_bool bAppendReq,
2286 int nfile,const t_filenm fnm[],
2287 const char *part_suffix,gmx_bool *bAddPart)
2290 gmx_large_int_t step=0;
2294 gmx_file_position_t *outputfiles;
2297 char *fn,suf_up[STRLEN];
2301 if (SIMMASTER(cr)) {
2302 if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
2304 *simulation_part = 0;
2308 init_state(&state,0,0,0,0,0);
2310 read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
2311 &nfiles,&outputfiles);
2312 if( gmx_fio_close(fp) != 0)
2314 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2321 for(f=0; f<nfiles; f++)
2323 if (exist_output_file(outputfiles[f].filename,nfile,fnm))
2328 if (nexist == nfiles)
2330 bAppend = bAppendReq;
2332 else if (nexist > 0)
2335 "Output file appending has been requested,\n"
2336 "but some output files listed in the checkpoint file %s\n"
2337 "are not present or are named differently by the current program:\n",
2339 fprintf(stderr,"output files present:");
2340 for(f=0; f<nfiles; f++)
2342 if (exist_output_file(outputfiles[f].filename,
2345 fprintf(stderr," %s",outputfiles[f].filename);
2348 fprintf(stderr,"\n");
2349 fprintf(stderr,"output files not present or named differently:");
2350 for(f=0; f<nfiles; f++)
2352 if (!exist_output_file(outputfiles[f].filename,
2355 fprintf(stderr," %s",outputfiles[f].filename);
2358 fprintf(stderr,"\n");
2360 gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
2368 gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
2370 fn = outputfiles[0].filename;
2371 if (strlen(fn) < 4 ||
2372 gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
2374 gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
2376 /* Set bAddPart to whether the suffix string '.part' is present
2377 * in the log file name.
2379 strcpy(suf_up,part_suffix);
2381 *bAddPart = (strstr(fn,part_suffix) != NULL ||
2382 strstr(fn,suf_up) != NULL);
2390 gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
2392 if (*simulation_part > 0 && bAppendReq)
2394 gmx_bcast(sizeof(bAppend),&bAppend,cr);
2395 gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
2398 if (NULL != cpt_step)