1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
30 #ifdef HAVE_SYS_TIME_H
38 #ifdef GMX_NATIVE_WINDOWS
41 #include <sys/locking.h>
55 #include "gmx_random.h"
56 #include "checkpoint.h"
67 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
69 gmx_ctime_r(const time_t *clock,char *buf, int n);
72 #define CPT_MAGIC1 171817
73 #define CPT_MAGIC2 171819
74 #define CPTSTRLEN 1024
77 #define GMX_CPT_BUILD_DP 1
79 #define GMX_CPT_BUILD_DP 0
82 /* cpt_version should normally only be changed
83 * when the header or footer format changes.
84 * The state data format itself is backward and forward compatible.
85 * But old code can not read a new entry that is present in the file
86 * (but can read a new format when new entries are not present).
88 static const int cpt_version = 14;
91 const char *est_names[estNR]=
94 "box", "box-rel", "box-v", "pres_prev",
95 "nosehoover-xi", "thermostat-integral",
96 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
97 "disre_initf", "disre_rm3tav",
98 "orire_initf", "orire_Dtav",
99 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
102 enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
104 const char *eeks_names[eeksNR]=
106 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
107 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
110 enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
111 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
112 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
113 eenhENERGY_DELTA_H_NN,
114 eenhENERGY_DELTA_H_LIST,
115 eenhENERGY_DELTA_H_STARTTIME,
116 eenhENERGY_DELTA_H_STARTLAMBDA,
119 const char *eenh_names[eenhNR]=
121 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
122 "energy_sum_sim", "energy_nsum_sim",
123 "energy_nsteps", "energy_nsteps_sim",
125 "energy_delta_h_list",
126 "energy_delta_h_start_time",
127 "energy_delta_h_start_lambda"
130 /* free energy history variables -- need to be preserved over checkpoint */
131 enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
132 edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
133 /* free energy history variable names */
134 const char *edfh_names[edfhNR]=
136 "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
137 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
140 #ifdef GMX_NATIVE_WINDOWS
142 gmx_wintruncate(const char *filename, __int64 size)
145 /*we do this elsewhere*/
151 fp=fopen(filename,"rb+");
158 return _chsize_s( fileno(fp), size);
164 enum { ecprREAL, ecprRVEC, ecprMATRIX };
166 enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
167 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
168 cptpEST - state variables.
169 cptpEEKS - Kinetic energy state variables.
170 cptpEENH - Energy history state variables.
171 cptpEDFH - free energy history variables.
175 static const char *st_names(int cptp,int ecpt)
179 case cptpEST: return est_names [ecpt]; break;
180 case cptpEEKS: return eeks_names[ecpt]; break;
181 case cptpEENH: return eenh_names[ecpt]; break;
182 case cptpEDFH: return edfh_names[ecpt]; break;
188 static void cp_warning(FILE *fp)
190 fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
193 static void cp_error()
195 gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
198 static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
206 res = xdr_string(xd,s,CPTSTRLEN);
213 fprintf(list,"%s = %s\n",desc,*s);
218 static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
229 fprintf(list,"%s = %d\n",desc,*i);
234 static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
240 fprintf(list,"%s = ",desc);
242 for (j=0; j<n && res; j++)
244 res &= xdr_u_char(xd,&i[j]);
247 fprintf(list,"%02x",i[j]);
262 static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
264 if (do_cpt_int(xd,desc,i,list) < 0)
270 static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
273 char buf[STEPSTRSIZE];
275 res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
282 fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
286 static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
290 res = xdr_double(xd,f);
297 fprintf(list,"%s = %f\n",desc,*f);
301 /* If nval >= 0, nval is used; on read this should match the passed value.
302 * If nval < 0, *nptr is used; on read the value is stored in *nptr
304 static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
305 int nval,int *nptr,real **v,
306 FILE *list,int erealtype)
310 int dtc=xdr_datatype_float;
312 int dtc=xdr_datatype_double;
329 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
334 res = xdr_int(xd,&nf);
345 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
354 res = xdr_int(xd,&dt);
361 fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
362 st_names(cptp,ecpt),xdr_datatype_names[dtc],
363 xdr_datatype_names[dt]);
365 if (list || !(sflags & (1<<ecpt)))
378 if (dt == xdr_datatype_float)
380 if (dtc == xdr_datatype_float)
388 res = xdr_vector(xd,(char *)vf,nf,
389 (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
394 if (dtc != xdr_datatype_float)
405 if (dtc == xdr_datatype_double)
413 res = xdr_vector(xd,(char *)vd,nf,
414 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
419 if (dtc != xdr_datatype_double)
434 pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
437 pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
440 gmx_incons("Unknown checkpoint real type");
452 /* This function stores n along with the reals for reading,
453 * but on reading it assumes that n matches the value in the checkpoint file,
454 * a fatal error is generated when this is not the case.
456 static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
457 int n,real **v,FILE *list)
459 return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
462 /* This function does the same as do_cpte_reals,
463 * except that on reading it ignores the passed value of *n
464 * and stores the value read from the checkpoint file in *n.
466 static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
467 int *n,real **v,FILE *list)
469 return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
472 static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
477 return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
480 static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
481 int n,int **v,FILE *list)
484 int dtc=xdr_datatype_int;
489 res = xdr_int(xd,&nf);
494 if (list == NULL && v != NULL && nf != n)
496 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
499 res = xdr_int(xd,&dt);
506 gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
507 st_names(cptp,ecpt),xdr_datatype_names[dtc],
508 xdr_datatype_names[dt]);
510 if (list || !(sflags & (1<<ecpt)) || v == NULL)
523 res = xdr_vector(xd,(char *)vp,nf,
524 (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
531 pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
541 static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
544 return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
547 static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
548 int n,double **v,FILE *list)
551 int dtc=xdr_datatype_double;
556 res = xdr_int(xd,&nf);
561 if (list == NULL && nf != n)
563 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
566 res = xdr_int(xd,&dt);
573 gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
574 st_names(cptp,ecpt),xdr_datatype_names[dtc],
575 xdr_datatype_names[dt]);
577 if (list || !(sflags & (1<<ecpt)))
590 res = xdr_vector(xd,(char *)vp,nf,
591 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
598 pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
608 static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
609 double *r,FILE *list)
611 return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
615 static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
616 int n,rvec **v,FILE *list)
620 return do_cpte_reals_low(xd,cptp,ecpt,sflags,
621 n*DIM,NULL,(real **)v,list,ecprRVEC);
624 static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
630 vr = (real *)&(v[0][0]);
631 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
632 DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
634 if (list && ret == 0)
636 pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
643 static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
644 int n, real **v,FILE *list)
649 char name[CPTSTRLEN];
660 reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
661 if (list && reti == 0)
663 sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
664 pr_reals(list,0,name,v[i],n);
674 static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
675 int n,matrix **v,FILE *list)
684 res = xdr_int(xd,&nf);
689 if (list == NULL && nf != n)
691 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
693 if (list || !(sflags & (1<<ecpt)))
713 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
717 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
718 nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
725 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
731 if (list && ret == 0)
735 pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
746 static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
747 char **version,char **btime,char **buser,char **bhost,
749 char **fprog,char **ftime,
750 int *eIntegrator,int *simulation_part,
751 gmx_large_int_t *step,double *t,
752 int *nnodes,int *dd_nc,int *npme,
753 int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
754 int *nlambda, int *flags_state,
755 int *flags_eks,int *flags_enh, int *flags_dfh,
772 res = xdr_int(xd,&magic);
775 gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
777 if (magic != CPT_MAGIC1)
779 gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
780 "The checkpoint file is corrupted or not a checkpoint file",
787 if (gethostname(fhost,255) != 0)
789 sprintf(fhost,"unknown");
792 sprintf(fhost,"unknown");
795 do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
796 do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
797 do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
798 do_cpt_string_err(xd,bRead,"GROMACS build host" ,bhost,list);
799 do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
800 do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
801 *file_version = cpt_version;
802 do_cpt_int_err(xd,"checkpoint file version",file_version,list);
803 if (*file_version > cpt_version)
805 gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
807 if (*file_version >= 13)
809 do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
815 if (*file_version >= 12)
817 do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
823 do_cpt_int_err(xd,"#atoms" ,natoms ,list);
824 do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
825 if (*file_version >= 10)
827 do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
833 if (*file_version >= 11)
835 do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
841 if (*file_version >= 14)
843 do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
849 do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
850 if (*file_version >= 3)
852 do_cpt_int_err(xd,"simulation part #", simulation_part,list);
856 *simulation_part = 1;
858 if (*file_version >= 5)
860 do_cpt_step_err(xd,"step" ,step ,list);
864 do_cpt_int_err(xd,"step" ,&idum ,list);
867 do_cpt_double_err(xd,"t" ,t ,list);
868 do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
870 do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
871 do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
872 do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
873 do_cpt_int_err(xd,"#PME-only nodes",npme,list);
874 do_cpt_int_err(xd,"state flags",flags_state,list);
875 if (*file_version >= 4)
877 do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
878 do_cpt_int_err(xd,"energy history flags",flags_enh,list);
883 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
884 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
885 (1<<(estORIRE_DTAV+2)) |
886 (1<<(estORIRE_DTAV+3))));
888 if (*file_version >= 14)
890 do_cpt_int_err(xd,"df history flags",flags_dfh,list);
896 static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
901 if (file_version >= 2)
904 res = xdr_int(xd,&magic);
909 if (magic != CPT_MAGIC2)
918 static int do_cpt_state(XDR *xd,gmx_bool bRead,
919 int fflags,t_state *state,
920 gmx_bool bReadRNG,FILE *list)
923 int **rng_p,**rngi_p;
930 nnht = state->nhchainlength*state->ngtc;
931 nnhtp = state->nhchainlength*state->nnhpres;
935 rng_p = (int **)&state->ld_rng;
936 rngi_p = &state->ld_rngi;
940 /* Do not read the RNG data */
944 /* We want the MC_RNG the same across all the nodes for now -- lambda MC is global */
946 sflags = state->flags;
947 for(i=0; (i<estNR && ret == 0); i++)
953 case estLAMBDA: ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
954 case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
955 case estBOX: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
956 case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
957 case estBOXV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
958 case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
959 case estSVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
960 case estFVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
961 case estNH_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
962 case estNH_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
963 case estNHPRES_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
964 case estNHPRES_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
965 case estTC_INT: ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
966 case estVETA: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
967 case estVOL0: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
968 case estX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
969 case estV: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
970 case estSDX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
971 case estLD_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
972 case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
973 case estMC_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
974 case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
975 case estDISRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
976 case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
977 case estORIRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
978 case estORIRE_DTAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
980 gmx_fatal(FARGS,"Unknown state entry %d\n"
981 "You are probably reading a new checkpoint file with old code",i);
989 static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
990 int fflags,ekinstate_t *ekins,
998 for(i=0; (i<eeksNR && ret == 0); i++)
1000 if (fflags & (1<<i))
1005 case eeksEKIN_N: ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
1006 case eeksEKINH : ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
1007 case eeksEKINF: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
1008 case eeksEKINO: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
1009 case eeksEKINTOTAL: ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
1010 case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
/* Pass (cptp, ecpt) as (cptpEEKS, i), exactly like the other eeks entries:
 * ecpt selects both the entry name reported through st_names() and the
 * flag bit (1<<ecpt) that the low-level readers test before storing data.
 * The previous argument order (1, cptpEEKS) made every one of these entries
 * use index 1 instead of its own index. */
1011 case eeksVSCALE: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
1012 case eeksEKINSCALEH: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
1013 case eeksDEKINDL : ret = do_cpte_real(xd,cptpEEKS,i,fflags,&ekins->dekindl,list); break;
1014 case eeksMVCOS: ret = do_cpte_real(xd,cptpEEKS,i,fflags,&ekins->mvcos,list); break;
1016 gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
1017 "You are probably reading a new checkpoint file with old code",i);
1026 static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
1027 int fflags,energyhistory_t *enerhist,
1038 enerhist->nsteps = 0;
1040 enerhist->nsteps_sim = 0;
1041 enerhist->nsum_sim = 0;
1042 enerhist->dht = NULL;
1044 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1046 snew(enerhist->dht,1);
1047 enerhist->dht->ndh = NULL;
1048 enerhist->dht->dh = NULL;
1049 enerhist->dht->start_lambda_set=FALSE;
1053 for(i=0; (i<eenhNR && ret == 0); i++)
1055 if (fflags & (1<<i))
1059 case eenhENERGY_N: ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
1060 case eenhENERGY_AVER: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
1061 case eenhENERGY_SUM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
1062 case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
1063 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
1064 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
1065 case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
1066 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
1067 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
1068 if (bRead) /* now allocate memory for it */
1070 snew(enerhist->dht->dh, enerhist->dht->nndh);
1071 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1072 for(j=0;j<enerhist->dht->nndh;j++)
1074 enerhist->dht->ndh[j] = 0;
1075 enerhist->dht->dh[j] = NULL;
1079 case eenhENERGY_DELTA_H_LIST:
1080 for(j=0;j<enerhist->dht->nndh;j++)
1082 ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1085 case eenhENERGY_DELTA_H_STARTTIME:
1086 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1087 case eenhENERGY_DELTA_H_STARTLAMBDA:
1088 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1090 gmx_fatal(FARGS,"Unknown energy history entry %d\n"
1091 "You are probably reading a new checkpoint file with old code",i);
1096 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1098 /* Assume we have an old file format and copy sum to sum_sim */
1099 srenew(enerhist->ener_sum_sim,enerhist->nener);
1100 for(i=0; i<enerhist->nener; i++)
1102 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1104 fflags |= (1<<eenhENERGY_SUM_SIM);
1107 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1108 !(fflags & (1<<eenhENERGY_NSTEPS)))
1110 /* Assume we have an old file format and copy nsum to nsteps */
1111 enerhist->nsteps = enerhist->nsum;
1112 fflags |= (1<<eenhENERGY_NSTEPS);
1114 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1115 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1117 /* Assume we have an old file format and copy nsum_sim to nsteps_sim */
1118 enerhist->nsteps_sim = enerhist->nsum_sim;
1119 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1125 static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
1130 nlambda = dfhist->nlambda;
1133 for(i=0; (i<edfhNR && ret == 0); i++)
1135 if (fflags & (1<<i))
1139 case edfhBEQUIL: ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
1140 case edfhNATLAMBDA: ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
1141 case edfhWLHISTO: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
1142 case edfhWLDELTA: ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
1143 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
1144 case edfhSUMDG: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
1145 case edfhSUMMINVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
1146 case edfhSUMVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
1147 case edfhACCUMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
1148 case edfhACCUMM: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
1149 case edfhACCUMP2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
1150 case edfhACCUMM2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
1151 case edfhTIJ: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
1152 case edfhTIJEMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
1155 gmx_fatal(FARGS,"Unknown df history entry %d\n"
1156 "You are probably reading a new checkpoint file with old code",i);
1164 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1165 gmx_file_position_t **p_outputfiles, int *nfiles,
1166 FILE *list, int file_version)
1170 gmx_off_t mask = 0xFFFFFFFFL;
1171 int offset_high,offset_low;
1173 gmx_file_position_t *outputfiles;
1175 if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
1182 snew(*p_outputfiles,*nfiles);
1185 outputfiles = *p_outputfiles;
1187 for(i=0;i<*nfiles;i++)
1189 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1192 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1193 strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
1199 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1203 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1207 #if (SIZEOF_GMX_OFF_T > 4)
1208 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1210 outputfiles[i].offset = offset_low;
1215 buf = outputfiles[i].filename;
1216 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1218 offset = outputfiles[i].offset;
1226 #if (SIZEOF_GMX_OFF_T > 4)
1227 offset_low = (int) (offset & mask);
1228 offset_high = (int) ((offset >> 32) & mask);
1230 offset_low = offset;
1234 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1238 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1243 if (file_version >= 8)
1245 if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
1250 if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
1257 outputfiles[i].chksum_size = -1;
1264 void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
1265 FILE *fplog,t_commrec *cr,
1266 int eIntegrator,int simulation_part,
1267 gmx_bool bExpanded, int elamstats,
1268 gmx_large_int_t step,double t,t_state *state)
1278 char *fntemp; /* the temporary checkpoint file name */
1280 char timebuf[STRLEN];
1281 int nppnodes,npmenodes,flag_64bit;
1282 char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
1283 gmx_file_position_t *outputfiles;
1286 int flags_eks,flags_enh,flags_dfh,i;
1291 if (DOMAINDECOMP(cr))
1293 nppnodes = cr->dd->nnodes;
1294 npmenodes = cr->npmenodes;
1298 nppnodes = cr->nnodes;
1308 /* make the new temporary filename */
1309 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1311 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1312 sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
1313 strcat(fntemp,suffix);
1314 strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1317 gmx_ctime_r(&now,timebuf,STRLEN);
1321 fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
1322 gmx_step_str(step,buf),timebuf);
1325 /* Get offsets for open files */
1326 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1328 fp = gmx_fio_open(fntemp,"w");
1330 if (state->ekinstate.bUpToDate)
1333 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1334 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1335 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1343 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1345 flags_enh |= (1<<eenhENERGY_N);
1346 if (state->enerhist.nsum > 0)
1348 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1349 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1351 if (state->enerhist.nsum_sim > 0)
1353 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1354 (1<<eenhENERGY_NSUM_SIM));
1356 if (state->enerhist.dht)
1358 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1359 (1<< eenhENERGY_DELTA_H_LIST) |
1360 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1361 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1367 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1368 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1371 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1373 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1375 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1376 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1382 /* We can check many more things now (CPU, acceleration, etc), but
1383 * it is highly unlikely to have two separate builds with exactly
1384 * the same version, user, time, and build host!
1387 version = gmx_strdup(VERSION);
1388 btime = gmx_strdup(BUILD_TIME);
1389 buser = gmx_strdup(BUILD_USER);
1390 bhost = gmx_strdup(BUILD_HOST);
1392 double_prec = GMX_CPT_BUILD_DP;
1393 fprog = gmx_strdup(Program());
1395 ftime = &(timebuf[0]);
1397 do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
1398 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1399 &eIntegrator,&simulation_part,&step,&t,&nppnodes,
1400 DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
1401 &state->natoms,&state->ngtc,&state->nnhpres,
1402 &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
1411 if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
1412 (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
1413 (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
1414 (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0) ||
1415 (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
1418 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1421 do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
1423 /* we really, REALLY, want to make sure to physically write the checkpoint,
1424 and all the files it depends on, out to disk. Because we've
1425 opened the checkpoint with gmx_fio_open(), it's in our list
1427 ret=gmx_fio_all_output_fsync();
1433 "Cannot fsync '%s'; maybe you are out of disk space?",
1434 gmx_fio_getname(ret));
1436 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
1446 if( gmx_fio_close(fp) != 0)
1448 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1451 /* we don't move the checkpoint if the user specified they didn't want it,
1452 or if the fsyncs failed */
1453 if (!bNumberAndKeep && !ret)
1457 /* Rename the previous checkpoint file */
1459 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1460 strcat(buf,"_prev");
1461 strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1463 /* we copy here so that if something goes wrong between now and
1464 * the rename below, there's always a state.cpt.
1465 * If renames are atomic (such as in POSIX systems),
1466 * this copying should be unnecessary.
1468 gmx_file_copy(fn, buf, FALSE);
1469 /* We don't really care if this fails:
1470 * there's already a new checkpoint.
1473 gmx_file_rename(fn, buf);
1476 if (gmx_file_rename(fntemp, fn) != 0)
1478 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1486 /*code for alternate checkpointing scheme. moved from top of loop over
1488 fcRequestCheckPoint();
1489 if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
1490 gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
1492 #endif /* end GMX_FAHCORE block */
1495 static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
1499 fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
1500 fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
1501 fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
1502 for(i=0; i<estNR; i++)
1504 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1506 fprintf(fplog," %24s %11s %11s\n",
1508 (sflags & (1<<i)) ? " present " : "not present",
1509 (fflags & (1<<i)) ? " present " : "not present");
1514 static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
1516 FILE *fp = fplog ? fplog : stderr;
1520 fprintf(fp," %s mismatch,\n",type);
1521 fprintf(fp," current program: %d\n",p);
1522 fprintf(fp," checkpoint file: %d\n",f);
1528 static void check_string(FILE *fplog,const char *type,const char *p,
1529 const char *f,gmx_bool *mm)
1531 FILE *fp = fplog ? fplog : stderr;
1533 if (strcmp(p,f) != 0)
1535 fprintf(fp," %s mismatch,\n",type);
1536 fprintf(fp," current program: %s\n",p);
1537 fprintf(fp," checkpoint file: %s\n",f);
/* Compare the build and parallel setup of the running program with the
 * values stored in a checkpoint file.
 *
 * Version, build time, build user, build host, precision and program name
 * are compared with the compile-time/runtime values of this binary; the
 * total node count, the PME node count and the domain decomposition grid
 * (dd_nc against dd_nc_f) are compared with the values from the file
 * (npp_f PP nodes, npme_f PME nodes). Every comparison goes through
 * check_string/check_int, which do the reporting.
 * The notes at the end tell the user that continuation is exact but not
 * guaranteed to be binary identical.
 */
1543 static void check_match(FILE *fplog,
1545 char *btime,char *buser,char *bhost,int double_prec,
1547 t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
1548 ivec dd_nc,ivec dd_nc_f)
1555 check_string(fplog,"Version" ,VERSION ,version,&mm);
1556 check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
1557 check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
1558 check_string(fplog,"Build host" ,BUILD_HOST ,bhost ,&mm);
1559 check_int (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
1560 check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
1562 check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
1571 check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
1574 if (cr->npmenodes >= 0)
1576 npp -= cr->npmenodes;
1580 check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
1581 check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
1582 check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
1589 "Gromacs binary or parallel settings not identical to previous run.\n"
1590 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1591 fplog ? ",\n see the log file for details" : "");
1596 "Gromacs binary or parallel settings not identical to previous run.\n"
1597 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
/* Read the checkpoint file fn into state and check it against the current run.
 *
 * The header is read with do_cpt_header. The run is stopped with gmx_fatal
 * when the number of atoms, T-coupling groups, NH-pressure-coupling
 * variables or lambda states in the file differs from state, and when
 * appending is requested while the precision of file and binary differ.
 * A different integrator or different state flags are reported as
 * warnings; a state flag mismatch is fatal unless the environment
 * variable GMX_ALLOW_CPT_MISMATCH is set. Build and parallel settings are
 * compared by check_match.
 * After the header the state, kinetic-energy state, energy history,
 * free-energy history, output-file list and footer are read in that order
 * and the file is closed.
 *
 * With bAppendOutputFiles set, the output files listed in the checkpoint
 * are opened and locked, their md5 checksum is verified when one was
 * stored, and all files but the first are truncated to the stored offset.
 * The first file must be the log file; it is positioned at its stored
 * offset instead and its FILE pointer is returned through pfplog.
 *
 * step, t and simulation_part receive the values stored in the header.
 * init_fep_state receives state->fep_state after the state has been read.
 * bReadEkin reports whether kinetic-energy entries were flagged in the file.
 * cr->npmenodes and dd_nc can be overwritten with the values from the file.
 */
1602 static void read_checkpoint(const char *fn,FILE **pfplog,
1603 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
1604 int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
1605 t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
1606 int *simulation_part,
1607 gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
1612 char *version,*btime,*buser,*bhost,*fprog,*ftime;
1614 char filename[STRLEN],buf[STEPSTRSIZE];
1615 int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
1617 int natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
1620 gmx_file_position_t *outputfiles;
1622 t_fileio *chksum_file;
1623 FILE* fplog = *pfplog;
1624 unsigned char digest[16];
1625 #ifndef GMX_NATIVE_WINDOWS
1626 struct flock fl; /* don't initialize here: the struct order is OS
1630 const char *int_warn=
1631 "WARNING: The checkpoint file was generated with integrator %s,\n"
1632 " while the simulation uses integrator %s\n\n";
1633 const char *sd_note=
1634 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1635 " while the simulation uses %d SD or BD nodes,\n"
1636 " continuation will be exact, except for the random state\n\n";
1638 #ifndef GMX_NATIVE_WINDOWS
1640 fl.l_whence=SEEK_SET;
1649 "read_checkpoint not (yet) supported with particle decomposition");
1652 fp = gmx_fio_open(fn,"r");
1653 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1654 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1655 &eIntegrator_f,simulation_part,step,t,
1656 &nppnodes_f,dd_nc_f,&npmenodes_f,
1657 &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
1658 &fflags,&flags_eks,&flags_enh,&flags_dfh,NULL);
1660 if (bAppendOutputFiles &&
1661 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1663 gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1666 if (cr == NULL || MASTER(cr))
1668 fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
1672 /* This will not be written if we do appending, since fplog is still NULL then */
1675 fprintf(fplog,"\n");
1676 fprintf(fplog,"Reading checkpoint file %s\n",fn);
1677 fprintf(fplog," file generated by: %s\n",fprog);
1678 fprintf(fplog," file generated at: %s\n",ftime);
1679 fprintf(fplog," GROMACS build time: %s\n",btime);
1680 fprintf(fplog," GROMACS build user: %s\n",buser);
1681 fprintf(fplog," GROMACS build host: %s\n",bhost);
1682 fprintf(fplog," GROMACS double prec.: %d\n",double_prec);
1683 fprintf(fplog," simulation part #: %d\n",*simulation_part);
1684 fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
1685 fprintf(fplog," time: %f\n",*t);
1686 fprintf(fplog,"\n");
/* The sizes stored in the checkpoint header have to match the current system */
1689 if (natoms != state->natoms)
1691 gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
1693 if (ngtc != state->ngtc)
1695 gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
1697 if (nnhpres != state->nnhpres)
1699 gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
1702 if (nlambda != state->dfhist.nlambda)
1704 gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
1707 init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
1708 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1710 if (eIntegrator_f != eIntegrator)
1714 fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1716 if(bAppendOutputFiles)
1719 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1720 "Stopping the run to prevent you from ruining all your data...\n"
1721 "If you _really_ know what you are doing, try with the -noappend option.\n");
1725 fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1734 else if (bPartDecomp)
1736 nppnodes = cr->nnodes;
1739 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1741 if (cr->npmenodes < 0)
1743 cr->npmenodes = npmenodes_f;
1745 nppnodes = cr->nnodes - cr->npmenodes;
1746 if (nppnodes == nppnodes_f)
1748 for(d=0; d<DIM; d++)
1752 dd_nc[d] = dd_nc_f[d];
1759 /* The number of PP nodes has not been set yet */
1763 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1765 /* Correct the RNG state size for the number of PP nodes.
1766 * Such assignments should all be moved to one central function.
1768 state->nrng = nppnodes*gmx_rng_n();
1769 state->nrngi = nppnodes;
1773 if (fflags != state->flags)
1778 if(bAppendOutputFiles)
1781 "Output file appending requested, but input and checkpoint states are not identical.\n"
1782 "Stopping the run to prevent you from ruining all your data...\n"
1783 "You can try with the -noappend option, and get more info in the log file.\n");
1786 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1788 gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1793 "WARNING: The checkpoint state entries do not match the simulation,\n"
1794 " see the log file for details\n\n");
1800 print_flag_mismatch(fplog,state->flags,fflags);
1805 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1806 nppnodes != nppnodes_f)
1811 fprintf(stderr,sd_note,nppnodes_f,nppnodes);
1815 fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
1820 check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
1821 cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
1824 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
1825 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1826 Investigate for 5.0. */
1831 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1832 flags_eks,&state->ekinstate,NULL);
/* Kinetic energy data counts as read when the file flags any of the entries tested below */
1837 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1838 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1840 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1841 flags_enh,&state->enerhist,NULL);
1847 if (file_version < 6)
1849 const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1851 fprintf(stderr,"\nWARNING: %s\n\n",warn);
1854 fprintf(fplog,"\nWARNING: %s\n\n",warn);
1856 state->enerhist.nsum = *step;
1857 state->enerhist.nsum_sim = *step;
1860 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
1861 flags_dfh,&state->dfhist,NULL);
1867 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
1873 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1878 if( gmx_fio_close(fp) != 0)
1880 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1889 /* If the user wants to append to output files,
1890 * we use the file pointer positions of the output files stored
1891 * in the checkpoint file and truncate the files such that any frames
1892 * written after the checkpoint time are removed.
1893 * All files are md5sum checked such that we can be sure that
1894 * we do not truncate other (maybe important) files.
1896 if (bAppendOutputFiles)
1898 if (fn2ftp(outputfiles[0].filename)!=efLOG)
1900 /* make sure first file is log file so that it is OK to use it for
1903 gmx_fatal(FARGS,"The first output file should always be the log "
1904 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
1906 for(i=0;i<nfiles;i++)
1908 if (outputfiles[i].offset < 0)
1910 gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
1911 "is larger than 2 GB, but mdrun did not support large file"
1912 " offsets. Can not append. Run mdrun with -noappend",
1913 outputfiles[i].filename);
1916 chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
1919 chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
1924 /* Note that there are systems where the lock operation
1925 * will succeed, but a second process can also lock the file.
1926 * We should probably try to detect this.
1928 #ifndef GMX_NATIVE_WINDOWS
1929 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
1932 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
1935 if (errno == ENOSYS)
1939 gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
1943 fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1946 fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1950 else if (errno == EACCES || errno == EAGAIN)
1952 gmx_fatal(FARGS,"Failed to lock: %s. Already running "
1953 "simulation?", outputfiles[i].filename);
1957 gmx_fatal(FARGS,"Failed to lock: %s. %s.",
1958 outputfiles[i].filename, strerror(errno));
1963 /* compute md5 chksum */
1964 if (outputfiles[i].chksum_size != -1)
1966 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
1967 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
1969 gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
1970 outputfiles[i].chksum_size,
1971 outputfiles[i].filename);
1974 if (i==0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
1976 if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
1978 gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
1983 if (i==0) /*open log file here - so that lock is never lifted
1984 after chksum is calculated */
1986 *pfplog = gmx_fio_getfp(chksum_file);
1990 gmx_fio_close(chksum_file);
1993 /* compare md5 chksum */
1994 if (outputfiles[i].chksum_size != -1 &&
1995 memcmp(digest,outputfiles[i].chksum,16)!=0)
1999 fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
2000 for (j=0; j<16; j++)
2002 fprintf(debug,"%02x",digest[j]);
2004 fprintf(debug,"\n");
2006 gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2007 outputfiles[i].filename);
2012 if (i!=0) /*log file is already seeked to correct position */
2014 #ifdef GMX_NATIVE_WINDOWS
2015 rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
2017 rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
2021 gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
/* Load the checkpoint file fn and set up ir for continuing the run.
 *
 * On the simulation master the file is read with read_checkpoint, which
 * fills state, bReadRNG, bReadEkin, ir->simulation_part and
 * ir->fepvals->init_fep_state and may update cr->npmenodes and dd_nc.
 * cr->npmenodes, dd_nc, the checkpoint step, *bReadRNG and *bReadEkin are
 * passed to gmx_bcast.
 * Afterwards ir is marked as a continuation, ir->init_step is set to the
 * checkpoint step and ir->simulation_part is incremented by one.
 */
2031 void load_checkpoint(const char *fn,FILE **fplog,
2032 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
2033 t_inputrec *ir,t_state *state,
2034 gmx_bool *bReadRNG,gmx_bool *bReadEkin,
2035 gmx_bool bAppend,gmx_bool bForceAppend)
2037 gmx_large_int_t step;
2040 if (SIMMASTER(cr)) {
2041 /* Read the state from the checkpoint file */
2042 read_checkpoint(fn,fplog,
2043 cr,bPartDecomp,dd_nc,
2044 ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
2045 &ir->simulation_part,bAppend,bForceAppend);
2048 gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
2049 gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
2050 gmx_bcast(sizeof(step),&step,cr);
2051 gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
2052 gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
2054 ir->bContinuation = TRUE;
/* Shrink nsteps by the steps already done, so that init_step+nsteps stays the same */
2055 if (ir->nsteps >= 0)
2057 ir->nsteps += ir->init_step - step;
2059 ir->init_step = step;
2060 ir->simulation_part += 1;
/* Read all sections of an already opened checkpoint file fp.
 *
 * The header is read first; it stores the sizes and flags directly in
 * state (natoms, ngtc, nnhpres, nhchainlength, dfhist.nlambda, flags) and
 * fills simulation_part, step and t. Then the state, kinetic-energy state,
 * energy history, free-energy history, output-file list and footer are
 * read in that order. bReadRNG is forwarded to do_cpt_state.
 * When outputfiles is non-NULL the file list and its length are returned
 * through outputfiles and nfiles; otherwise the list is read into local
 * variables.
 */
2063 static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
2064 gmx_large_int_t *step,double *t,t_state *state,
2066 int *nfiles,gmx_file_position_t **outputfiles)
2069 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2074 int flags_eks,flags_enh,flags_dfh;
2076 gmx_file_position_t *files_loc=NULL;
2079 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2080 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2081 &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
2082 &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
2083 &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,NULL);
2085 do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
2090 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2091 flags_eks,&state->ekinstate,NULL);
2096 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2097 flags_enh,&state->enerhist,NULL);
2102 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2103 flags_dfh,&state->dfhist,NULL);
2109 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
2110 outputfiles != NULL ? outputfiles : &files_loc,
2111 outputfiles != NULL ? nfiles : &nfiles_loc,
2113 if (files_loc != NULL)
2123 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
/* Open the checkpoint file fn, read it with read_checkpoint_data into
 * simulation_part, step, t and state, and close it again.
 * bReadRNG is passed as FALSE and the output-file list is not requested.
 * A failing close is reported through gmx_file.
 */
2137 read_checkpoint_state(const char *fn,int *simulation_part,
2138 gmx_large_int_t *step,double *t,t_state *state)
2142 fp = gmx_fio_open(fn,"r");
2143 read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
2144 if( gmx_fio_close(fp) != 0)
2146 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Read a checkpoint from the opened file fp and present it as the
 * trajectory frame fr.
 * A local state is initialized and filled by read_checkpoint_data (without
 * RNG data and without the output-file list); the number of atoms, the
 * step (converted to int), the FEP lambda, the FEP state and the box are
 * then transferred to fr.
 */
2150 void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
2153 int simulation_part;
2154 gmx_large_int_t step;
2157 init_state(&state,0,0,0,0,0);
2159 read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
2161 fr->natoms = state.natoms;
2164 fr->step = gmx_large_int_to_int(step,
2165 "conversion of checkpoint to trajectory");
2169 fr->lambda = state.lambda[efptFEP];
2170 fr->fep_state = state.fep_state;
/* bX, bV and bBox mirror the estX, estV and estBOX bits of the checkpoint state flags */
2172 fr->bX = (state.flags & (1<<estX));
2178 fr->bV = (state.flags & (1<<estV));
2185 fr->bBox = (state.flags & (1<<estBOX));
2188 copy_mat(state.box,fr->box);
/* Read every section of the checkpoint file fn, handing the stream out to
 * each do_cpt_* reader (header, state, kinetic-energy state, energy
 * history, free-energy history and output-file list).
 * NOTE(review): the readers presumably list what they read on out - confirm.
 * The footer is read without out. A failing close is reported through
 * gmx_file.
 */
2193 void list_checkpoint(const char *fn,FILE *out)
2197 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2199 int eIntegrator,simulation_part,nppnodes,npme;
2200 gmx_large_int_t step;
2204 int flags_eks,flags_enh,flags_dfh;
2208 gmx_file_position_t *outputfiles;
2211 init_state(&state,-1,-1,-1,-1,0);
2213 fp = gmx_fio_open(fn,"r");
2214 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2215 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2216 &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
2217 &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
2218 &(state.dfhist.nlambda),&state.flags,
2219 &flags_eks,&flags_enh,&flags_dfh,out);
2220 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
2225 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2226 flags_eks,&state.ekinstate,out);
2231 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2232 flags_enh,&state.enerhist,out);
2236 init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
2237 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2238 flags_dfh,&state.dfhist,out);
2242 do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
2247 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2254 if( gmx_fio_close(fp) != 0)
2256 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Return whether fnm_cp, a file name taken from a checkpoint file, is both
 * the first name (fns[0]) of an output entry among the nfile entries of
 * fnm[] and a file that exists according to gmx_fexist.
 */
2263 static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
2267 /* Check if the output file name stored in the checkpoint file
2268 * is one of the output file names of mdrun.
2272 !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
2277 return (i < nfile && gmx_fexist(fnm_cp));
2280 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2281 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2282 gmx_large_int_t *cpt_step,t_commrec *cr,
2283 gmx_bool bAppendReq,
2284 int nfile,const t_filenm fnm[],
2285 const char *part_suffix,gmx_bool *bAddPart)
2288 gmx_large_int_t step=0;
2292 gmx_file_position_t *outputfiles;
2295 char *fn,suf_up[STRLEN];
2299 if (SIMMASTER(cr)) {
2300 if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
2302 *simulation_part = 0;
2306 init_state(&state,0,0,0,0,0);
2308 read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
2309 &nfiles,&outputfiles);
2310 if( gmx_fio_close(fp) != 0)
2312 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2319 for(f=0; f<nfiles; f++)
2321 if (exist_output_file(outputfiles[f].filename,nfile,fnm))
2326 if (nexist == nfiles)
2328 bAppend = bAppendReq;
2330 else if (nexist > 0)
2333 "Output file appending has been requested,\n"
2334 "but some output files listed in the checkpoint file %s\n"
2335 "are not present or are named differently by the current program:\n",
2337 fprintf(stderr,"output files present:");
2338 for(f=0; f<nfiles; f++)
2340 if (exist_output_file(outputfiles[f].filename,
2343 fprintf(stderr," %s",outputfiles[f].filename);
2346 fprintf(stderr,"\n");
2347 fprintf(stderr,"output files not present or named differently:");
2348 for(f=0; f<nfiles; f++)
2350 if (!exist_output_file(outputfiles[f].filename,
2353 fprintf(stderr," %s",outputfiles[f].filename);
2356 fprintf(stderr,"\n");
2358 gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
2366 gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
2368 fn = outputfiles[0].filename;
2369 if (strlen(fn) < 4 ||
2370 gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
2372 gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
2374 /* Set bAddPart to whether the suffix string '.part' is present
2375 * in the log file name.
2377 strcpy(suf_up,part_suffix);
2379 *bAddPart = (strstr(fn,part_suffix) != NULL ||
2380 strstr(fn,suf_up) != NULL);
2388 gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
2390 if (*simulation_part > 0 && bAppendReq)
2392 gmx_bcast(sizeof(bAppend),&bAppend,cr);
2393 gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
2396 if (NULL != cpt_step)