1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
30 #ifdef HAVE_SYS_TIME_H
38 #ifdef GMX_NATIVE_WINDOWS
41 #include <sys/locking.h>
55 #include "gmx_random.h"
56 #include "checkpoint.h"
61 #include "buildinfo.h"
68 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
70 gmx_ctime_r(const time_t *clock,char *buf, int n);
73 #define CPT_MAGIC1 171817
74 #define CPT_MAGIC2 171819
75 #define CPTSTRLEN 1024
78 #define GMX_CPT_BUILD_DP 1
80 #define GMX_CPT_BUILD_DP 0
83 /* cpt_version should normally only be changed
84 * when the header or footer format changes.
85 * The state data format itself is backward and forward compatible.
86 * But old code can not read a new entry that is present in the file
87 * (but can read a new format when new entries are not present).
89 static const int cpt_version = 15;
92 const char *est_names[estNR]=
95 "box", "box-rel", "box-v", "pres_prev",
96 "nosehoover-xi", "thermostat-integral",
97 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
98 "disre_initf", "disre_rm3tav",
99 "orire_initf", "orire_Dtav",
100 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
103 enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
105 const char *eeks_names[eeksNR]=
107 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
108 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
111 enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
112 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
113 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
114 eenhENERGY_DELTA_H_NN,
115 eenhENERGY_DELTA_H_LIST,
116 eenhENERGY_DELTA_H_STARTTIME,
117 eenhENERGY_DELTA_H_STARTLAMBDA,
120 const char *eenh_names[eenhNR]=
122 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
123 "energy_sum_sim", "energy_nsum_sim",
124 "energy_nsteps", "energy_nsteps_sim",
126 "energy_delta_h_list",
127 "energy_delta_h_start_time",
128 "energy_delta_h_start_lambda"
131 /* free energy history variables -- need to be preserved over checkpoint */
132 enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
133 edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
134 /* free energy history variable names */
135 const char *edfh_names[edfhNR]=
137 "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
138 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
141 #ifdef GMX_NATIVE_WINDOWS
/* Helper compiled under GMX_NATIVE_WINDOWS: opens the file named filename
 * with fopen(...,"rb+") and hands its descriptor together with size to
 * _chsize_s(), whose result is returned to the caller. */
143 gmx_wintruncate(const char *filename, __int64 size)
146 /*we do this elsewhere*/
152 fp=fopen(filename,"rb+");
/* NOTE(review): no fclose(fp) is visible between the fopen above and this
 * return, so the stream looks like it is leaked on this path -- confirm
 * against the full source. */
159 return _chsize_s( fileno(fp), size);
/* Tags passed as the erealtype argument of do_cpte_reals_low(): callers in
 * this file pass ecprREAL for plain real arrays, ecprRVEC for rvec arrays
 * and ecprMATRIX for matrices. */
165 enum { ecprREAL, ecprRVEC, ecprMATRIX };
167 enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
168 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
169 cptpEST - state variables.
170 cptpEEKS - Kinetic energy state variables.
171 cptpEENH - Energy history state variables.
172 cptpEDFH - free energy history variables.
/* Return the printable name of entry ecpt within the checkpoint component
 * selected by cptp (cptpEST, cptpEEKS, cptpEENH or cptpEDFH) by indexing the
 * matching names table. In the lines shown here ecpt is used as the array
 * index without a range check. */
176 static const char *st_names(int cptp,int ecpt)
/* Each case returns directly; the break that follows the return is never
 * reached. */
180 case cptpEST: return est_names [ecpt]; break;
181 case cptpEEKS: return eeks_names[ecpt]; break;
182 case cptpEENH: return eenh_names[ecpt]; break;
183 case cptpEDFH: return edfh_names[ecpt]; break;
/* Print a "corrupted or truncated checkpoint" warning to the stream fp. */
189 static void cp_warning(FILE *fp)
191 fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
/* Report a corrupted/truncated checkpoint (or a possibly full disk) through
 * gmx_fatal(). */
194 static void cp_error()
196 gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
/* Transfer the string *s through xdr_string() with a maximum length of
 * CPTSTRLEN and print it to list as "desc = value".
 * NOTE(review): how a failed xdr_string() call, bRead and a NULL list are
 * handled is not part of the lines shown here. */
199 static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
207 res = xdr_string(xd,s,CPTSTRLEN);
214 fprintf(list,"%s = %s\n",desc,*s);
/* Transfer the int *i through xd and print it to list as "desc = value".
 * Callers in this file test the return value against 0 (either "!= 0" or
 * "< 0") to detect failure; the code producing that value is not part of the
 * lines shown here. */
219 static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
230 fprintf(list,"%s = %d\n",desc,*i);
/* Transfer n unsigned chars from/to the array i, one xdr_u_char() call per
 * element, printing each byte to list as two hex digits after a "desc = "
 * prefix. */
235 static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
241 fprintf(list,"%s = ",desc);
/* The "&& res" term ends the loop as soon as one element fails. */
243 for (j=0; j<n && res; j++)
245 res &= xdr_u_char(xd,&i[j]);
248 fprintf(list,"%02x",i[j]);
/* Variant of do_cpt_int() that returns nothing: a negative result from
 * do_cpt_int() is treated as an error (the error action itself is not part
 * of the lines shown here). */
263 static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
265 if (do_cpt_int(xd,desc,i,list) < 0)
/* Transfer the step counter *i with xdr_gmx_large_int() and print it to list
 * as "desc = value", formatting the value with gmx_step_str() into a local
 * buffer of STEPSTRSIZE bytes. */
271 static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
274 char buf[STEPSTRSIZE];
276 res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
283 fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
/* Transfer the double *f with xdr_double() and print it to list as
 * "desc = value" using the %f format. */
287 static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
291 res = xdr_double(xd,f);
298 fprintf(list,"%s = %f\n",desc,*f);
/* Transfer one real *f. Two alternative calls are visible, xdr_double() and
 * xdr_float(); presumably a preprocessor test on the build precision selects
 * one of them -- the directives are not part of the lines shown here, TODO
 * confirm. */
302 static void do_cpt_real_err(XDR *xd,const char *desc,real *f)
307 res = xdr_double(xd,f);
309 res = xdr_float(xd,f);
/* Transfer the rvec array f (n is presumably the number of vectors, matching
 * the count handed to pr_rvecs below) one component at a time through
 * do_cpt_real_err(), then print the array to list with pr_rvecs(). */
317 static void do_cpt_n_rvecs_err(XDR *xd,const char *desc,int n, rvec f[],FILE *list)
/* Inner loop over the DIM components of vector i. */
323 for (j=0; j<DIM; j++)
325 do_cpt_real_err(xd, desc, &f[i][j]);
331 pr_rvecs(list,0,desc,f,n);
335 /* If nval >= 0, nval is used; on read this should match the passed value.
336 * If nval < 0, *nptr is used; on read the value is stored in *nptr
/* Low-level transfer of an array of reals for entry ecpt of component cptp.
 * An element count (nf) and a data-type code (dt) are transferred with
 * xdr_int() ahead of the values; dtc is the data-type code of this build.
 * The values themselves go through xdr_vector() as float (buffer vf) or
 * double (buffer vd) depending on dt, and the comparisons of dtc with the
 * same type appear to decide whether a separate conversion buffer is needed.
 * When the entry is printed to list, pr_reals() or pr_rvecs() is used.
 * NOTE(review): parts of this function are not shown here, so the exact
 * conditions guarding each step should be confirmed against the full source. */
338 static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
339 int nval,int *nptr,real **v,
340 FILE *list,int erealtype)
344 int dtc=xdr_datatype_float;
346 int dtc=xdr_datatype_double;
/* NOTE(review): the message below spells the parameter "ntpr"; the parameter
 * is named nptr. */
363 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
368 res = xdr_int(xd,&nf);
/* A count mismatch is reported with gmx_fatal(). */
379 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
388 res = xdr_int(xd,&dt);
/* A precision mismatch is only reported on stderr, unlike the count mismatch
 * above. */
395 fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
396 st_names(cptp,ecpt),xdr_datatype_names[dtc],
397 xdr_datatype_names[dt]);
/* Taken when listing, or when the bit for this entry is not set in sflags. */
399 if (list || !(sflags & (1<<ecpt)))
412 if (dt == xdr_datatype_float)
414 if (dtc == xdr_datatype_float)
422 res = xdr_vector(xd,(char *)vf,nf,
423 (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
428 if (dtc != xdr_datatype_float)
439 if (dtc == xdr_datatype_double)
447 res = xdr_vector(xd,(char *)vd,nf,
448 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
453 if (dtc != xdr_datatype_double)
468 pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
/* nf counts individual reals, so nf/3 vectors are printed. */
471 pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
474 gmx_incons("Unknown checkpoint real type");
486 /* This function stores n along with the reals for reading,
487 * but on reading it assumes that n matches the value in the checkpoint file,
488 * a fatal error is generated when this is not the case.
490 static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
491 int n,real **v,FILE *list)
493 return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
496 /* This function does the same as do_cpte_reals,
497 * except that on reading it ignores the passed value of *n
498 * and stores the value read from the checkpoint file in *n.
500 static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
501 int *n,real **v,FILE *list)
503 return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
/* Single-value wrapper: forwards to do_cpte_reals_low() with a fixed count
 * of 1, no count pointer, the address of r as the data pointer and the
 * ecprREAL tag. */
506 static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
511 return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
/* Transfer an array of ints for entry ecpt of component cptp. An element
 * count (nf) and a data-type code (dt) are transferred with xdr_int() ahead
 * of the values, which go through xdr_vector() with xdr_int as the element
 * routine; pr_ivec() prints the entry to list.
 * NOTE(review): parts of this function are not shown here. */
514 static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
515 int n,int **v,FILE *list)
518 int dtc=xdr_datatype_int;
523 res = xdr_int(xd,&nf);
/* The count is only enforced when not listing and when a destination
 * pointer was supplied. */
528 if (list == NULL && v != NULL && nf != n)
530 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
533 res = xdr_int(xd,&dt);
540 gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
541 st_names(cptp,ecpt),xdr_datatype_names[dtc],
542 xdr_datatype_names[dt]);
/* Taken when listing, when the bit for this entry is not set in sflags, or
 * when no destination pointer was supplied. */
544 if (list || !(sflags & (1<<ecpt)) || v == NULL)
557 res = xdr_vector(xd,(char *)vp,nf,
558 (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
565 pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
/* Single-value wrapper: forwards to do_cpte_ints() with a count of 1 and the
 * address of i as the data pointer. */
575 static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
578 return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
/* Transfer an array of doubles for entry ecpt of component cptp. An element
 * count (nf) and a data-type code (dt) are transferred with xdr_int() ahead
 * of the values, which go through xdr_vector() with xdr_double as the element
 * routine; pr_doubles() prints the entry to list.
 * NOTE(review): parts of this function are not shown here. */
581 static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
582 int n,double **v,FILE *list)
585 int dtc=xdr_datatype_double;
590 res = xdr_int(xd,&nf);
/* The count is only enforced when not listing. */
595 if (list == NULL && nf != n)
597 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
600 res = xdr_int(xd,&dt);
607 gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
608 st_names(cptp,ecpt),xdr_datatype_names[dtc],
609 xdr_datatype_names[dt]);
/* Taken when listing, or when the bit for this entry is not set in sflags. */
611 if (list || !(sflags & (1<<ecpt)))
624 res = xdr_vector(xd,(char *)vp,nf,
625 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
632 pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
/* Single-value wrapper: forwards to do_cpte_doubles() with a count of 1 and
 * the address of r as the data pointer. */
642 static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
643 double *r,FILE *list)
645 return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
/* Transfer n rvecs as a flat array of n*DIM reals: forwards to
 * do_cpte_reals_low() with v cast to (real **) and the ecprRVEC tag. */
649 static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
650 int n,rvec **v,FILE *list)
654 return do_cpte_reals_low(xd,cptp,ecpt,sflags,
655 n*DIM,NULL,(real **)v,list,ecprRVEC);
/* Transfer one matrix v as DIM*DIM reals through do_cpte_reals_low(), using
 * a pointer to its first element. NULL is passed as the list argument of the
 * low-level call; printing is done here instead, with pr_rvecs() over the
 * DIM rows, and only when list is set and the transfer returned 0. */
658 static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
664 vr = (real *)&(v[0][0]);
665 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
666 DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
668 if (list && ret == 0)
670 pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
/* Transfer the row v[i] of n reals through do_cpte_reals_low() (presumably
 * once per row; the enclosing loop is not part of the lines shown here).
 * NULL is passed as the list argument of the low-level call; a row that
 * transferred with result 0 is printed here with pr_reals() under the label
 * "<entry name>[i]". */
677 static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
678 int n, real **v,FILE *list)
/* Buffer for the per-row label built with sprintf below. */
683 char name[CPTSTRLEN];
694 reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
695 if (list && reti == 0)
697 sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
698 pr_reals(list,0,name,v[i],n);
/* Transfer an array of matrices. The matrix count nf is transferred here
 * with xdr_int() first; the values then go through do_cpte_reals_low() as one
 * flat array of nf*DIM*DIM reals (vr), with element [i][j][k] stored at index
 * (i*DIM+j)*DIM+k. Printing is done here with pr_rvecs(), one matrix at a
 * time, only when list is set and the transfer returned 0.
 * NOTE(review): parts of this function are not shown here. */
708 static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
709 int n,matrix **v,FILE *list)
718 res = xdr_int(xd,&nf);
/* The count is only enforced when not listing. */
723 if (list == NULL && nf != n)
725 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
727 if (list || !(sflags & (1<<ecpt)))
/* Copy the matrices into the flat buffer before the transfer... */
747 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
751 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
752 nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
/* ...and back from the flat buffer after it. */
759 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
765 if (list && ret == 0)
769 pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
/* Transfer the checkpoint header through xd: the start-of-file magic number,
 * build and program identification strings, the checkpoint format version,
 * and the run description (atom and coupling-group counts, integrator,
 * simulation part, step, time, node layout) followed by the flag words that
 * say which state, ekin, energy-history and df-history entries are present.
 * Fields introduced in later format versions are only transferred when
 * *file_version is at least the version tested in front of them.
 * NOTE(review): parts of this function, including the end of the parameter
 * list, are not shown here. */
780 static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
781 char **version,char **btime,char **buser,char **bhost,
783 char **fprog,char **ftime,
784 int *eIntegrator,int *simulation_part,
785 gmx_large_int_t *step,double *t,
786 int *nnodes,int *dd_nc,int *npme,
787 int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
788 int *nlambda, int *flags_state,
789 int *flags_eks,int *flags_enh, int *flags_dfh,
807 res = xdr_int(xd,&magic);
810 gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
812 if (magic != CPT_MAGIC1)
814 gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
815 "The checkpoint file is corrupted or not a checkpoint file",
/* The host name falls back to the literal "unknown" when gethostname()
 * fails; the second sprintf is presumably the branch for builds without
 * gethostname() -- TODO confirm. */
822 if (gethostname(fhost,255) != 0)
824 sprintf(fhost,"unknown");
827 sprintf(fhost,"unknown");
830 do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
831 do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
832 do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
833 do_cpt_string_err(xd,bRead,"GROMACS build host" ,bhost,list);
834 do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
835 do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
/* Preset to the version this code writes, then transferred through xd; a
 * value larger than cpt_version afterwards is rejected as a file that is too
 * new for this code. */
836 *file_version = cpt_version;
837 do_cpt_int_err(xd,"checkpoint file version",file_version,list);
838 if (*file_version > cpt_version)
840 gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
842 if (*file_version >= 13)
844 do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
850 if (*file_version >= 12)
852 do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
858 do_cpt_int_err(xd,"#atoms" ,natoms ,list);
859 do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
860 if (*file_version >= 10)
862 do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
868 if (*file_version >= 11)
870 do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
876 if (*file_version >= 14)
878 do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
884 do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
885 if (*file_version >= 3)
887 do_cpt_int_err(xd,"simulation part #", simulation_part,list);
/* Default used when the file does not carry a simulation part number. */
891 *simulation_part = 1;
/* From version 5 on the step is transferred as a large integer; the other
 * call transfers a plain int into the scratch variable idum. */
893 if (*file_version >= 5)
895 do_cpt_step_err(xd,"step" ,step ,list);
899 do_cpt_int_err(xd,"step" ,&idum ,list);
902 do_cpt_double_err(xd,"t" ,t ,list);
903 do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
/* dd_nc may be NULL; the three grid values then go to the scratch variable
 * idum. */
905 do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
906 do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
907 do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
908 do_cpt_int_err(xd,"#PME-only nodes",npme,list);
909 do_cpt_int_err(xd,"state flags",flags_state,list);
910 if (*file_version >= 4)
912 do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
913 do_cpt_int_err(xd,"energy history flags",flags_enh,list);
/* Apparently the branch for files older than version 4: the energy-history
 * flags are taken from the state flag bits above estORIRE_DTAV, and those
 * three bits are then cleared from *flags_state. */
918 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
919 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
920 (1<<(estORIRE_DTAV+2)) |
921 (1<<(estORIRE_DTAV+3))));
923 if (*file_version >= 14)
925 do_cpt_int_err(xd,"df history flags",flags_dfh,list);
930 if (*file_version >= 15)
932 do_cpt_int_err(xd,"ED data sets",nED,list);
/* Transfer the end-of-file magic number. This is only done for format
 * version 2 and later; the value transferred is compared against CPT_MAGIC2
 * (the action on a mismatch is not part of the lines shown here). */
940 static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
945 if (file_version >= 2)
948 res = xdr_int(xd,&magic);
953 if (magic != CPT_MAGIC2)
962 static int do_cpt_state(XDR *xd,gmx_bool bRead,
963 int fflags,t_state *state,
964 gmx_bool bReadRNG,FILE *list)
967 int **rng_p,**rngi_p;
974 nnht = state->nhchainlength*state->ngtc;
975 nnhtp = state->nhchainlength*state->nnhpres;
979 rng_p = (int **)&state->ld_rng;
980 rngi_p = &state->ld_rngi;
984 /* Do not read the RNG data */
988 /* We want the MC_RNG the same across all the nodes for now -- lambda MC is global */
990 sflags = state->flags;
991 for(i=0; (i<estNR && ret == 0); i++)
997 case estLAMBDA: ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
998 case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
999 case estBOX: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
1000 case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
1001 case estBOXV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
1002 case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
1003 case estSVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
1004 case estFVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
1005 case estNH_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
1006 case estNH_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
1007 case estNHPRES_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
1008 case estNHPRES_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
1009 case estTC_INT: ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
1010 case estVETA: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
1011 case estVOL0: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
1012 case estX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
1013 case estV: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
1014 case estSDX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
1015 case estLD_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
1016 case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
1017 case estMC_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
1018 case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
1019 case estDISRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
1020 case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
1021 case estORIRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
1022 case estORIRE_DTAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
1024 gmx_fatal(FARGS,"Unknown state entry %d\n"
1025 "You are probably reading a new checkpoint file with old code",i);
/* Transfer the kinetic-energy state ekins: for every entry index i below
 * eeksNR whose bit is set in fflags, dispatch to the do_cpte_ helper that
 * matches the entry's type. The loop ends at the first non-zero ret. The
 * gmx_fatal() call after the cases reports an index without a case
 * (apparently the default branch). */
1033 static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
1034 int fflags,ekinstate_t *ekins,
1042 for(i=0; (i<eeksNR && ret == 0); i++)
1044 if (fflags & (1<<i))
1049 case eeksEKIN_N: ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
1050 case eeksEKINH : ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
1051 case eeksEKINF: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
1052 case eeksEKINO: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
1053 case eeksEKINTOTAL: ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
1054 case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
/* NOTE(review): the four calls below pass (xd,1,cptpEEKS,...) where every
 * other case passes (xd,cptpEEKS,i,...). The helpers take (cptp,ecpt) in
 * that order, so here cptp is the literal 1 and ecpt is cptpEEKS instead of
 * i. ecpt feeds both the (1<<ecpt) flag test and the st_names() lookup in
 * the helpers, so the wrong flag bit and entry name are used. This looks
 * like swapped arguments -- confirm and change to cptpEEKS,i. */
1055 case eeksVSCALE: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
1056 case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
1057 case eeksDEKINDL : ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->dekindl,list); break;
1058 case eeksMVCOS: ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->mvcos,list); break;
1060 gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
1061 "You are probably reading a new checkpoint file with old code",i);
1070 static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
1071 int fflags,energyhistory_t *enerhist,
1082 enerhist->nsteps = 0;
1084 enerhist->nsteps_sim = 0;
1085 enerhist->nsum_sim = 0;
1086 enerhist->dht = NULL;
1088 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1090 snew(enerhist->dht,1);
1091 enerhist->dht->ndh = NULL;
1092 enerhist->dht->dh = NULL;
1093 enerhist->dht->start_lambda_set=FALSE;
1097 for(i=0; (i<eenhNR && ret == 0); i++)
1099 if (fflags & (1<<i))
1103 case eenhENERGY_N: ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
1104 case eenhENERGY_AVER: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
1105 case eenhENERGY_SUM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
1106 case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
1107 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
1108 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
1109 case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
1110 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
1111 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
1112 if (bRead) /* now allocate memory for it */
1114 snew(enerhist->dht->dh, enerhist->dht->nndh);
1115 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1116 for(j=0;j<enerhist->dht->nndh;j++)
1118 enerhist->dht->ndh[j] = 0;
1119 enerhist->dht->dh[j] = NULL;
1123 case eenhENERGY_DELTA_H_LIST:
1124 for(j=0;j<enerhist->dht->nndh;j++)
1126 ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1129 case eenhENERGY_DELTA_H_STARTTIME:
1130 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1131 case eenhENERGY_DELTA_H_STARTLAMBDA:
1132 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1134 gmx_fatal(FARGS,"Unknown energy history entry %d\n"
1135 "You are probably reading a new checkpoint file with old code",i);
1140 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1142 /* Assume we have an old file format and copy sum to sum_sim */
1143 srenew(enerhist->ener_sum_sim,enerhist->nener);
1144 for(i=0; i<enerhist->nener; i++)
1146 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1148 fflags |= (1<<eenhENERGY_SUM_SIM);
1151 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1152 !(fflags & (1<<eenhENERGY_NSTEPS)))
1154 /* Assume we have an old file format and copy nsum to nsteps */
1155 enerhist->nsteps = enerhist->nsum;
1156 fflags |= (1<<eenhENERGY_NSTEPS);
1158 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1159 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1161 /* Assume we have an old file format and copy nsum_sim to nsteps_sim */
1162 enerhist->nsteps_sim = enerhist->nsum_sim;
1163 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
/* Transfer the free-energy history dfhist: for every entry index i below
 * edfhNR whose bit is set in fflags, dispatch to the do_cpte_ helper that
 * matches the entry's type. Array entries use dfhist->nlambda as their
 * length. The loop ends at the first non-zero ret. The gmx_fatal() call
 * after the cases reports an index without a case (apparently the default
 * branch). */
1169 static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
1174 nlambda = dfhist->nlambda;
1177 for(i=0; (i<edfhNR && ret == 0); i++)
1179 if (fflags & (1<<i))
1183 case edfhBEQUIL: ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
1184 case edfhNATLAMBDA: ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
1185 case edfhWLHISTO: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
1186 case edfhWLDELTA: ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
1187 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
1188 case edfhSUMDG: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
1189 case edfhSUMMINVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
1190 case edfhSUMVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
/* The remaining entries are arrays of rows handled by do_cpte_nmatrix(),
 * each row holding nlambda reals. */
1191 case edfhACCUMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
1192 case edfhACCUMM: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
1193 case edfhACCUMP2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
1194 case edfhACCUMM2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
1195 case edfhTIJ: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
1196 case edfhTIJEMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
1199 gmx_fatal(FARGS,"Unknown df history entry %d\n"
1200 "You are probably reading a new checkpoint file with old code",i);
1209 /* This function stores the last whole configuration of the reference and
1210 * average structure in the .cpt file
/* Transfer the essential-dynamics state: for each of the EDstate->nED data
 * sets, an atom count followed by that many rvecs, first for the reference
 * structure and then for the average structure. EDstate->bFromCpt records
 * whether this call was a read.
 * NOTE(review): the conditions selecting between the paired calls below are
 * not part of the lines shown here; presumably bRead chooses the allocating
 * variant that fills old_sref / old_sav, and the other variant uses
 * old_sref_p / old_sav_p -- confirm. */
1212 static int do_cpt_EDstate(XDR *xd,gmx_bool bRead,
1213 edsamstate_t *EDstate, FILE *list)
1220 EDstate->bFromCpt = bRead;
/* Nothing to transfer without ED data sets (the action taken is not part of
 * the lines shown here). */
1222 if (EDstate->nED <= 0)
1227 /* When reading, init_edsam has not been called yet,
1228 * so we have to allocate memory first. */
1231 snew(EDstate->nref , EDstate->nED);
1232 snew(EDstate->old_sref, EDstate->nED);
1233 snew(EDstate->nav , EDstate->nED);
1234 snew(EDstate->old_sav , EDstate->nED);
1237 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1238 for (i=0; i< EDstate->nED; i++)
1240 /* Reference structure SREF */
/* buf holds the label of each field; data sets are numbered from 1 in the
 * labels. */
1241 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1242 do_cpt_int_err(xd, buf, &EDstate->nref[i],list);
1243 sprintf(buf, "ED%d x_ref", i+1);
1246 snew(EDstate->old_sref[i], EDstate->nref[i]);
1247 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1251 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1254 /* Average structure SAV */
1255 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1256 do_cpt_int_err(xd, buf, &EDstate->nav[i] ,list);
1257 sprintf(buf, "ED%d x_av", i+1);
1260 snew(EDstate->old_sav[i], EDstate->nav[i]);
1261 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1265 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
/* Transfer the list of output files: the number of files, then for each file
 * its name, its offset as two ints (high and low 32 bits) and, for format
 * version 8 and later, a checksum size plus 16 checksum bytes. Two sequences
 * are visible per file: one that allocates *p_outputfiles and fills it from
 * the transferred values, and one that takes name and offset from the
 * existing entries; presumably bRead selects between them -- the test itself
 * is not part of the lines shown here. */
1273 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1274 gmx_file_position_t **p_outputfiles, int *nfiles,
1275 FILE *list, int file_version)
/* Mask selecting the low 32 bits of an offset. */
1279 gmx_off_t mask = 0xFFFFFFFFL;
1280 int offset_high,offset_low;
1282 gmx_file_position_t *outputfiles;
1284 if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
1291 snew(*p_outputfiles,*nfiles);
1294 outputfiles = *p_outputfiles;
1296 for(i=0;i<*nfiles;i++)
1298 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1301 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
/* NOTE(review): strncpy copies at most CPTSTRLEN-1 bytes and does not
 * terminate the destination itself when buf is at least that long; no
 * explicit terminator is visible in the lines shown here -- confirm that
 * filename ends up NUL-terminated (for instance because snew zeroes it). */
1302 strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
1308 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1312 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
/* With an offset type wider than 4 bytes both halves are combined; otherwise
 * only the low half is used. */
1316 #if (SIZEOF_GMX_OFF_T > 4)
1317 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1319 outputfiles[i].offset = offset_low;
1324 buf = outputfiles[i].filename;
1325 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1327 offset = outputfiles[i].offset;
/* Split the offset into its two 32-bit halves before transferring them. */
1335 #if (SIZEOF_GMX_OFF_T > 4)
1336 offset_low = (int) (offset & mask);
1337 offset_high = (int) ((offset >> 32) & mask);
1339 offset_low = offset;
1343 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1347 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1352 if (file_version >= 8)
1354 if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
1359 if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
/* Apparently the branch for files older than version 8: a size of -1 marks
 * the checksum as absent. */
1366 outputfiles[i].chksum_size = -1;
1373 void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
1374 FILE *fplog,t_commrec *cr,
1375 int eIntegrator,int simulation_part,
1376 gmx_bool bExpanded, int elamstats,
1377 gmx_large_int_t step,double t,t_state *state)
1387 char *fntemp; /* the temporary checkpoint file name */
1389 char timebuf[STRLEN];
1390 int nppnodes,npmenodes,flag_64bit;
1391 char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
1392 gmx_file_position_t *outputfiles;
1395 int flags_eks,flags_enh,flags_dfh,i;
1400 if (DOMAINDECOMP(cr))
1402 nppnodes = cr->dd->nnodes;
1403 npmenodes = cr->npmenodes;
1407 nppnodes = cr->nnodes;
1417 /* make the new temporary filename */
1418 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1420 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1421 sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
1422 strcat(fntemp,suffix);
1423 strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1426 gmx_ctime_r(&now,timebuf,STRLEN);
1430 fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
1431 gmx_step_str(step,buf),timebuf);
1434 /* Get offsets for open files */
1435 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1437 fp = gmx_fio_open(fntemp,"w");
1439 if (state->ekinstate.bUpToDate)
1442 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1443 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1444 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1452 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1454 flags_enh |= (1<<eenhENERGY_N);
1455 if (state->enerhist.nsum > 0)
1457 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1458 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1460 if (state->enerhist.nsum_sim > 0)
1462 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1463 (1<<eenhENERGY_NSUM_SIM));
1465 if (state->enerhist.dht)
1467 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1468 (1<< eenhENERGY_DELTA_H_LIST) |
1469 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1470 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1476 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1477 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1480 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1482 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1484 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1485 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1491 /* We can check many more things now (CPU, acceleration, etc), but
1492 * it is highly unlikely to have two separate builds with exactly
1493 * the same version, user, time, and build host!
1496 version = gmx_strdup(VERSION);
1497 btime = gmx_strdup(BUILD_TIME);
1498 buser = gmx_strdup(BUILD_USER);
1499 bhost = gmx_strdup(BUILD_HOST);
1501 double_prec = GMX_CPT_BUILD_DP;
1502 fprog = gmx_strdup(Program());
1504 ftime = &(timebuf[0]);
1506 do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
1507 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1508 &eIntegrator,&simulation_part,&step,&t,&nppnodes,
1509 DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
1510 &state->natoms,&state->ngtc,&state->nnhpres,
1511 &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
1512 &state->edsamstate.nED,
1521 if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
1522 (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
1523 (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
1524 (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0) ||
1525 (do_cpt_EDstate(gmx_fio_getxdr(fp),FALSE,&state->edsamstate,NULL) < 0) ||
1526 (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
1529 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1532 do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
1534 /* we really, REALLY, want to make sure to physically write the checkpoint,
1535 and all the files it depends on, out to disk. Because we've
1536 opened the checkpoint with gmx_fio_open(), it's in our list
1538 ret=gmx_fio_all_output_fsync();
1544 "Cannot fsync '%s'; maybe you are out of disk space?",
1545 gmx_fio_getname(ret));
1547 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
1557 if( gmx_fio_close(fp) != 0)
1559 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1562 /* we don't move the checkpoint if the user specified they didn't want it,
1563 or if the fsyncs failed */
1564 if (!bNumberAndKeep && !ret)
1568 /* Rename the previous checkpoint file */
1570 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1571 strcat(buf,"_prev");
1572 strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1574 /* we copy here so that if something goes wrong between now and
1575 * the rename below, there's always a state.cpt.
1576 * If renames are atomic (such as in POSIX systems),
1577 * this copying should be unnecessary.
1579 gmx_file_copy(fn, buf, FALSE);
1580 /* We don't really care if this fails:
1581 * there's already a new checkpoint.
1584 gmx_file_rename(fn, buf);
1587 if (gmx_file_rename(fntemp, fn) != 0)
1589 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1597 /*code for alternate checkpointing scheme. moved from top of loop over
1599 fcRequestCheckPoint();
1600 if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
1601 gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
1603 #endif /* end GMX_FAHCORE block */
/* Print to fplog a table showing, for each state entry, whether it is present
 * in the running simulation (bit set in sflags) and in the checkpoint file
 * (bit set in fflags).
 * Bit i is tested for every i in [0, estNR); an entry is listed only when at
 * least one of the two masks has its bit set.
 * fplog is written to without a NULL check in the visible code.
 */
1606 static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
1610 fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
1611 fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
/* Column titles; the title of the first column is an empty string. */
1612 fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
1613 for(i=0; i<estNR; i++)
/* Entries that neither side has are not listed. */
1615 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1617 fprintf(fplog," %24s %11s %11s\n",
1619 (sflags & (1<<i)) ? " present " : "not present",
1620 (fflags & (1<<i)) ? " present " : "not present");
/* Report an integer setting that differs between the running program (p)
 * and the checkpoint file (f); type is the label printed in front of it.
 * The report goes to fplog, or to stderr when fplog is NULL.
 * NOTE(review): the comparison of p and f and any update of *mm are not
 * visible in this excerpt - presumably the report is only printed when the
 * two values differ and *mm is then flagged; confirm in the full function.
 */
1625 static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
/* Fall back to stderr when there is no log file. */
1627 FILE *fp = fplog ? fplog : stderr;
1631 fprintf(fp," %s mismatch,\n",type);
1632 fprintf(fp," current program: %d\n",p);
1633 fprintf(fp," checkpoint file: %d\n",f);
/* Report a string setting that differs between the running program (p) and
 * the checkpoint file (f); type is the label printed in front of it.
 * The two strings are compared with strcmp, so the comparison is
 * case-sensitive and neither pointer may be NULL.
 * The report goes to fplog, or to stderr when fplog is NULL.
 * NOTE(review): any update of *mm is not visible in this excerpt -
 * presumably it is flagged when the strings differ; confirm.
 */
1639 static void check_string(FILE *fplog,const char *type,const char *p,
1640 const char *f,gmx_bool *mm)
/* Fall back to stderr when there is no log file. */
1642 FILE *fp = fplog ? fplog : stderr;
1644 if (strcmp(p,f) != 0)
1646 fprintf(fp," %s mismatch,\n",type);
1647 fprintf(fp," current program: %s\n",p);
1648 fprintf(fp," checkpoint file: %s\n",f);
/* Compare the build and the parallel setup of the running program with the
 * values stored in a checkpoint file and report every difference through
 * check_string()/check_int(), which all receive the same mismatch flag mm.
 *
 * Values ending in _f (npp_f, npme_f, dd_nc_f) come from the checkpoint
 * file; cr and dd_nc describe the current run.
 * The closing messages state that continuation is exact but not guaranteed
 * to be binary identical.
 * NOTE(review): the conditions that select between the individual checks
 * and between the two closing messages are not visible in this excerpt.
 */
1654 static void check_match(FILE *fplog,
1656 char *btime,char *buser,char *bhost,int double_prec,
1658 t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
1659 ivec dd_nc,ivec dd_nc_f)
/* Identity of the binary: version, build stamp, precision and program name. */
1666 check_string(fplog,"Version" ,VERSION ,version,&mm);
1667 check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
1668 check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
1669 check_string(fplog,"Build host" ,BUILD_HOST ,bhost ,&mm);
1670 check_int (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
1671 check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
/* Parallel setup: the file stores PP and PME node counts separately, so
 * their sum is compared with the total node count of the current run.
 */
1673 check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
1682 check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
/* A negative npmenodes is treated as "not set" and is not subtracted. */
1685 if (cr->npmenodes >= 0)
1687 npp -= cr->npmenodes;
/* Domain decomposition grid, one check per dimension. */
1691 check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
1692 check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
1693 check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
1700 "Gromacs binary or parallel settings not identical to previous run.\n"
1701 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1702 fplog ? ",\n see the log file for details" : "");
1707 "Gromacs binary or parallel settings not identical to previous run.\n"
1708 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
/* Read the checkpoint file fn into state and validate it against the
 * current run.
 *
 * Steps visible in this function:
 *  - the header is read with do_cpt_header(); step, t and simulation_part
 *    are passed to it as output arguments;
 *  - with appending requested, a single/double precision mismatch is fatal
 *    for file versions >= 13;
 *  - a different number of atoms, T-coupling groups, NH-pressure-coupling
 *    variables or lambda states is fatal;
 *  - integrator and state-flag mismatches are reported; the state-flag
 *    mismatch is fatal unless GMX_ALLOW_CPT_MISMATCH is set in the
 *    environment;
 *  - cr->npmenodes and dd_nc may be taken over from the file;
 *  - the state, kinetic-energy state, energy history, ED state, df history,
 *    output file list and footer are read, in that order;
 *  - with appending requested, every output file listed in the checkpoint
 *    is locked, MD5-checked and cut back to the offset stored in the file,
 *    and *pfplog is pointed at the (first) log file.
 *
 * Outputs besides state: *init_fep_state is set to state->fep_state after
 * the state has been read, and *bReadEkin is set from the kinetic-energy
 * flags found in the file. *bReadRNG is passed on to do_cpt_state().
 * cr may be NULL for the stderr notice; other uses dereference it.
 * NOTE(review): the use of bForceAppend and of several locals is not visible
 * in this excerpt.
 */
1713 static void read_checkpoint(const char *fn,FILE **pfplog,
1714 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
1715 int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
1716 t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
1717 int *simulation_part,
1718 gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
1723 char *version,*btime,*buser,*bhost,*fprog,*ftime;
1725 char filename[STRLEN],buf[STEPSTRSIZE];
1726 int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
1728 int natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
1731 gmx_file_position_t *outputfiles;
1733 t_fileio *chksum_file;
1734 FILE* fplog = *pfplog;
1735 unsigned char digest[16];
1736 #ifndef GMX_NATIVE_WINDOWS
1737 struct flock fl; /* don't initialize here: the struct order is OS
1741 const char *int_warn=
1742 "WARNING: The checkpoint file was generated with integrator %s,\n"
1743 " while the simulation uses integrator %s\n\n";
1744 const char *sd_note=
1745 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1746 " while the simulation uses %d SD or BD nodes,\n"
1747 " continuation will be exact, except for the random state\n\n";
1749 #ifndef GMX_NATIVE_WINDOWS
1751 fl.l_whence=SEEK_SET;
1760 "read_checkpoint not (yet) supported with particle decomposition");
1763 fp = gmx_fio_open(fn,"r");
1764 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1765 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1766 &eIntegrator_f,simulation_part,step,t,
1767 &nppnodes_f,dd_nc_f,&npmenodes_f,
1768 &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
1769 &fflags,&flags_eks,&flags_enh,&flags_dfh,
1770 &state->edsamstate.nED,NULL);
1772 if (bAppendOutputFiles &&
1773 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1775 gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1778 if (cr == NULL || MASTER(cr))
1780 fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
1784 /* This will not be written if we do appending, since fplog is still NULL then */
1787 fprintf(fplog,"\n");
1788 fprintf(fplog,"Reading checkpoint file %s\n",fn);
1789 fprintf(fplog," file generated by: %s\n",fprog);
1790 fprintf(fplog," file generated at: %s\n",ftime);
1791 fprintf(fplog," GROMACS build time: %s\n",btime);
1792 fprintf(fplog," GROMACS build user: %s\n",buser);
1793 fprintf(fplog," GROMACS build host: %s\n",bhost);
1794 fprintf(fplog," GROMACS double prec.: %d\n",double_prec);
1795 fprintf(fplog," simulation part #: %d\n",*simulation_part);
1796 fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
1797 fprintf(fplog," time: %f\n",*t);
1798 fprintf(fplog,"\n");
/* The system sizes stored in the header must equal those of the current
 * state; each difference below is fatal.
 */
1801 if (natoms != state->natoms)
1803 gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
1805 if (ngtc != state->ngtc)
1807 gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
1809 if (nnhpres != state->nnhpres)
1811 gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
1814 if (nlambda != state->dfhist.nlambda)
1816 gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
1819 init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
1820 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
/* An integrator different from the one stored in the file is reported on
 * stderr and in the log; the message used with appending announces that the
 * run is stopped.
 */
1822 if (eIntegrator_f != eIntegrator)
1826 fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1828 if(bAppendOutputFiles)
1831 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1832 "Stopping the run to prevent you from ruining all your data...\n"
1833 "If you _really_ know what you are doing, try with the -noappend option.\n");
1837 fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1846 else if (bPartDecomp)
1848 nppnodes = cr->nnodes;
/* Same total node count as in the file: take over the PME node count when
 * none was set (negative value) and, if the PP node count then matches too,
 * copy DD cell counts from the file.
 * NOTE(review): any per-dimension condition on that copy is not visible in
 * this excerpt.
 */
1851 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1853 if (cr->npmenodes < 0)
1855 cr->npmenodes = npmenodes_f;
1857 nppnodes = cr->nnodes - cr->npmenodes;
1858 if (nppnodes == nppnodes_f)
1860 for(d=0; d<DIM; d++)
1864 dd_nc[d] = dd_nc_f[d];
1871 /* The number of PP nodes has not been set yet */
/* For SD and BD the random number state is sized per PP node, which is only
 * possible once the PP node count is known (positive).
 */
1875 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1877 /* Correct the RNG state size for the number of PP nodes.
1878 * Such assignments should all be moved to one central function.
1880 state->nrng = nppnodes*gmx_rng_n();
1881 state->nrngi = nppnodes;
1885 if (fflags != state->flags)
1890 if(bAppendOutputFiles)
1893 "Output file appending requested, but input and checkpoint states are not identical.\n"
1894 "Stopping the run to prevent you from ruining all your data...\n"
1895 "You can try with the -noappend option, and get more info in the log file.\n");
1898 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1900 gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1905 "WARNING: The checkpoint state entries do not match the simulation,\n"
1906 " see the log file for details\n\n");
1912 print_flag_mismatch(fplog,state->flags,fflags);
1917 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1918 nppnodes != nppnodes_f)
1923 fprintf(stderr,sd_note,nppnodes_f,nppnodes);
1927 fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
1932 check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
1933 cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
1936 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
1937 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1938 Investigate for 5.0. */
1943 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1944 flags_eks,&state->ekinstate,NULL);
/* The kinetic energy counts as read when the file carries any of the
 * half-step, full-step or old kinetic energies, or any of the scaling
 * entries tested below.
 */
1949 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1950 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1952 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1953 flags_enh,&state->enerhist,NULL);
/* NOTE(review): the ED state is read here before the df_hist block further
 * down, whereas the checkpoint writer, read_checkpoint_data() and
 * list_checkpoint() in this file handle df_hist first and the ED state
 * second. A file that contains both blocks would presumably be misread by
 * this function - confirm and align the read order with the write order.
 */
1959 ret = do_cpt_EDstate(gmx_fio_getxdr(fp),TRUE,&state->edsamstate,NULL);
/* Files older than version 6: the energy-history sum counts are set to the
 * checkpoint step, which is only right for a run that started at step 0
 * (see the warning text).
 */
1965 if (file_version < 6)
1967 const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1969 fprintf(stderr,"\nWARNING: %s\n\n",warn);
1972 fprintf(fplog,"\nWARNING: %s\n\n",warn);
1974 state->enerhist.nsum = *step;
1975 state->enerhist.nsum_sim = *step;
1978 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
1979 flags_dfh,&state->dfhist,NULL);
1985 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
1991 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1996 if( gmx_fio_close(fp) != 0)
1998 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2007 /* If the user wants to append to output files,
2008 * we use the file pointer positions of the output files stored
2009 * in the checkpoint file and truncate the files such that any frames
2010 * written after the checkpoint time are removed.
2011 * All files are md5sum checked such that we can be sure that
2012 * we do not truncate other (maybe important) files.
2014 if (bAppendOutputFiles)
2016 if (fn2ftp(outputfiles[0].filename)!=efLOG)
2018 /* make sure first file is log file so that it is OK to use it for
2021 gmx_fatal(FARGS,"The first output file should always be the log "
2022 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2024 for(i=0;i<nfiles;i++)
2026 if (outputfiles[i].offset < 0)
2028 gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
2029 "is larger than 2 GB, but mdrun did not support large file"
2030 " offsets. Can not append. Run mdrun with -noappend",
2031 outputfiles[i].filename);
2034 chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
2037 chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
2042 /* Note that there are systems where the lock operation
2043 * will succeed, but a second process can also lock the file.
2044 * We should probably try to detect this.
2046 #ifndef GMX_NATIVE_WINDOWS
2047 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
2050 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
2053 if (errno == ENOSYS)
2057 gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2061 fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
2064 fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
2068 else if (errno == EACCES || errno == EAGAIN)
2070 gmx_fatal(FARGS,"Failed to lock: %s. Already running "
2071 "simulation?", outputfiles[i].filename);
2075 gmx_fatal(FARGS,"Failed to lock: %s. %s.",
2076 outputfiles[i].filename, strerror(errno));
2081 /* compute md5 chksum */
/* A chksum_size of -1 means no checksum is available for this file; both
 * the computation here and the comparison below are skipped then.
 */
2082 if (outputfiles[i].chksum_size != -1)
2084 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
2085 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2087 gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2088 outputfiles[i].chksum_size,
2089 outputfiles[i].filename);
2092 if (i==0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2094 if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
2096 gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
2101 if (i==0) /*open log file here - so that lock is never lifted
2102 after chksum is calculated */
2104 *pfplog = gmx_fio_getfp(chksum_file);
2108 gmx_fio_close(chksum_file);
2111 /* compare md5 chksum */
2112 if (outputfiles[i].chksum_size != -1 &&
2113 memcmp(digest,outputfiles[i].chksum,16)!=0)
2117 fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
2118 for (j=0; j<16; j++)
2120 fprintf(debug,"%02x",digest[j]);
2122 fprintf(debug,"\n");
2124 gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2125 outputfiles[i].filename);
/* Every file except the log is cut back to the offset stored in the
 * checkpoint; a failure to truncate is fatal.
 */
2130 if (i!=0) /*log file is already seeked to correct position */
2132 #ifdef GMX_NATIVE_WINDOWS
2133 rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
2135 rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
2139 gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
/* Load the checkpoint file fn for a run that continues from it.
 *
 * The simulation master reads the file with read_checkpoint(), which fills
 * state, ir->fepvals->init_fep_state, ir->simulation_part, *bReadRNG and
 * *bReadEkin and may update cr->npmenodes, dd_nc and *fplog.
 * The values needed on all nodes are then broadcast, and the input record is
 * updated so that the run starts at the checkpoint step.
 */
2149 void load_checkpoint(const char *fn,FILE **fplog,
2150 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
2151 t_inputrec *ir,t_state *state,
2152 gmx_bool *bReadRNG,gmx_bool *bReadEkin,
2153 gmx_bool bAppend,gmx_bool bForceAppend)
2155 gmx_large_int_t step;
2158 if (SIMMASTER(cr)) {
2159 /* Read the state from the checkpoint file */
2160 read_checkpoint(fn,fplog,
2161 cr,bPartDecomp,dd_nc,
2162 ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
2163 &ir->simulation_part,bAppend,bForceAppend);
/* Hand the values obtained on the master to the other nodes.
 * NOTE(review): whether these broadcasts are guarded (e.g. only for
 * parallel runs) is not visible in this excerpt.
 */
2166 gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
2167 gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
2168 gmx_bcast(sizeof(step),&step,cr);
2169 gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
2170 gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
2172 ir->bContinuation = TRUE;
/* A negative nsteps is left untouched; otherwise the steps already done up
 * to the checkpoint are taken off the number of steps still to run.
 */
2173 if (ir->nsteps >= 0)
2175 ir->nsteps += ir->init_step - step;
2177 ir->init_step = step;
/* The run started now is the part following the one stored in the file. */
2178 ir->simulation_part += 1;
/* Read a complete checkpoint from the already opened file fp into state,
 * without the consistency checks that read_checkpoint() performs.
 *
 * The sizes and flags found in the header are written straight into state
 * (natoms, ngtc, nnhpres, nhchainlength, dfhist.nlambda, flags,
 * edsamstate.nED). After the header the blocks are read in the order:
 * state, kinetic-energy state, energy history, df history, ED state,
 * output file list, footer.
 *
 * simulation_part, step and t are passed to do_cpt_header() as output
 * arguments. When outputfiles is not NULL the file list and its length are
 * returned through outputfiles and nfiles; otherwise they are read into
 * locals.
 * NOTE(review): one parameter line (the one providing bReadRNG) and the
 * handling of files_loc after the read are not visible in this excerpt.
 */
2181 static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
2182 gmx_large_int_t *step,double *t,t_state *state,
2184 int *nfiles,gmx_file_position_t **outputfiles)
2187 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2192 int flags_eks,flags_enh,flags_dfh;
2194 gmx_file_position_t *files_loc=NULL;
2197 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2198 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2199 &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
2200 &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
2201 &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
2202 &state->edsamstate.nED,NULL);
/* NOTE(review): unlike the reads that follow, the return value of this call
 * is not stored on this line.
 */
2204 do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
2209 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2210 flags_eks,&state->ekinstate,NULL);
2215 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2216 flags_enh,&state->enerhist,NULL);
2221 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2222 flags_dfh,&state->dfhist,NULL);
2228 ret = do_cpt_EDstate(gmx_fio_getxdr(fp),TRUE,&state->edsamstate,NULL);
/* The file list goes to the caller's pointers when provided, otherwise to
 * the local files_loc/nfiles_loc.
 */
2234 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
2235 outputfiles != NULL ? outputfiles : &files_loc,
2236 outputfiles != NULL ? nfiles : &nfiles_loc,
2238 if (files_loc != NULL)
2248 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
/* Open the checkpoint file fn, read it into state with
 * read_checkpoint_data() and close it again.
 * simulation_part, step and t receive the values stored in the file.
 * The random number state is not requested (FALSE) and the list of output
 * files is not returned (NULL, NULL).
 * A failing close is reported through gmx_file().
 */
2262 read_checkpoint_state(const char *fn,int *simulation_part,
2263 gmx_large_int_t *step,double *t,t_state *state)
2267 fp = gmx_fio_open(fn,"r");
2268 read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
2269 if( gmx_fio_close(fp) != 0)
2271 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Read a checkpoint from the already opened file fp and expose its content
 * as a trajectory frame in fr.
 * The checkpoint is read into a local state with read_checkpoint_data()
 * (no random number state, no output file list); the frame fields are then
 * filled from that state. fp is not closed here.
 * NOTE(review): the assignments of the remaining frame fields (for example
 * time, coordinates and velocities) are not visible in this excerpt.
 */
2275 void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
2278 int simulation_part;
2279 gmx_large_int_t step;
/* Start from a state initialised with zero sizes; the sizes are set when
 * the checkpoint header is read.
 */
2282 init_state(&state,0,0,0,0,0);
2284 read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
2286 fr->natoms = state.natoms;
/* The frame stores the step as an int; the string names the conversion for
 * the helper.
 */
2289 fr->step = gmx_large_int_to_int(step,
2290 "conversion of checkpoint to trajectory");
2294 fr->lambda = state.lambda[efptFEP];
2295 fr->fep_state = state.fep_state;
/* Coordinates, velocities and box are flagged as present in the frame when
 * the matching state flag bit is set in the checkpoint.
 */
2297 fr->bX = (state.flags & (1<<estX));
2303 fr->bV = (state.flags & (1<<estV));
2310 fr->bBox = (state.flags & (1<<estBOX));
2313 copy_mat(state.box,fr->box);
/* Read the checkpoint file fn block by block, passing the stream out to
 * every reader (the other readers in this file pass NULL there).
 * The blocks are read in the order: header, state, kinetic-energy state,
 * energy history, df history, ED state, output file list, footer.
 * The local state is initialised with sizes of -1; the header read then
 * stores the sizes found in the file. A failing close is reported through
 * gmx_file().
 * NOTE(review): the error handling after the individual reads is not
 * visible in this excerpt.
 */
2318 void list_checkpoint(const char *fn,FILE *out)
2322 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2324 int eIntegrator,simulation_part,nppnodes,npme;
2325 gmx_large_int_t step;
2329 int flags_eks,flags_enh,flags_dfh;
2333 gmx_file_position_t *outputfiles;
2336 init_state(&state,-1,-1,-1,-1,0);
2338 fp = gmx_fio_open(fn,"r");
2339 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2340 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2341 &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
2342 &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
2343 &(state.dfhist.nlambda),&state.flags,
2344 &flags_eks,&flags_enh,&flags_dfh,&state.edsamstate.nED,out);
2345 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
2350 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2351 flags_eks,&state.ekinstate,out);
2356 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2357 flags_enh,&state.enerhist,out);
/* The number of lambda states is only known after the header has been
 * read, so the df history is set up again before it is read.
 */
2361 init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
2362 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2363 flags_dfh,&state.dfhist,out);
2368 ret = do_cpt_EDstate(gmx_fio_getxdr(fp),TRUE,&state.edsamstate,out);
/* NOTE(review): unlike the neighbouring reads, the return value of this
 * call is not stored on this line.
 */
2373 do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
2378 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2385 if( gmx_fio_close(fp) != 0)
2387 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Tell whether fnm_cp, a file name taken from a checkpoint, belongs to the
 * current run and is present on disk.
 * The nfile entries of fnm are searched for an output entry (is_output)
 * whose first file name, fns[0], equals fnm_cp exactly (strcmp).
 * Returns true only when such an entry was found (index below nfile) and
 * gmx_fexist() reports that the file exists.
 */
2394 static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
2398 /* Check if the output file name stored in the checkpoint file
2399 * is one of the output file names of mdrun.
2403 !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
2408 return (i < nfile && gmx_fexist(fnm_cp));
2411 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2412 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2413 gmx_large_int_t *cpt_step,t_commrec *cr,
2414 gmx_bool bAppendReq,
2415 int nfile,const t_filenm fnm[],
2416 const char *part_suffix,gmx_bool *bAddPart)
2419 gmx_large_int_t step=0;
2423 gmx_file_position_t *outputfiles;
2426 char *fn,suf_up[STRLEN];
2430 if (SIMMASTER(cr)) {
2431 if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
2433 *simulation_part = 0;
2437 init_state(&state,0,0,0,0,0);
2439 read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
2440 &nfiles,&outputfiles);
2441 if( gmx_fio_close(fp) != 0)
2443 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2450 for(f=0; f<nfiles; f++)
2452 if (exist_output_file(outputfiles[f].filename,nfile,fnm))
2457 if (nexist == nfiles)
2459 bAppend = bAppendReq;
2461 else if (nexist > 0)
2464 "Output file appending has been requested,\n"
2465 "but some output files listed in the checkpoint file %s\n"
2466 "are not present or are named differently by the current program:\n",
2468 fprintf(stderr,"output files present:");
2469 for(f=0; f<nfiles; f++)
2471 if (exist_output_file(outputfiles[f].filename,
2474 fprintf(stderr," %s",outputfiles[f].filename);
2477 fprintf(stderr,"\n");
2478 fprintf(stderr,"output files not present or named differently:");
2479 for(f=0; f<nfiles; f++)
2481 if (!exist_output_file(outputfiles[f].filename,
2484 fprintf(stderr," %s",outputfiles[f].filename);
2487 fprintf(stderr,"\n");
2489 gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
2497 gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
2499 fn = outputfiles[0].filename;
2500 if (strlen(fn) < 4 ||
2501 gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
2503 gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
2505 /* Set bAddPart to whether the suffix string '.part' is present
2506 * in the log file name.
2508 strcpy(suf_up,part_suffix);
2510 *bAddPart = (strstr(fn,part_suffix) != NULL ||
2511 strstr(fn,suf_up) != NULL);
2519 gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
2521 if (*simulation_part > 0 && bAppendReq)
2523 gmx_bcast(sizeof(bAppend),&bAppend,cr);
2524 gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
2527 if (NULL != cpt_step)