1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
26 #include "gmx_header_config.h"
31 #ifdef HAVE_SYS_TIME_H
36 #ifdef GMX_NATIVE_WINDOWS
39 #include <sys/locking.h>
53 #include "gmx_random.h"
54 #include "checkpoint.h"
59 #include "buildinfo.h"
66 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
68 gmx_ctime_r(const time_t *clock,char *buf, int n);
// Constants of the checkpoint file format used in this file.
// CPT_MAGIC1 is compared with the first integer read in do_cpt_header.
// CPT_MAGIC2 is compared with the integer read in do_cpt_footer.
// CPTSTRLEN is the maximum string length handed to xdr_string and also
// sizes the name buffer in do_cpte_nmatrix.
71 #define CPT_MAGIC1 171817
72 #define CPT_MAGIC2 171819
73 #define CPTSTRLEN 1024
// GMX_CPT_BUILD_DP is what write_checkpoint stores as the double precision
// field of the header and what check_match compares the file value against.
// NOTE(review): the two definitions below are presumably the two branches of
// a precision conditional whose directive lines are not visible - confirm.
76 #define GMX_CPT_BUILD_DP 1
78 #define GMX_CPT_BUILD_DP 0
81 /* cpt_version should normally only be changed
82 * when the header or footer format changes.
83 * The state data format itself is backward and forward compatible.
84 * But old code can not read a new entry that is present in the file
85 * (but can read a new format when new entries are not present).
// do_cpt_header stores this value in the file_version field and calls
// gmx_fatal when a file reports a version larger than this one.
87 static const int cpt_version = 14;
// Printable names of the state entries, one per est enum value (estNR in
// total). st_names returns these for component cptpEST.
// NOTE(review): the enum itself is declared outside this file view, so the
// order of the strings has to be checked against it there.
90 const char *est_names[estNR]=
93 "box", "box-rel", "box-v", "pres_prev",
94 "nosehoover-xi", "thermostat-integral",
95 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
96 "disre_initf", "disre_rm3tav",
97 "orire_initf", "orire_Dtav",
98 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
// Indices of the kinetic energy state entries. The enum value is also the
// bit position of the entry in the ekin flag word (see the 1<<eeks...
// expressions in write_checkpoint and the loop in do_cpt_ekinstate).
101 enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
// Printable names in the same order as the enum above; returned by st_names
// for component cptpEEKS.
103 const char *eeks_names[eeksNR]=
105 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
106 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
// Indices of the energy history entries. The enum value is also the bit
// position of the entry in the energy history flag word (see the
// 1<<eenh... expressions in write_checkpoint and do_cpt_enerhist).
109 enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
110 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
111 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
112 eenhENERGY_DELTA_H_NN,
113 eenhENERGY_DELTA_H_LIST,
114 eenhENERGY_DELTA_H_STARTTIME,
115 eenhENERGY_DELTA_H_STARTLAMBDA,
// Printable names for the entries above; returned by st_names for component
// cptpEENH and passed directly as descriptions in do_cpt_enerhist.
// NOTE(review): the string for eenhENERGY_DELTA_H_NN is not visible in this
// listing - confirm the table still has eenhNR entries in enum order.
118 const char *eenh_names[eenhNR]=
120 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
121 "energy_sum_sim", "energy_nsum_sim",
122 "energy_nsteps", "energy_nsteps_sim",
124 "energy_delta_h_list",
125 "energy_delta_h_start_time",
126 "energy_delta_h_start_lambda"
129 /* free energy history variables -- need to be preserved over checkpoint */
// The enum value is also the bit position of the entry in the df history
// flag word (see the 1<<edfh... expressions in write_checkpoint and the
// loop in do_cpt_df_hist).
130 enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
131 edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
132 /* free energy history variable names */
// Same order as the enum above; returned by st_names for component cptpEDFH.
133 const char *edfh_names[edfhNR]=
135 "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
136 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
// Windows-only helper (compiled under GMX_NATIVE_WINDOWS) that truncates the
// file called filename to size bytes.
// The visible code opens the file in rb+ mode and returns the result of
// _chsize_s on its descriptor.
// NOTE(review): the handling of a failed fopen and the closing of fp are not
// visible in this listing - confirm both exist.
139 #ifdef GMX_NATIVE_WINDOWS
141 gmx_wintruncate(const char *filename, __int64 size)
144 /*we do this elsewhere*/
150 fp=fopen(filename,"rb+");
157 return _chsize_s( fileno(fp), size);
// Layout selector passed as erealtype to do_cpte_reals_low; that function
// uses it to choose between pr_reals and pr_rvecs when listing.
163 enum { ecprREAL, ecprRVEC, ecprMATRIX };
// Component selector passed as cptp to the do_cpte_* helpers; st_names uses
// it to pick the name table.
165 enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
166 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
167 cptpEST - state variables.
168 cptpEEKS - Kinetic energy state variables.
169 cptpEENH - Energy history state variables.
170 cptpEDFH - free energy history variables.
// Return the printable name of entry ecpt of checkpoint component cptp.
// cptp is one of cptpEST, cptpEEKS, cptpEENH or cptpEDFH and selects the
// name table; ecpt is used unchecked as the index into that table.
174 static const char *st_names(int cptp,int ecpt)
178 case cptpEST: return est_names [ecpt]; break;
179 case cptpEEKS: return eeks_names[ecpt]; break;
180 case cptpEENH: return eenh_names[ecpt]; break;
181 case cptpEDFH: return edfh_names[ecpt]; break;
// Print a warning to fp saying that the checkpoint file is corrupted or
// truncated. fp is used without a NULL check.
187 static void cp_warning(FILE *fp)
189 fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
// Report a corrupted or truncated checkpoint file (or a full disk) through
// gmx_fatal.
192 static void cp_error()
194 gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
// Transfer the string *s through xdr_string with a maximum length of
// CPTSTRLEN and print it to list in the form desc = value.
// NOTE(review): what happens when xdr_string fails, and how bRead is used,
// is not visible in this listing; the _err suffix suggests a fatal error.
197 static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
205 res = xdr_string(xd,s,CPTSTRLEN);
212 fprintf(list,"%s = %s\n",desc,*s);
// Handle one int checkpoint field *i and print it to list in the form
// desc = value.
// Callers in this file test the result with != 0 (do_cpt_files) and with
// < 0 (do_cpt_int_err).
// NOTE(review): the XDR transfer and the return statements are not visible
// in this listing.
217 static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
228 fprintf(list,"%s = %d\n",desc,*i);
// Transfer n unsigned chars from or to the array i, one xdr_u_char call per
// element, and print them to list as two-digit hex values after desc.
// The loop stops at the first element for which xdr_u_char reports failure,
// because res is part of the loop condition.
233 static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
239 fprintf(list,"%s = ",desc);
241 for (j=0; j<n && res; j++)
243 res &= xdr_u_char(xd,&i[j]);
246 fprintf(list,"%02x",i[j]);
// Wrapper around do_cpt_int that reacts to a negative return value.
// NOTE(review): the reaction itself is not visible in this listing;
// presumably it is a call to cp_error - confirm.
261 static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
263 if (do_cpt_int(xd,desc,i,list) < 0)
// Transfer the large integer *i with xdr_gmx_large_int and print it to list
// as desc = value, formatted through gmx_step_str into a STEPSTRSIZE buffer.
// NOTE(review): the handling of a failed transfer is not visible here.
269 static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
272 char buf[STEPSTRSIZE];
274 res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
281 fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
// Transfer the double *f with xdr_double and print it to list as
// desc = value using the %f conversion.
// NOTE(review): the handling of a failed transfer is not visible here.
285 static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
289 res = xdr_double(xd,f);
296 fprintf(list,"%s = %f\n",desc,*f);
300 /* If nval >= 0, nval is used; on read this should match the passed value.
301 * If nval < 0, *nptr is used; on read the value is stored in nptr
// Low-level transfer of an array of reals for entry ecpt of component cptp.
// The stream carries an element count (nf) and a datatype code (dt) ahead of
// the data. dtc is the precision of this build, float or double.
// erealtype (ecprREAL, ecprRVEC, ecprMATRIX) only affects how the values are
// printed when list is given.
303 static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
304 int nval,int *nptr,real **v,
305 FILE *list,int erealtype)
309 int dtc=xdr_datatype_float;
311 int dtc=xdr_datatype_double;
// NOTE(review): the message below says *ntpr although the parameter is
// called nptr. It is a runtime string, so it is left unchanged here.
328 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
333 res = xdr_int(xd,&nf);
// A count in the file that differs from the count expected by the code is
// fatal.
344 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
353 res = xdr_int(xd,&dt);
// A precision difference is only reported on stderr, not treated as fatal.
360 fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
361 st_names(cptp,ecpt),xdr_datatype_names[dtc],
362 xdr_datatype_names[dt]);
// Taken when listing, or when bit ecpt is not set in sflags.
// NOTE(review): the body of this branch is not visible; presumably the data
// then goes into a temporary buffer instead of *v - confirm.
364 if (list || !(sflags & (1<<ecpt)))
// The data is moved as floats or doubles according to the file datatype dt;
// the dtc tests inside each branch deal with a build precision that differs
// from the file precision.
377 if (dt == xdr_datatype_float)
379 if (dtc == xdr_datatype_float)
387 res = xdr_vector(xd,(char *)vf,nf,
388 (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
393 if (dtc != xdr_datatype_float)
404 if (dtc == xdr_datatype_double)
412 res = xdr_vector(xd,(char *)vd,nf,
413 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
418 if (dtc != xdr_datatype_double)
// Listing: flat reals are printed with pr_reals, rvec data with pr_rvecs
// using nf/3 vectors.
433 pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
436 pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
439 gmx_incons("Unknown checkpoint real type");
451 /* This function stores n along with the reals for reading,
452 * but on reading it assumes that n matches the value in the checkpoint file,
453 * a fatal error is generated when this is not the case.
// Forwards to do_cpte_reals_low with nval = n, no nptr and layout ecprREAL.
455 static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
456 int n,real **v,FILE *list)
458 return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
461 /* This function does the same as do_cpte_reals,
462 * except that on reading it ignores the passed value of *n
463 * and stores the value read from the checkpoint file in *n.
// Forwards to do_cpte_reals_low with nval = -1 and nptr = n.
465 static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
466 int *n,real **v,FILE *list)
468 return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
// Transfer a single real as a one-element array through do_cpte_reals_low
// (count 1, layout ecprREAL).
// NOTE(review): the rest of the parameter list is not visible; callers pass
// a pointer to the real followed by list.
471 static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
476 return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
// Transfer an array of n ints for entry ecpt of component cptp.
// The stream carries an element count (nf) and a datatype code (dt) ahead of
// the data; the expected datatype is xdr_datatype_int.
479 static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
480 int n,int **v,FILE *list)
483 int dtc=xdr_datatype_int;
488 res = xdr_int(xd,&nf);
// The count is only enforced when not listing and when a destination was
// supplied (v != NULL).
493 if (list == NULL && v != NULL && nf != n)
495 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
498 res = xdr_int(xd,&dt);
505 gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
506 st_names(cptp,ecpt),xdr_datatype_names[dtc],
507 xdr_datatype_names[dt]);
// Taken when listing, when bit ecpt is not set in sflags, or when no
// destination was supplied.
// NOTE(review): the branch body is not visible; presumably a temporary
// buffer is used in these cases - confirm.
509 if (list || !(sflags & (1<<ecpt)) || v == NULL)
522 res = xdr_vector(xd,(char *)vp,nf,
523 (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
530 pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
// Transfer a single int as a one-element array through do_cpte_ints.
// NOTE(review): the rest of the parameter list is not visible; callers pass
// a pointer to the int followed by list.
540 static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
543 return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
// Transfer an array of n doubles for entry ecpt of component cptp.
// The stream carries an element count (nf) and a datatype code (dt) ahead of
// the data; the expected datatype is xdr_datatype_double.
546 static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
547 int n,double **v,FILE *list)
550 int dtc=xdr_datatype_double;
555 res = xdr_int(xd,&nf);
// The count is only enforced when not listing.
560 if (list == NULL && nf != n)
562 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
565 res = xdr_int(xd,&dt);
572 gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
573 st_names(cptp,ecpt),xdr_datatype_names[dtc],
574 xdr_datatype_names[dt]);
// Taken when listing or when bit ecpt is not set in sflags.
// NOTE(review): the branch body is not visible; presumably a temporary
// buffer is used in these cases - confirm.
576 if (list || !(sflags & (1<<ecpt)))
589 res = xdr_vector(xd,(char *)vp,nf,
590 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
597 pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
// Transfer the single double *r as a one-element array through
// do_cpte_doubles.
607 static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
608 double *r,FILE *list)
610 return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
// Transfer n rvecs as a flat array of n*DIM reals through
// do_cpte_reals_low, using layout ecprRVEC so that listing prints vectors.
614 static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
615 int n,rvec **v,FILE *list)
619 return do_cpte_reals_low(xd,cptp,ecpt,sflags,
620 n*DIM,NULL,(real **)v,list,ecprRVEC);
// Transfer one matrix v as DIM*DIM reals through do_cpte_reals_low.
// The low-level call is given list = NULL; the listing is done here with
// pr_rvecs, and only when the transfer returned 0.
// NOTE(review): the rest of the parameter list is not visible; callers pass
// the matrix followed by list.
623 static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
629 vr = (real *)&(v[0][0]);
630 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
631 DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
633 if (list && ret == 0)
635 pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
// Transfer the rows v[i], each holding n reals, one do_cpte_reals_low call
// per row. When listing, every successfully transferred row is printed with
// pr_reals under the name entry[i].
// NOTE(review): the loop header over i and the combination of the per-row
// results into the return value are not visible in this listing.
642 static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
643 int n, real **v,FILE *list)
648 char name[CPTSTRLEN];
659 reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
660 if (list && reti == 0)
662 sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
663 pr_reals(list,0,name,v[i],n);
// Transfer an array of n matrices for entry ecpt of component cptp.
// A separate count (nf) precedes the data. The matrices are copied into the
// flat real buffer vr, moved with do_cpte_reals_low as nf*DIM*DIM reals, and
// copied back into vp afterwards.
673 static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
674 int n,matrix **v,FILE *list)
683 res = xdr_int(xd,&nf);
// The count is only enforced when not listing.
688 if (list == NULL && nf != n)
690 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
// Taken when listing or when bit ecpt is not set in sflags.
// NOTE(review): the branch body is not visible; presumably vp then points at
// temporary storage instead of *v - confirm.
692 if (list || !(sflags & (1<<ecpt)))
// Flatten: element [i][j][k] goes to index (i*DIM+j)*DIM+k.
712 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
716 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
717 nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
// Unflatten with the same index mapping.
724 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
730 if (list && ret == 0)
734 pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
// Transfer the checkpoint header: magic number, build and run identification
// strings, the file version, and the sizes and flag words that the later
// sections depend on. Fields added in later format versions are only
// transferred when *file_version is high enough.
// NOTE(review): part of the parameter list (double_prec, list) is not
// visible in this listing, although both names are used in the body.
745 static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
746 char **version,char **btime,char **buser,char **bhost,
748 char **fprog,char **ftime,
749 int *eIntegrator,int *simulation_part,
750 gmx_large_int_t *step,double *t,
751 int *nnodes,int *dd_nc,int *npme,
752 int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
753 int *nlambda, int *flags_state,
754 int *flags_eks,int *flags_enh, int *flags_dfh,
// The file starts with a magic integer; a failed transfer or a value other
// than CPT_MAGIC1 is fatal.
771 res = xdr_int(xd,&magic);
774 gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
776 if (magic != CPT_MAGIC1)
778 gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
779 "The checkpoint file is corrupted or not a checkpoint file",
// fhost falls back to the literal unknown when gethostname fails.
// NOTE(review): the second sprintf is presumably the branch for builds
// without gethostname - the directive lines are not visible.
786 if (gethostname(fhost,255) != 0)
788 sprintf(fhost,"unknown");
791 sprintf(fhost,"unknown");
794 do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
795 do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
796 do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
797 do_cpt_string_err(xd,bRead,"GROMACS build host" ,bhost,list);
798 do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
799 do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
// *file_version is preset to the current version before the transfer.
// A file that reports a newer version than this code is rejected.
800 *file_version = cpt_version;
801 do_cpt_int_err(xd,"checkpoint file version",file_version,list);
802 if (*file_version > cpt_version)
804 gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
// Version 13 and later: double precision field.
806 if (*file_version >= 13)
808 do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
// Version 12 and later: generating host.
814 if (*file_version >= 12)
816 do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
822 do_cpt_int_err(xd,"#atoms" ,natoms ,list);
823 do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
// Version 10 and later: Nose-Hoover chain length.
824 if (*file_version >= 10)
826 do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
// Version 11 and later: number of barostat Nose-Hoover chains.
832 if (*file_version >= 11)
834 do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
// Version 14 and later: number of lambda states.
840 if (*file_version >= 14)
842 do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
848 do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
// Version 3 and later store the simulation part; otherwise it is set to 1.
849 if (*file_version >= 3)
851 do_cpt_int_err(xd,"simulation part #", simulation_part,list);
855 *simulation_part = 1;
// Version 5 and later store the step as a large integer; older files use a
// plain int that is transferred into idum.
857 if (*file_version >= 5)
859 do_cpt_step_err(xd,"step" ,step ,list);
863 do_cpt_int_err(xd,"step" ,&idum ,list);
866 do_cpt_double_err(xd,"t" ,t ,list);
867 do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
// dd_nc may be NULL; the three grid counts then go through idum.
869 do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
870 do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
871 do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
872 do_cpt_int_err(xd,"#PME-only nodes",npme,list);
873 do_cpt_int_err(xd,"state flags",flags_state,list);
// Version 4 and later store separate ekin and energy history flag words.
// Otherwise the energy history flags are taken from the state flag bits
// above estORIRE_DTAV, and three of those bits are cleared in the state
// flags.
874 if (*file_version >= 4)
876 do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
877 do_cpt_int_err(xd,"energy history flags",flags_enh,list);
882 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
883 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
884 (1<<(estORIRE_DTAV+2)) |
885 (1<<(estORIRE_DTAV+3))));
// Version 14 and later: df history flags.
887 if (*file_version >= 14)
889 do_cpt_int_err(xd,"df history flags",flags_dfh,list);
// Transfer the end-of-file magic number. Only files of version 2 and later
// have a footer; the value is compared with CPT_MAGIC2.
// NOTE(review): the return values and the use of bRead are not visible in
// this listing.
895 static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
900 if (file_version >= 2)
903 res = xdr_int(xd,&magic);
908 if (magic != CPT_MAGIC2)
// Transfer the state entries, dispatching each est index to the helper that
// matches its data type. The loop stops as soon as a helper returns
// non-zero.
// sflags is taken from state->flags and is what the helpers test to decide
// whether an entry is stored in state.
// NOTE(review): the test that selects entries by fflags is not visible in
// this listing.
917 static int do_cpt_state(XDR *xd,gmx_bool bRead,
918 int fflags,t_state *state,
919 gmx_bool bReadRNG,FILE *list)
922 int **rng_p,**rngi_p;
// Nose-Hoover array lengths: chain length times number of T-coupling groups,
// and chain length times number of barostat chains.
929 nnht = state->nhchainlength*state->ngtc;
930 nnhtp = state->nhchainlength*state->nnhpres;
// rng_p and rngi_p point at the LD random number state.
// NOTE(review): presumably this is the bReadRNG branch and both pointers are
// NULL otherwise (do_cpte_ints accepts v == NULL) - confirm.
934 rng_p = (int **)&state->ld_rng;
935 rngi_p = &state->ld_rngi;
939 /* Do not read the RNG data */
943 /* We want the MC_RNG the same across all the nodes for now -- lambda MC is global */
945 sflags = state->flags;
946 for(i=0; (i<estNR && ret == 0); i++)
952 case estLAMBDA: ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
953 case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
954 case estBOX: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
955 case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
956 case estBOXV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
957 case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
958 case estSVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
959 case estFVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
960 case estNH_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
961 case estNH_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
962 case estNHPRES_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
963 case estNHPRES_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
964 case estTC_INT: ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
965 case estVETA: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
966 case estVOL0: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
967 case estX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
968 case estV: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
969 case estSDX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
970 case estLD_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
971 case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
972 case estMC_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
973 case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
974 case estDISRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
975 case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
976 case estORIRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
977 case estORIRE_DTAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
// An index without a case above is treated as an entry written by newer
// code.
979 gmx_fatal(FARGS,"Unknown state entry %d\n"
980 "You are probably reading a new checkpoint file with old code",i);
// Transfer the kinetic energy state entries of ekins, dispatching each eeks
// index to the helper that matches its data type. The loop stops as soon as
// a helper returns non-zero.
// Every helper receives the component cptpEEKS and the entry index i, in
// that order. The helpers use the index both for the flag test
// sflags & (1<<ecpt) and for the entry name they print.
// Fix: the VSCALE, EKINSCALEH, DEKINDL and MVCOS calls used to pass
// 1,cptpEEKS in those two positions. The component was right only because
// cptpEEKS has the value 1; the entry index was always 1, so these four
// entries were tested and reported as Ekinh instead of as themselves.
// NOTE(review): the test that selects entries by fflags and the final return
// are not visible in this listing.
988 static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
989 int fflags,ekinstate_t *ekins,
997 for(i=0; (i<eeksNR && ret == 0); i++)
1004 case eeksEKIN_N: ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
1005 case eeksEKINH : ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
1006 case eeksEKINF: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
1007 case eeksEKINO: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
1008 case eeksEKINTOTAL: ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
1009 case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
1010 case eeksVSCALE: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
1011 case eeksEKINSCALEH: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
1012 case eeksDEKINDL : ret = do_cpte_real(xd,cptpEEKS,i,fflags,&ekins->dekindl,list); break;
1013 case eeksMVCOS: ret = do_cpte_real(xd,cptpEEKS,i,fflags,&ekins->mvcos,list); break;
// An index without a case above is treated as an entry written by newer
// code.
1015 gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
1016 "You are probably reading a new checkpoint file with old code",i);
// Transfer the energy history entries selected by the bits of fflags, then
// fill in fields that older file formats did not store.
// NOTE(review): the remainder of the parameter list is not visible; list is
// used throughout the body.
1025 static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
1026 int fflags,energyhistory_t *enerhist,
// Counters are reset and dht is cleared before the entries are processed.
// NOTE(review): presumably this happens only when bRead is set - the
// enclosing condition is not visible.
1037 enerhist->nsteps = 0;
1039 enerhist->nsteps_sim = 0;
1040 enerhist->nsum_sim = 0;
1041 enerhist->dht = NULL;
// The delta_h history is allocated only when the file flags announce it.
1043 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1045 snew(enerhist->dht,1);
1046 enerhist->dht->ndh = NULL;
1047 enerhist->dht->dh = NULL;
1048 enerhist->dht->start_lambda_set=FALSE;
1052 for(i=0; (i<eenhNR && ret == 0); i++)
1054 if (fflags & (1<<i))
1058 case eenhENERGY_N: ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
1059 case eenhENERGY_AVER: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
1060 case eenhENERGY_SUM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
1061 case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
1062 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
1063 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
1064 case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
1065 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
// The number of delta_h lists comes first; on read the per-list arrays are
// then allocated and cleared.
1066 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
1067 if (bRead) /* now allocate memory for it */
1069 snew(enerhist->dht->dh, enerhist->dht->nndh);
1070 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1071 for(j=0;j<enerhist->dht->nndh;j++)
1073 enerhist->dht->ndh[j] = 0;
1074 enerhist->dht->dh[j] = NULL;
// NOTE(review): ret is overwritten on every iteration of this loop and is
// not part of the loop condition, so a failure on an earlier list can be
// hidden by a later success - confirm whether the loop should stop on error.
1078 case eenhENERGY_DELTA_H_LIST:
1079 for(j=0;j<enerhist->dht->nndh;j++)
1081 ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1084 case eenhENERGY_DELTA_H_STARTTIME:
1085 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1086 case eenhENERGY_DELTA_H_STARTLAMBDA:
1087 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1089 gmx_fatal(FARGS,"Unknown energy history entry %d\n"
1090 "You are probably reading a new checkpoint file with old code",i);
// The changes to fflags below act on the local copy only.
1095 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1097 /* Assume we have an old file format and copy sum to sum_sim */
1098 srenew(enerhist->ener_sum_sim,enerhist->nener);
1099 for(i=0; i<enerhist->nener; i++)
1101 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1103 fflags |= (1<<eenhENERGY_SUM_SIM);
1106 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1107 !(fflags & (1<<eenhENERGY_NSTEPS)))
1109 /* Assume we have an old file format and copy nsum to nsteps */
1110 enerhist->nsteps = enerhist->nsum;
1111 fflags |= (1<<eenhENERGY_NSTEPS);
1113 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1114 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1116 /* Assume we have an old file format and copy nsum to nsteps */
1117 enerhist->nsteps_sim = enerhist->nsum_sim;
1118 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
// Transfer the free energy history entries of dfhist that are selected by
// the bits of fflags. All per-lambda arrays have dfhist->nlambda elements;
// the nmatrix entries have nlambda rows of nlambda reals each.
// The loop stops as soon as a helper returns non-zero.
1124 static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
1129 nlambda = dfhist->nlambda;
1132 for(i=0; (i<edfhNR && ret == 0); i++)
1134 if (fflags & (1<<i))
1138 case edfhBEQUIL: ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
1139 case edfhNATLAMBDA: ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
1140 case edfhWLHISTO: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
1141 case edfhWLDELTA: ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
1142 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
1143 case edfhSUMDG: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
1144 case edfhSUMMINVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
1145 case edfhSUMVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
1146 case edfhACCUMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
1147 case edfhACCUMM: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
1148 case edfhACCUMP2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
1149 case edfhACCUMM2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
1150 case edfhTIJ: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
1151 case edfhTIJEMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
// An index without a case above is treated as an entry written by newer
// code.
1154 gmx_fatal(FARGS,"Unknown df history entry %d\n"
1155 "You are probably reading a new checkpoint file with old code",i);
// Transfer the list of output files: a count followed, per file, by the
// file name, the file offset as two ints (high and low 32 bits) and, from
// file version 8 on, a checksum size and a 16 byte checksum.
// On read the array *p_outputfiles is allocated with *nfiles elements.
// NOTE(review): the return statements of the failure branches are not
// visible in this listing.
1163 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1164 gmx_file_position_t **p_outputfiles, int *nfiles,
1165 FILE *list, int file_version)
// mask keeps the low 32 bits of an offset.
1169 gmx_off_t mask = 0xFFFFFFFFL;
1170 int offset_high,offset_low;
1172 gmx_file_position_t *outputfiles;
1174 if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
1181 snew(*p_outputfiles,*nfiles);
1184 outputfiles = *p_outputfiles;
1186 for(i=0;i<*nfiles;i++)
1188 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
// Read path: the name is copied with strncpy, which does not terminate the
// destination when the source fills the whole bound.
// NOTE(review): confirm that a terminator is written in the lines not
// visible here.
1191 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1192 strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
1198 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1202 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
// With an offset type wider than 4 bytes both halves are recombined;
// otherwise only the low half is used.
1206 #if (SIZEOF_GMX_OFF_T > 4)
1207 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1209 outputfiles[i].offset = offset_low;
// Write path: the stored name and offset are sent out, the offset split into
// its two halves.
1214 buf = outputfiles[i].filename;
1215 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1217 offset = outputfiles[i].offset;
1225 #if (SIZEOF_GMX_OFF_T > 4)
1226 offset_low = (int) (offset & mask);
1227 offset_high = (int) ((offset >> 32) & mask);
1229 offset_low = offset;
1233 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1237 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
// Checksums exist from file version 8 on; for older files chksum_size is
// set to -1.
1242 if (file_version >= 8)
1244 if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
1249 if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
1256 outputfiles[i].chksum_size = -1;
// Write the simulation state to the checkpoint file fn.
// Visible sequence: build a temporary file name that carries the step
// number, collect the positions of the open output files, open the
// temporary file, assemble the flag words for the optional sections, write
// header, state, ekin state, energy history, df history, file list and
// footer, fsync all output files and close the checkpoint. Unless
// bNumberAndKeep is set or the fsync failed, the previous checkpoint is then
// moved to a _prev name and the temporary file is renamed to fn.
// Fix: the GMX_FAHCORE error message printed step, a gmx_large_int_t,
// through a %d conversion. It now formats the step with gmx_step_str into
// sbuf and prints it with %s, as the file name suffix code below already
// does.
1263 void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
1264 FILE *fplog,t_commrec *cr,
1265 int eIntegrator,int simulation_part,
1266 gmx_bool bExpanded, int elamstats,
1267 gmx_large_int_t step,double t,t_state *state)
1277 char *fntemp; /* the temporary checkpoint file name */
1279 char timebuf[STRLEN];
1280 int nppnodes,npmenodes,flag_64bit;
1281 char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
1282 gmx_file_position_t *outputfiles;
1285 int flags_eks,flags_enh,flags_dfh,i;
// Node counts stored in the header: with domain decomposition the PP count
// comes from cr->dd, otherwise from cr->nnodes.
1290 if (DOMAINDECOMP(cr))
1292 nppnodes = cr->dd->nnodes;
1293 npmenodes = cr->npmenodes;
1297 nppnodes = cr->nnodes;
1307 /* make the new temporary filename */
// The name is cut just before the extension, the suffix _step followed by
// the step number is appended, and the extension is added back.
1308 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1310 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1311 sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
1312 strcat(fntemp,suffix);
1313 strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1316 gmx_ctime_r(&now,timebuf,STRLEN);
1320 fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
1321 gmx_step_str(step,buf),timebuf);
1324 /* Get offsets for open files */
1325 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1327 fp = gmx_fio_open(fntemp,"w");
// Kinetic energy entries are written only when the ekin state is up to date.
1329 if (state->ekinstate.bUpToDate)
1332 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1333 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1334 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
// Energy history entries are written only when sums have been accumulated.
1342 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1344 flags_enh |= (1<<eenhENERGY_N);
1345 if (state->enerhist.nsum > 0)
1347 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1348 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1350 if (state->enerhist.nsum_sim > 0)
1352 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1353 (1<<eenhENERGY_NSUM_SIM));
1355 if (state->enerhist.dht)
1357 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1358 (1<< eenhENERGY_DELTA_H_LIST) |
1359 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1360 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
// Free energy history entries.
// NOTE(review): the conditions around the first two assignments
// (presumably bExpanded and a Wang-Landau test) are not visible.
1366 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1367 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1370 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1372 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1374 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1375 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1381 /* We can check many more things now (CPU, acceleration, etc), but
1382 * it is highly unlikely to have two separate builds with exactly
1383 * the same version, user, time, and build host!
1386 version = gmx_strdup(VERSION);
1387 btime = gmx_strdup(BUILD_TIME);
1388 buser = gmx_strdup(BUILD_USER);
1389 bhost = gmx_strdup(BUILD_HOST);
1391 double_prec = GMX_CPT_BUILD_DP;
1392 fprog = gmx_strdup(Program());
1394 ftime = &(timebuf[0]);
1396 do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
1397 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1398 &eIntegrator,&simulation_part,&step,&t,&nppnodes,
1399 DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
1400 &state->natoms,&state->ngtc,&state->nnhpres,
1401 &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
// All sections are written with bRead = FALSE and no listing; a negative
// result from any of them is reported through gmx_file.
1410 if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
1411 (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
1412 (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
1413 (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0) ||
1414 (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
1417 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1420 do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
1422 /* we really, REALLY, want to make sure to physically write the checkpoint,
1423 and all the files it depends on, out to disk. Because we've
1424 opened the checkpoint with gmx_fio_open(), it's in our list
1426 ret=gmx_fio_all_output_fsync();
1432 "Cannot fsync '%s'; maybe you are out of disk space?",
1433 gmx_fio_getname(ret));
1435 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
1445 if( gmx_fio_close(fp) != 0)
1447 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1450 /* we don't move the checkpoint if the user specified they didn't want it,
1451 or if the fsyncs failed */
1452 if (!bNumberAndKeep && !ret)
1456 /* Rename the previous checkpoint file */
1458 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1459 strcat(buf,"_prev");
1460 strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1462 /* we copy here so that if something goes wrong between now and
1463 * the rename below, there's always a state.cpt.
1464 * If renames are atomic (such as in POSIX systems),
1465 * this copying should be unnecessary.
1467 gmx_file_copy(fn, buf, FALSE);
1468 /* We don't really care if this fails:
1469 * there's already a new checkpoint.
1472 gmx_file_rename(fn, buf);
1475 if (gmx_file_rename(fntemp, fn) != 0)
1477 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1485 /*code for alternate checkpointing scheme. moved from top of loop over
1487 fcRequestCheckPoint();
1488 if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
1489 gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %s\n", gmx_step_str(step,sbuf) );
1491 #endif /* end GMX_FAHCORE block */
// Print a table to fplog that shows, for every state entry present in the
// simulation flags (sflags) or in the checkpoint flags (fflags), on which
// side it is present.
// NOTE(review): the argument that supplies the entry name in the table row
// is not visible in this listing.
1494 static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
1498 fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
1499 fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
1500 fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
1501 for(i=0; i<estNR; i++)
1503 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1505 fprintf(fplog," %24s %11s %11s\n",
1507 (sflags & (1<<i)) ? " present " : "not present",
1508 (fflags & (1<<i)) ? " present " : "not present");
// Report a mismatch between the program value p and the checkpoint file
// value f of the integer setting called type. Output goes to fplog, or to
// stderr when fplog is NULL.
// NOTE(review): the comparison of p with f and the update of *mm are not
// visible in this listing.
1513 static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
1515 FILE *fp = fplog ? fplog : stderr;
1519 fprintf(fp," %s mismatch,\n",type);
1520 fprintf(fp," current program: %d\n",p);
1521 fprintf(fp," checkpoint file: %d\n",f);
// Report a mismatch between the program string p and the checkpoint file
// string f of the setting called type; the strings are compared with strcmp.
// Output goes to fplog, or to stderr when fplog is NULL.
// NOTE(review): the update of *mm is not visible in this listing.
1527 static void check_string(FILE *fplog,const char *type,const char *p,
1528 const char *f,gmx_bool *mm)
1530 FILE *fp = fplog ? fplog : stderr;
1532 if (strcmp(p,f) != 0)
1534 fprintf(fp," %s mismatch,\n",type);
1535 fprintf(fp," current program: %s\n",p);
1536 fprintf(fp," checkpoint file: %s\n",f);
// Compare the build identification and the parallel setup of the running
// program with the values stored in the checkpoint header, reporting each
// difference through check_string or check_int and collecting the result
// in mm.
// NOTE(review): parts of the parameter list (version, fprog) and the
// conditions around the node checks are not visible in this listing.
1542 static void check_match(FILE *fplog,
1544 char *btime,char *buser,char *bhost,int double_prec,
1546 t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
1547 ivec dd_nc,ivec dd_nc_f)
1554 check_string(fplog,"Version" ,VERSION ,version,&mm);
1555 check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
1556 check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
1557 check_string(fplog,"Build host" ,BUILD_HOST ,bhost ,&mm);
1558 check_int (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
1559 check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
// The total node count of this run is compared with the sum of the PP and
// PME node counts from the file.
1561 check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
1570 check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
1573 if (cr->npmenodes >= 0)
1575 npp -= cr->npmenodes;
1579 check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
1580 check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
1581 check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
// Summary texts saying that the continuation is exact but not guaranteed to
// be binary identical; the first one mentions the log file when fplog is
// set.
// NOTE(review): the calls that print these texts and their destinations are
// not visible in this listing.
1588 "Gromacs binary or parallel settings not identical to previous run.\n"
1589 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1590 fplog ? ",\n see the log file for details" : "");
1595 "Gromacs binary or parallel settings not identical to previous run.\n"
1596 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
/* Read the checkpoint file fn into state and reconcile its contents with
 * the setup of the current run.
 *
 * Visible flow:
 *  - read the header with do_cpt_header() and report it on stderr and in
 *    the log file;
 *  - stop with a fatal error when the atom, T-coupling group,
 *    NH-pressure-coupling variable or lambda-state counts in the file
 *    differ from those in state;
 *  - warn when the integrator or the state-entry flags differ; a flag
 *    mismatch is fatal unless the environment variable
 *    GMX_ALLOW_CPT_MISMATCH is set;
 *  - where the node counts allow it, take the PME node count and the DD
 *    grid from the file;
 *  - read the state, kinetic-energy, energy-history and
 *    free-energy-history sections, the output file list and the footer,
 *    then close the file;
 *  - when bAppendOutputFiles is set: require the first listed output file
 *    to be the log file, lock and MD5-check the listed output files,
 *    truncate every file except the log file to the offset stored in the
 *    checkpoint, and return the FILE handle of the log file through
 *    *pfplog instead of closing it.
 *
 * Parameters:
 *  fn                  checkpoint file, opened for reading
 *  pfplog              in: log stream (may point to NULL); out: replaced
 *                      by the reopened log file when appending
 *  cr                  communication record; cr->npmenodes may be set from
 *                      the file
 *  bPartDecomp         particle decomposition flag
 *  dd_nc               DD grid; entries may be filled from the file
 *  eIntegrator         integrator of the current run, compared with the
 *                      one stored in the file
 *  init_fep_state      out: set to state->fep_state after the state read
 *  step, t             out: step and time from the header
 *  state               out: receives the checkpointed state
 *  bReadRNG            its value is forwarded to do_cpt_state()
 *  bReadEkin           out: whether kinetic-energy data was in the file
 *  simulation_part     out: part number from the header
 *  bAppendOutputFiles  enables the appending checks described above
 *  bForceAppend        NOTE(review): presumably decides whether missing
 *                      file-locking support is fatal - the test is not
 *                      visible here; confirm
 */
1601 static void read_checkpoint(const char *fn,FILE **pfplog,
1602 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
1603 int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
1604 t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
1605 int *simulation_part,
1606 gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
1611 char *version,*btime,*buser,*bhost,*fprog,*ftime;
1613 char filename[STRLEN],buf[STEPSTRSIZE];
1614 int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
1616 int natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
1619 gmx_file_position_t *outputfiles;
1621 t_fileio *chksum_file;
1622 FILE* fplog = *pfplog;
1623 unsigned char digest[16];
1624 #ifndef GMX_NATIVE_WINDOWS
1625 struct flock fl; /* don't initialize here: the struct order is OS
1629 const char *int_warn=
1630 "WARNING: The checkpoint file was generated with integrator %s,\n"
1631 " while the simulation uses integrator %s\n\n";
1632 const char *sd_note=
1633 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1634 " while the simulation uses %d SD or BD nodes,\n"
1635 " continuation will be exact, except for the random state\n\n";
1637 #ifndef GMX_NATIVE_WINDOWS
1639 fl.l_whence=SEEK_SET;
1648 "read_checkpoint not (yet) supported with particle decomposition");
1651 fp = gmx_fio_open(fn,"r");
1652 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1653 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1654 &eIntegrator_f,simulation_part,step,t,
1655 &nppnodes_f,dd_nc_f,&npmenodes_f,
1656 &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
1657 &fflags,&flags_eks,&flags_enh,&flags_dfh,NULL);
1659 if (bAppendOutputFiles &&
1660 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1662 gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1665 if (cr == NULL || MASTER(cr))
1667 fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
1671 /* This will not be written if we do appending, since fplog is still NULL then */
1674 fprintf(fplog,"\n");
1675 fprintf(fplog,"Reading checkpoint file %s\n",fn);
1676 fprintf(fplog," file generated by: %s\n",fprog);
1677 fprintf(fplog," file generated at: %s\n",ftime);
1678 fprintf(fplog," GROMACS build time: %s\n",btime);
1679 fprintf(fplog," GROMACS build user: %s\n",buser);
1680 fprintf(fplog," GROMACS build host: %s\n",bhost);
1681 fprintf(fplog," GROMACS double prec.: %d\n",double_prec);
1682 fprintf(fplog," simulation part #: %d\n",*simulation_part);
1683 fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
1684 fprintf(fplog," time: %f\n",*t);
1685 fprintf(fplog,"\n");
/* The system sizes stored in the checkpoint have to equal those of the
 * current state; any difference below is a fatal error.
 */
1688 if (natoms != state->natoms)
1690 gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
1692 if (ngtc != state->ngtc)
1694 gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
1696 if (nnhpres != state->nnhpres)
1698 gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
1701 if (nlambda != state->dfhist.nlambda)
1703 gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
1706 init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
1707 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
/* Integrator of the file differs from the one of this run: warn on stderr
 * and in the log; with appending requested the run is stopped.
 */
1709 if (eIntegrator_f != eIntegrator)
1713 fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1715 if(bAppendOutputFiles)
1718 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1719 "Stopping the run to prevent you from ruining all your data...\n"
1720 "If you _really_ know what you are doing, try with the -noappend option.\n");
1724 fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1733 else if (bPartDecomp)
1735 nppnodes = cr->nnodes;
/* Same total node count as in the file: use the file's PME node count when
 * none was set (cr->npmenodes < 0); when the PP node count matches as well,
 * dd_nc entries may be taken from the grid stored in the file.
 */
1738 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1740 if (cr->npmenodes < 0)
1742 cr->npmenodes = npmenodes_f;
1744 nppnodes = cr->nnodes - cr->npmenodes;
1745 if (nppnodes == nppnodes_f)
1747 for(d=0; d<DIM; d++)
1751 dd_nc[d] = dd_nc_f[d];
1758 /* The number of PP nodes has not been set yet */
1762 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1764 /* Correct the RNG state size for the number of PP nodes.
1765 * Such assignments should all be moved to one central function.
1767 state->nrng = nppnodes*gmx_rng_n();
1768 state->nrngi = nppnodes;
1772 if (fflags != state->flags)
1777 if(bAppendOutputFiles)
1780 "Output file appending requested, but input and checkpoint states are not identical.\n"
1781 "Stopping the run to prevent you from ruining all your data...\n"
1782 "You can try with the -noappend option, and get more info in the log file.\n");
1785 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1787 gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1792 "WARNING: The checkpoint state entries do not match the simulation,\n"
1793 " see the log file for details\n\n");
1799 print_flag_mismatch(fplog,state->flags,fflags);
1804 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1805 nppnodes != nppnodes_f)
1810 fprintf(stderr,sd_note,nppnodes_f,nppnodes);
1814 fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
1819 check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
1820 cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
1823 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
1824 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1825 Investigate for 5.0. */
1830 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1831 flags_eks,&state->ekinstate,NULL);
/* Kinetic-energy data counts as read when any one of the ekinstate entries
 * tested below is flagged in the file.
 */
1836 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1837 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1839 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1840 flags_enh,&state->enerhist,NULL);
/* Files older than version 6: the energy sum counters are set to the
 * checkpoint step, which is only right for runs that started at step 0
 * (see the warning text).
 */
1846 if (file_version < 6)
1848 const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1850 fprintf(stderr,"\nWARNING: %s\n\n",warn);
1853 fprintf(fplog,"\nWARNING: %s\n\n",warn);
1855 state->enerhist.nsum = *step;
1856 state->enerhist.nsum_sim = *step;
1859 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
1860 flags_dfh,&state->dfhist,NULL);
1866 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
1872 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1877 if( gmx_fio_close(fp) != 0)
1879 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1888 /* If the user wants to append to output files,
1889 * we use the file pointer positions of the output files stored
1890 * in the checkpoint file and truncate the files such that any frames
1891 * written after the checkpoint time are removed.
1892 * All files are md5sum checked such that we can be sure that
1893 * we do not truncate other (maybe important) files.
1895 if (bAppendOutputFiles)
1897 if (fn2ftp(outputfiles[0].filename)!=efLOG)
1899 /* make sure first file is log file so that it is OK to use it for
1902 gmx_fatal(FARGS,"The first output file should always be the log "
1903 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
1905 for(i=0;i<nfiles;i++)
1907 if (outputfiles[i].offset < 0)
1909 gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
1910 "is larger than 2 GB, but mdrun did not support large file"
1911 " offsets. Can not append. Run mdrun with -noappend",
1912 outputfiles[i].filename);
1915 chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
1918 chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
1923 /* Note that there are systems where the lock operation
1924 * will succeed, but a second process can also lock the file.
1925 * We should probably try to detect this.
1927 #ifndef GMX_NATIVE_WINDOWS
1928 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
1931 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
1934 if (errno == ENOSYS)
1938 gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
1942 fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1945 fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1949 else if (errno == EACCES || errno == EAGAIN)
1951 gmx_fatal(FARGS,"Failed to lock: %s. Already running "
1952 "simulation?", outputfiles[i].filename);
1956 gmx_fatal(FARGS,"Failed to lock: %s. %s.",
1957 outputfiles[i].filename, strerror(errno));
1962 /* compute md5 chksum */
1963 if (outputfiles[i].chksum_size != -1)
1965 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
1966 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
1968 gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
1969 outputfiles[i].chksum_size,
1970 outputfiles[i].filename);
1973 if (i==0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
1975 if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
1977 gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
1982 if (i==0) /*open log file here - so that lock is never lifted
1983 after chksum is calculated */
1985 *pfplog = gmx_fio_getfp(chksum_file);
1989 gmx_fio_close(chksum_file);
1992 /* compare md5 chksum */
1993 if (outputfiles[i].chksum_size != -1 &&
1994 memcmp(digest,outputfiles[i].chksum,16)!=0)
/* The computed digest goes to the debug stream ahead of the fatal error. */
1998 fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
1999 for (j=0; j<16; j++)
2001 fprintf(debug,"%02x",digest[j]);
2003 fprintf(debug,"\n");
2005 gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2006 outputfiles[i].filename);
2011 if (i!=0) /*log file is already seeked to correct position */
2013 #ifdef GMX_NATIVE_WINDOWS
2014 rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
2016 rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
2020 gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
/* Load the checkpoint file fn for a continuation run.
 * The simulation master reads the file with read_checkpoint(). The PME
 * node count, the DD grid, the step and the bReadRNG/bReadEkin results are
 * then passed to gmx_bcast(). Finally ir is updated for the continuation:
 * bContinuation is set, a non-negative nsteps is changed by
 * init_step - step, init_step becomes the checkpoint step and
 * simulation_part is incremented by one.
 *
 * fplog, cr, bPartDecomp, dd_nc, state, bReadRNG, bReadEkin, bAppend and
 * bForceAppend are forwarded to read_checkpoint() unchanged.
 */
2030 void load_checkpoint(const char *fn,FILE **fplog,
2031 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
2032 t_inputrec *ir,t_state *state,
2033 gmx_bool *bReadRNG,gmx_bool *bReadEkin,
2034 gmx_bool bAppend,gmx_bool bForceAppend)
2036 gmx_large_int_t step;
2039 if (SIMMASTER(cr)) {
2040 /* Read the state from the checkpoint file */
2041 read_checkpoint(fn,fplog,
2042 cr,bPartDecomp,dd_nc,
2043 ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
2044 &ir->simulation_part,bAppend,bForceAppend);
/* Hand the values obtained on the master to the broadcast routine. */
2047 gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
2048 gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
2049 gmx_bcast(sizeof(step),&step,cr);
2050 gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
2051 gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
2053 ir->bContinuation = TRUE;
/* A negative nsteps is left untouched; otherwise the steps already covered
 * by the checkpoint are taken off the remaining step count.
 */
2054 if (ir->nsteps >= 0)
2056 ir->nsteps += ir->init_step - step;
2058 ir->init_step = step;
2059 ir->simulation_part += 1;
/* Read all sections of the already opened checkpoint file fp.
 * The header values are stored straight into *simulation_part, *step, *t
 * and into the size and flag fields of state (natoms, ngtc, nnhpres,
 * nhchainlength, dfhist.nlambda, flags). After that the state,
 * kinetic-energy, energy-history and free-energy-history sections, the
 * output file list and the footer are read, in that order.
 * bReadRNG is forwarded to do_cpt_state().
 * When outputfiles is non-NULL the file list and its length are returned
 * through outputfiles and nfiles; otherwise local variables receive them.
 * The file is not closed here.
 */
2062 static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
2063 gmx_large_int_t *step,double *t,t_state *state,
2065 int *nfiles,gmx_file_position_t **outputfiles)
2068 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2073 int flags_eks,flags_enh,flags_dfh;
2075 gmx_file_position_t *files_loc=NULL;
2078 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2079 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2080 &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
2081 &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
2082 &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,NULL);
2084 do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
2089 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2090 flags_eks,&state->ekinstate,NULL);
2095 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2096 flags_enh,&state->enerhist,NULL);
2101 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2102 flags_dfh,&state->dfhist,NULL);
2108 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
2109 outputfiles != NULL ? outputfiles : &files_loc,
2110 outputfiles != NULL ? nfiles : &nfiles_loc,
/* files_loc is only filled when the caller did not ask for the file list. */
2112 if (files_loc != NULL)
2122 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
/* Read the checkpoint file fn into state and return the simulation part,
 * step and time stored in its header through simulation_part, step and t.
 * The file is opened for reading, read with read_checkpoint_data()
 * (bReadRNG = FALSE, output file list not requested) and closed again;
 * a failing close is reported through gmx_file().
 */
2136 read_checkpoint_state(const char *fn,int *simulation_part,
2137 gmx_large_int_t *step,double *t,t_state *state)
2141 fp = gmx_fio_open(fn,"r");
2142 read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
2143 if( gmx_fio_close(fp) != 0)
2145 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Fill the trajectory frame fr from the opened checkpoint file fp.
 * The checkpoint is read into a local state, initialised with all sizes
 * zero, through read_checkpoint_data() (bReadRNG = FALSE, output file list
 * not requested). From that state the frame receives the atom count, the
 * step (converted with gmx_large_int_to_int()), lambda (the efptFEP
 * component), the FEP state and the box. The bX, bV and bBox members are
 * set from the estX, estV and estBOX bits of state.flags.
 * fp is not closed here.
 */
2149 void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
2152 int simulation_part;
2153 gmx_large_int_t step;
2156 init_state(&state,0,0,0,0,0);
2158 read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
2160 fr->natoms = state.natoms;
2163 fr->step = gmx_large_int_to_int(step,
2164 "conversion of checkpoint to trajectory");
2168 fr->lambda = state.lambda[efptFEP];
2169 fr->fep_state = state.fep_state;
/* The presence flags hold the masked bit itself, i.e. zero or non-zero. */
2171 fr->bX = (state.flags & (1<<estX));
2177 fr->bV = (state.flags & (1<<estV));
2184 fr->bBox = (state.flags & (1<<estBOX));
2187 copy_mat(state.box,fr->box);
/* Read the checkpoint file fn section by section (header, state,
 * kinetic-energy state, energy history, free-energy history, output file
 * list, footer), passing out to every do_cpt_* reader, and close the file.
 * NOTE(review): out is presumably the stream the contents are listed to -
 * the other callers in this file pass NULL in that argument position;
 * confirm against the do_cpt_* helpers.
 * NOTE(review): the result of do_cpt_files() is not stored in ret here,
 * unlike the results of the other section readers.
 */
2192 void list_checkpoint(const char *fn,FILE *out)
2196 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2198 int eIntegrator,simulation_part,nppnodes,npme;
2199 gmx_large_int_t step;
2203 int flags_eks,flags_enh,flags_dfh;
2207 gmx_file_position_t *outputfiles;
2210 init_state(&state,-1,-1,-1,-1,0);
2212 fp = gmx_fio_open(fn,"r");
2213 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2214 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2215 &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
2216 &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
2217 &(state.dfhist.nlambda),&state.flags,
2218 &flags_eks,&flags_enh,&flags_dfh,out);
2219 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
2224 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2225 flags_eks,&state.ekinstate,out);
2230 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2231 flags_enh,&state.enerhist,out);
2235 init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
2236 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2237 flags_dfh,&state.dfhist,out);
2241 do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
2246 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2253 if( gmx_fio_close(fp) != 0)
2255 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Return whether fnm_cp is usable as an existing output file of this run.
 * The nfile entries of fnm are searched for an output entry (is_output())
 * whose first file name, fns[0], equals fnm_cp exactly (strcmp). The result
 * is true only when such an entry was found and gmx_fexist() reports that
 * the file exists.
 */
2262 static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
2266 /* Check if the output file name stored in the checkpoint file
2267 * is one of the output file names of mdrun.
2271 !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
2276 return (i < nfile && gmx_fexist(fnm_cp));
2279 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2280 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2281 gmx_large_int_t *cpt_step,t_commrec *cr,
2282 gmx_bool bAppendReq,
2283 int nfile,const t_filenm fnm[],
2284 const char *part_suffix,gmx_bool *bAddPart)
2287 gmx_large_int_t step=0;
2291 gmx_file_position_t *outputfiles;
2294 char *fn,suf_up[STRLEN];
2298 if (SIMMASTER(cr)) {
2299 if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
2301 *simulation_part = 0;
2305 init_state(&state,0,0,0,0,0);
2307 read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
2308 &nfiles,&outputfiles);
2309 if( gmx_fio_close(fp) != 0)
2311 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2318 for(f=0; f<nfiles; f++)
2320 if (exist_output_file(outputfiles[f].filename,nfile,fnm))
2325 if (nexist == nfiles)
2327 bAppend = bAppendReq;
2329 else if (nexist > 0)
2332 "Output file appending has been requested,\n"
2333 "but some output files listed in the checkpoint file %s\n"
2334 "are not present or are named differently by the current program:\n",
2336 fprintf(stderr,"output files present:");
2337 for(f=0; f<nfiles; f++)
2339 if (exist_output_file(outputfiles[f].filename,
2342 fprintf(stderr," %s",outputfiles[f].filename);
2345 fprintf(stderr,"\n");
2346 fprintf(stderr,"output files not present or named differently:");
2347 for(f=0; f<nfiles; f++)
2349 if (!exist_output_file(outputfiles[f].filename,
2352 fprintf(stderr," %s",outputfiles[f].filename);
2355 fprintf(stderr,"\n");
2357 gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
2365 gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
2367 fn = outputfiles[0].filename;
2368 if (strlen(fn) < 4 ||
2369 gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
2371 gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
2373 /* Set bAddPart to whether the suffix string '.part' is present
2374 * in the log file name.
2376 strcpy(suf_up,part_suffix);
2378 *bAddPart = (strstr(fn,part_suffix) != NULL ||
2379 strstr(fn,suf_up) != NULL);
2387 gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
2389 if (*simulation_part > 0 && bAppendReq)
2391 gmx_bcast(sizeof(bAppend),&bAppend,cr);
2392 gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
2395 if (NULL != cpt_step)