2 * This file is part of the GROMACS molecular simulation package.
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
6 * Copyright (c) 2012, by the GROMACS development team, led by
7 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
8 * others, as listed in the AUTHORS file in the top-level source
9 * directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /* The source code in this file should be thread-safe.
39 Please keep it that way. */
45 #include "gmx_header_config.h"
50 #ifdef HAVE_SYS_TIME_H
55 #ifdef GMX_NATIVE_WINDOWS
58 #include <sys/locking.h>
72 #include "gmx_random.h"
73 #include "checkpoint.h"
78 #include "buildinfo.h"
85 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
87 gmx_ctime_r(const time_t *clock,char *buf, int n);
90 #define CPT_MAGIC1 171817
91 #define CPT_MAGIC2 171819
92 #define CPTSTRLEN 1024
95 #define GMX_CPT_BUILD_DP 1
97 #define GMX_CPT_BUILD_DP 0
100 /* cpt_version should normally only be changed
101 * when the header of footer format changes.
102 * The state data format itself is backward and forward compatible.
103 * But old code can not read a new entry that is present in the file
104 * (but can read a new format when new entries are not present).
106 static const int cpt_version = 14;
109 const char *est_names[estNR]=
112 "box", "box-rel", "box-v", "pres_prev",
113 "nosehoover-xi", "thermostat-integral",
114 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
115 "disre_initf", "disre_rm3tav",
116 "orire_initf", "orire_Dtav",
117 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
120 enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
122 const char *eeks_names[eeksNR]=
124 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
125 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
128 enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
129 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
130 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
131 eenhENERGY_DELTA_H_NN,
132 eenhENERGY_DELTA_H_LIST,
133 eenhENERGY_DELTA_H_STARTTIME,
134 eenhENERGY_DELTA_H_STARTLAMBDA,
137 const char *eenh_names[eenhNR]=
139 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
140 "energy_sum_sim", "energy_nsum_sim",
141 "energy_nsteps", "energy_nsteps_sim",
143 "energy_delta_h_list",
144 "energy_delta_h_start_time",
145 "energy_delta_h_start_lambda"
148 /* free energy history variables -- need to be preserved over checkpoint */
149 enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
150 edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
151 /* free energy history variable names */
152 const char *edfh_names[edfhNR]=
154 "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
155 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
158 #ifdef GMX_NATIVE_WINDOWS
160 gmx_wintruncate(const char *filename, __int64 size)
163 /*we do this elsewhere*/
169 fp=fopen(filename,"rb+");
176 return _chsize_s( fileno(fp), size);
182 enum { ecprREAL, ecprRVEC, ecprMATRIX };
184 enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
185 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
186 cptpEST - state variables.
187 cptpEEKS - Kinetic energy state variables.
188 cptpEENH - Energy history state variables.
189 cptpEDFH - free energy history variables.
193 static const char *st_names(int cptp,int ecpt)
197 case cptpEST: return est_names [ecpt]; break;
198 case cptpEEKS: return eeks_names[ecpt]; break;
199 case cptpEENH: return eenh_names[ecpt]; break;
200 case cptpEDFH: return edfh_names[ecpt]; break;
206 static void cp_warning(FILE *fp)
208 fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
211 static void cp_error()
213 gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
216 static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
224 res = xdr_string(xd,s,CPTSTRLEN);
231 fprintf(list,"%s = %s\n",desc,*s);
236 static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
247 fprintf(list,"%s = %d\n",desc,*i);
252 static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
258 fprintf(list,"%s = ",desc);
260 for (j=0; j<n && res; j++)
262 res &= xdr_u_char(xd,&i[j]);
265 fprintf(list,"%02x",i[j]);
280 static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
282 if (do_cpt_int(xd,desc,i,list) < 0)
288 static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
291 char buf[STEPSTRSIZE];
293 res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
300 fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
304 static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
308 res = xdr_double(xd,f);
315 fprintf(list,"%s = %f\n",desc,*f);
319 /* If nval >= 0, nval is used; on read this should match the passed value.
320 * If nval n<0, *nptr is used; on read the value is stored in nptr
322 static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
323 int nval,int *nptr,real **v,
324 FILE *list,int erealtype)
328 int dtc=xdr_datatype_float;
330 int dtc=xdr_datatype_double;
347 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
352 res = xdr_int(xd,&nf);
363 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
372 res = xdr_int(xd,&dt);
379 fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
380 st_names(cptp,ecpt),xdr_datatype_names[dtc],
381 xdr_datatype_names[dt]);
383 if (list || !(sflags & (1<<ecpt)))
396 if (dt == xdr_datatype_float)
398 if (dtc == xdr_datatype_float)
406 res = xdr_vector(xd,(char *)vf,nf,
407 (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
412 if (dtc != xdr_datatype_float)
423 if (dtc == xdr_datatype_double)
431 res = xdr_vector(xd,(char *)vd,nf,
432 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
437 if (dtc != xdr_datatype_double)
452 pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
455 pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
458 gmx_incons("Unknown checkpoint real type");
470 /* This function stores n along with the reals for reading,
471 * but on reading it assumes that n matches the value in the checkpoint file,
472 * a fatal error is generated when this is not the case.
474 static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
475 int n,real **v,FILE *list)
477 return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
480 /* This function does the same as do_cpte_reals,
481 * except that on reading it ignores the passed value of *n
482 * and stored the value read from the checkpoint file in *n.
484 static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
485 int *n,real **v,FILE *list)
487 return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
490 static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
495 return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
498 static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
499 int n,int **v,FILE *list)
502 int dtc=xdr_datatype_int;
507 res = xdr_int(xd,&nf);
512 if (list == NULL && v != NULL && nf != n)
514 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
517 res = xdr_int(xd,&dt);
524 gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
525 st_names(cptp,ecpt),xdr_datatype_names[dtc],
526 xdr_datatype_names[dt]);
528 if (list || !(sflags & (1<<ecpt)) || v == NULL)
541 res = xdr_vector(xd,(char *)vp,nf,
542 (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
549 pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
559 static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
562 return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
565 static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
566 int n,double **v,FILE *list)
569 int dtc=xdr_datatype_double;
574 res = xdr_int(xd,&nf);
579 if (list == NULL && nf != n)
581 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
584 res = xdr_int(xd,&dt);
591 gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
592 st_names(cptp,ecpt),xdr_datatype_names[dtc],
593 xdr_datatype_names[dt]);
595 if (list || !(sflags & (1<<ecpt)))
608 res = xdr_vector(xd,(char *)vp,nf,
609 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
616 pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
626 static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
627 double *r,FILE *list)
629 return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
633 static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
634 int n,rvec **v,FILE *list)
638 return do_cpte_reals_low(xd,cptp,ecpt,sflags,
639 n*DIM,NULL,(real **)v,list,ecprRVEC);
642 static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
648 vr = (real *)&(v[0][0]);
649 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
650 DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
652 if (list && ret == 0)
654 pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
661 static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
662 int n, real **v,FILE *list)
667 char name[CPTSTRLEN];
678 reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
679 if (list && reti == 0)
681 sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
682 pr_reals(list,0,name,v[i],n);
692 static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
693 int n,matrix **v,FILE *list)
702 res = xdr_int(xd,&nf);
707 if (list == NULL && nf != n)
709 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
711 if (list || !(sflags & (1<<ecpt)))
731 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
735 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
736 nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
743 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
749 if (list && ret == 0)
753 pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
764 static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
765 char **version,char **btime,char **buser,char **bhost,
767 char **fprog,char **ftime,
768 int *eIntegrator,int *simulation_part,
769 gmx_large_int_t *step,double *t,
770 int *nnodes,int *dd_nc,int *npme,
771 int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
772 int *nlambda, int *flags_state,
773 int *flags_eks,int *flags_enh, int *flags_dfh,
790 res = xdr_int(xd,&magic);
793 gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
795 if (magic != CPT_MAGIC1)
797 gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
798 "The checkpoint file is corrupted or not a checkpoint file",
805 if (gethostname(fhost,255) != 0)
807 sprintf(fhost,"unknown");
810 sprintf(fhost,"unknown");
813 do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
814 do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
815 do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
816 do_cpt_string_err(xd,bRead,"GROMACS build host" ,bhost,list);
817 do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
818 do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
819 *file_version = cpt_version;
820 do_cpt_int_err(xd,"checkpoint file version",file_version,list);
821 if (*file_version > cpt_version)
823 gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
825 if (*file_version >= 13)
827 do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
833 if (*file_version >= 12)
835 do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
841 do_cpt_int_err(xd,"#atoms" ,natoms ,list);
842 do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
843 if (*file_version >= 10)
845 do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
851 if (*file_version >= 11)
853 do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
859 if (*file_version >= 14)
861 do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
867 do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
868 if (*file_version >= 3)
870 do_cpt_int_err(xd,"simulation part #", simulation_part,list);
874 *simulation_part = 1;
876 if (*file_version >= 5)
878 do_cpt_step_err(xd,"step" ,step ,list);
882 do_cpt_int_err(xd,"step" ,&idum ,list);
885 do_cpt_double_err(xd,"t" ,t ,list);
886 do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
888 do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
889 do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
890 do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
891 do_cpt_int_err(xd,"#PME-only nodes",npme,list);
892 do_cpt_int_err(xd,"state flags",flags_state,list);
893 if (*file_version >= 4)
895 do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
896 do_cpt_int_err(xd,"energy history flags",flags_enh,list);
901 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
902 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
903 (1<<(estORIRE_DTAV+2)) |
904 (1<<(estORIRE_DTAV+3))));
906 if (*file_version >= 14)
908 do_cpt_int_err(xd,"df history flags",flags_dfh,list);
914 static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
919 if (file_version >= 2)
922 res = xdr_int(xd,&magic);
927 if (magic != CPT_MAGIC2)
936 static int do_cpt_state(XDR *xd,gmx_bool bRead,
937 int fflags,t_state *state,
938 gmx_bool bReadRNG,FILE *list)
941 int **rng_p,**rngi_p;
948 nnht = state->nhchainlength*state->ngtc;
949 nnhtp = state->nhchainlength*state->nnhpres;
953 rng_p = (int **)&state->ld_rng;
954 rngi_p = &state->ld_rngi;
958 /* Do not read the RNG data */
962 /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
964 sflags = state->flags;
965 for(i=0; (i<estNR && ret == 0); i++)
971 case estLAMBDA: ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
972 case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
973 case estBOX: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
974 case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
975 case estBOXV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
976 case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
977 case estSVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
978 case estFVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
979 case estNH_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
980 case estNH_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
981 case estNHPRES_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
982 case estNHPRES_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
983 case estTC_INT: ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
984 case estVETA: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
985 case estVOL0: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
986 case estX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
987 case estV: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
988 case estSDX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
989 case estLD_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
990 case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
991 case estMC_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
992 case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
993 case estDISRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
994 case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
995 case estORIRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
996 case estORIRE_DTAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
998 gmx_fatal(FARGS,"Unknown state entry %d\n"
999 "You are probably reading a new checkpoint file with old code",i);
1007 static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
1008 int fflags,ekinstate_t *ekins,
1016 for(i=0; (i<eeksNR && ret == 0); i++)
1018 if (fflags & (1<<i))
1023 case eeksEKIN_N: ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
1024 case eeksEKINH : ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
1025 case eeksEKINF: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
1026 case eeksEKINO: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
1027 case eeksEKINTOTAL: ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
1028 case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
1029 case eeksVSCALE: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
1030 case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
1031 case eeksDEKINDL : ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->dekindl,list); break;
1032 case eeksMVCOS: ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->mvcos,list); break;
1034 gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
1035 "You are probably reading a new checkpoint file with old code",i);
1044 static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
1045 int fflags,energyhistory_t *enerhist,
1056 enerhist->nsteps = 0;
1058 enerhist->nsteps_sim = 0;
1059 enerhist->nsum_sim = 0;
1060 enerhist->dht = NULL;
1062 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1064 snew(enerhist->dht,1);
1065 enerhist->dht->ndh = NULL;
1066 enerhist->dht->dh = NULL;
1067 enerhist->dht->start_lambda_set=FALSE;
1071 for(i=0; (i<eenhNR && ret == 0); i++)
1073 if (fflags & (1<<i))
1077 case eenhENERGY_N: ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
1078 case eenhENERGY_AVER: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
1079 case eenhENERGY_SUM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
1080 case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
1081 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
1082 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
1083 case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
1084 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
1085 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
1086 if (bRead) /* now allocate memory for it */
1088 snew(enerhist->dht->dh, enerhist->dht->nndh);
1089 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1090 for(j=0;j<enerhist->dht->nndh;j++)
1092 enerhist->dht->ndh[j] = 0;
1093 enerhist->dht->dh[j] = NULL;
1097 case eenhENERGY_DELTA_H_LIST:
1098 for(j=0;j<enerhist->dht->nndh;j++)
1100 ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1103 case eenhENERGY_DELTA_H_STARTTIME:
1104 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1105 case eenhENERGY_DELTA_H_STARTLAMBDA:
1106 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1108 gmx_fatal(FARGS,"Unknown energy history entry %d\n"
1109 "You are probably reading a new checkpoint file with old code",i);
1114 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1116 /* Assume we have an old file format and copy sum to sum_sim */
1117 srenew(enerhist->ener_sum_sim,enerhist->nener);
1118 for(i=0; i<enerhist->nener; i++)
1120 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1122 fflags |= (1<<eenhENERGY_SUM_SIM);
1125 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1126 !(fflags & (1<<eenhENERGY_NSTEPS)))
1128 /* Assume we have an old file format and copy nsum to nsteps */
1129 enerhist->nsteps = enerhist->nsum;
1130 fflags |= (1<<eenhENERGY_NSTEPS);
1132 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1133 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1135 /* Assume we have an old file format and copy nsum to nsteps */
1136 enerhist->nsteps_sim = enerhist->nsum_sim;
1137 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1143 static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
1148 nlambda = dfhist->nlambda;
1151 for(i=0; (i<edfhNR && ret == 0); i++)
1153 if (fflags & (1<<i))
1157 case edfhBEQUIL: ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
1158 case edfhNATLAMBDA: ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
1159 case edfhWLHISTO: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
1160 case edfhWLDELTA: ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
1161 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
1162 case edfhSUMDG: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
1163 case edfhSUMMINVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
1164 case edfhSUMVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
1165 case edfhACCUMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
1166 case edfhACCUMM: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
1167 case edfhACCUMP2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
1168 case edfhACCUMM2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
1169 case edfhTIJ: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
1170 case edfhTIJEMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
1173 gmx_fatal(FARGS,"Unknown df history entry %d\n"
1174 "You are probably reading a new checkpoint file with old code",i);
1182 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1183 gmx_file_position_t **p_outputfiles, int *nfiles,
1184 FILE *list, int file_version)
1188 gmx_off_t mask = 0xFFFFFFFFL;
1189 int offset_high,offset_low;
1191 gmx_file_position_t *outputfiles;
1193 if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
1200 snew(*p_outputfiles,*nfiles);
1203 outputfiles = *p_outputfiles;
1205 for(i=0;i<*nfiles;i++)
1207 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1210 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1211 strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
1217 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1221 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1225 #if (SIZEOF_GMX_OFF_T > 4)
1226 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1228 outputfiles[i].offset = offset_low;
1233 buf = outputfiles[i].filename;
1234 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1236 offset = outputfiles[i].offset;
1244 #if (SIZEOF_GMX_OFF_T > 4)
1245 offset_low = (int) (offset & mask);
1246 offset_high = (int) ((offset >> 32) & mask);
1248 offset_low = offset;
1252 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1256 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1261 if (file_version >= 8)
1263 if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
1268 if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
1275 outputfiles[i].chksum_size = -1;
1282 void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
1283 FILE *fplog,t_commrec *cr,
1284 int eIntegrator,int simulation_part,
1285 gmx_bool bExpanded, int elamstats,
1286 gmx_large_int_t step,double t,t_state *state)
1296 char *fntemp; /* the temporary checkpoint file name */
1298 char timebuf[STRLEN];
1299 int nppnodes,npmenodes,flag_64bit;
1300 char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
1301 gmx_file_position_t *outputfiles;
1304 int flags_eks,flags_enh,flags_dfh,i;
1309 if (DOMAINDECOMP(cr))
1311 nppnodes = cr->dd->nnodes;
1312 npmenodes = cr->npmenodes;
1316 nppnodes = cr->nnodes;
1326 /* make the new temporary filename */
1327 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1329 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1330 sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
1331 strcat(fntemp,suffix);
1332 strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1335 gmx_ctime_r(&now,timebuf,STRLEN);
1339 fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
1340 gmx_step_str(step,buf),timebuf);
1343 /* Get offsets for open files */
1344 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1346 fp = gmx_fio_open(fntemp,"w");
1348 if (state->ekinstate.bUpToDate)
1351 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1352 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1353 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1361 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1363 flags_enh |= (1<<eenhENERGY_N);
1364 if (state->enerhist.nsum > 0)
1366 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1367 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1369 if (state->enerhist.nsum_sim > 0)
1371 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1372 (1<<eenhENERGY_NSUM_SIM));
1374 if (state->enerhist.dht)
1376 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1377 (1<< eenhENERGY_DELTA_H_LIST) |
1378 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1379 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1385 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1386 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1389 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1391 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1393 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1394 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1400 /* We can check many more things now (CPU, acceleration, etc), but
1401 * it is highly unlikely to have two separate builds with exactly
1402 * the same version, user, time, and build host!
1405 version = gmx_strdup(VERSION);
1406 btime = gmx_strdup(BUILD_TIME);
1407 buser = gmx_strdup(BUILD_USER);
1408 bhost = gmx_strdup(BUILD_HOST);
1410 double_prec = GMX_CPT_BUILD_DP;
1411 fprog = gmx_strdup(Program());
1413 ftime = &(timebuf[0]);
1415 do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
1416 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1417 &eIntegrator,&simulation_part,&step,&t,&nppnodes,
1418 DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
1419 &state->natoms,&state->ngtc,&state->nnhpres,
1420 &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
1429 if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
1430 (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
1431 (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
1432 (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0) ||
1433 (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
1436 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1439 do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
1441 /* we really, REALLY, want to make sure to physically write the checkpoint,
1442 and all the files it depends on, out to disk. Because we've
1443 opened the checkpoint with gmx_fio_open(), it's in our list
1445 ret=gmx_fio_all_output_fsync();
1451 "Cannot fsync '%s'; maybe you are out of disk space?",
1452 gmx_fio_getname(ret));
1454 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
1464 if( gmx_fio_close(fp) != 0)
1466 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1469 /* we don't move the checkpoint if the user specified they didn't want it,
1470 or if the fsyncs failed */
1471 if (!bNumberAndKeep && !ret)
1475 /* Rename the previous checkpoint file */
1477 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1478 strcat(buf,"_prev");
1479 strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1481 /* we copy here so that if something goes wrong between now and
1482 * the rename below, there's always a state.cpt.
1483 * If renames are atomic (such as in POSIX systems),
1484 * this copying should be unneccesary.
1486 gmx_file_copy(fn, buf, FALSE);
1487 /* We don't really care if this fails:
1488 * there's already a new checkpoint.
1491 gmx_file_rename(fn, buf);
1494 if (gmx_file_rename(fntemp, fn) != 0)
1496 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1504 /*code for alternate checkpointing scheme. moved from top of loop over
1506 fcRequestCheckPoint();
1507 if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
1508 gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
1510 #endif /* end GMX_FAHCORE block */
1513 static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
1517 fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
1518 fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
1519 fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
1520 for(i=0; i<estNR; i++)
1522 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1524 fprintf(fplog," %24s %11s %11s\n",
1526 (sflags & (1<<i)) ? " present " : "not present",
1527 (fflags & (1<<i)) ? " present " : "not present");
1532 static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
1534 FILE *fp = fplog ? fplog : stderr;
1538 fprintf(fp," %s mismatch,\n",type);
1539 fprintf(fp," current program: %d\n",p);
1540 fprintf(fp," checkpoint file: %d\n",f);
1546 static void check_string(FILE *fplog,const char *type,const char *p,
1547 const char *f,gmx_bool *mm)
1549 FILE *fp = fplog ? fplog : stderr;
1551 if (strcmp(p,f) != 0)
1553 fprintf(fp," %s mismatch,\n",type);
1554 fprintf(fp," current program: %s\n",p);
1555 fprintf(fp," checkpoint file: %s\n",f);
1561 static void check_match(FILE *fplog,
1563 char *btime,char *buser,char *bhost,int double_prec,
1565 t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
1566 ivec dd_nc,ivec dd_nc_f)
1573 check_string(fplog,"Version" ,VERSION ,version,&mm);
1574 check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
1575 check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
1576 check_string(fplog,"Build host" ,BUILD_HOST ,bhost ,&mm);
1577 check_int (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
1578 check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
1580 check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
1589 check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
1592 if (cr->npmenodes >= 0)
1594 npp -= cr->npmenodes;
1598 check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
1599 check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
1600 check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
1607 "Gromacs binary or parallel settings not identical to previous run.\n"
1608 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1609 fplog ? ",\n see the log file for details" : "");
1614 "Gromacs binary or parallel settings not identical to previous run.\n"
1615 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1620 static void read_checkpoint(const char *fn,FILE **pfplog,
1621 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
1622 int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
1623 t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
1624 int *simulation_part,
1625 gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
1630 char *version,*btime,*buser,*bhost,*fprog,*ftime;
1632 char filename[STRLEN],buf[STEPSTRSIZE];
1633 int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
1635 int natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
1638 gmx_file_position_t *outputfiles;
1640 t_fileio *chksum_file;
1641 FILE* fplog = *pfplog;
1642 unsigned char digest[16];
1643 #ifndef GMX_NATIVE_WINDOWS
1644 struct flock fl; /* don't initialize here: the struct order is OS
1648 const char *int_warn=
1649 "WARNING: The checkpoint file was generated with integrator %s,\n"
1650 " while the simulation uses integrator %s\n\n";
1651 const char *sd_note=
1652 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1653 " while the simulation uses %d SD or BD nodes,\n"
1654 " continuation will be exact, except for the random state\n\n";
1656 #ifndef GMX_NATIVE_WINDOWS
1658 fl.l_whence=SEEK_SET;
1667 "read_checkpoint not (yet) supported with particle decomposition");
1670 fp = gmx_fio_open(fn,"r");
1671 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1672 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1673 &eIntegrator_f,simulation_part,step,t,
1674 &nppnodes_f,dd_nc_f,&npmenodes_f,
1675 &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
1676 &fflags,&flags_eks,&flags_enh,&flags_dfh,NULL);
1678 if (bAppendOutputFiles &&
1679 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1681 gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1684 if (cr == NULL || MASTER(cr))
1686 fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
1690 /* This will not be written if we do appending, since fplog is still NULL then */
1693 fprintf(fplog,"\n");
1694 fprintf(fplog,"Reading checkpoint file %s\n",fn);
1695 fprintf(fplog," file generated by: %s\n",fprog);
1696 fprintf(fplog," file generated at: %s\n",ftime);
1697 fprintf(fplog," GROMACS build time: %s\n",btime);
1698 fprintf(fplog," GROMACS build user: %s\n",buser);
1699 fprintf(fplog," GROMACS build host: %s\n",bhost);
1700 fprintf(fplog," GROMACS double prec.: %d\n",double_prec);
1701 fprintf(fplog," simulation part #: %d\n",*simulation_part);
1702 fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
1703 fprintf(fplog," time: %f\n",*t);
1704 fprintf(fplog,"\n");
1707 if (natoms != state->natoms)
1709 gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
1711 if (ngtc != state->ngtc)
1713 gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
1715 if (nnhpres != state->nnhpres)
1717 gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
1720 if (nlambda != state->dfhist.nlambda)
1722 gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
1725 init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
1726 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1728 if (eIntegrator_f != eIntegrator)
1732 fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1734 if(bAppendOutputFiles)
1737 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1738 "Stopping the run to prevent you from ruining all your data...\n"
1739 "If you _really_ know what you are doing, try with the -noappend option.\n");
1743 fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1752 else if (bPartDecomp)
1754 nppnodes = cr->nnodes;
1757 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1759 if (cr->npmenodes < 0)
1761 cr->npmenodes = npmenodes_f;
1763 nppnodes = cr->nnodes - cr->npmenodes;
1764 if (nppnodes == nppnodes_f)
1766 for(d=0; d<DIM; d++)
1770 dd_nc[d] = dd_nc_f[d];
1777 /* The number of PP nodes has not been set yet */
1781 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1783 /* Correct the RNG state size for the number of PP nodes.
1784 * Such assignments should all be moved to one central function.
1786 state->nrng = nppnodes*gmx_rng_n();
1787 state->nrngi = nppnodes;
1791 if (fflags != state->flags)
1796 if(bAppendOutputFiles)
1799 "Output file appending requested, but input and checkpoint states are not identical.\n"
1800 "Stopping the run to prevent you from ruining all your data...\n"
1801 "You can try with the -noappend option, and get more info in the log file.\n");
1804 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1806 gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1811 "WARNING: The checkpoint state entries do not match the simulation,\n"
1812 " see the log file for details\n\n");
1818 print_flag_mismatch(fplog,state->flags,fflags);
1823 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1824 nppnodes != nppnodes_f)
1829 fprintf(stderr,sd_note,nppnodes_f,nppnodes);
1833 fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
1838 check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
1839 cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
1842 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
1843 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1844 Investigate for 5.0. */
1849 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1850 flags_eks,&state->ekinstate,NULL);
1855 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1856 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1858 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1859 flags_enh,&state->enerhist,NULL);
1865 if (file_version < 6)
1867 const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1869 fprintf(stderr,"\nWARNING: %s\n\n",warn);
1872 fprintf(fplog,"\nWARNING: %s\n\n",warn);
1874 state->enerhist.nsum = *step;
1875 state->enerhist.nsum_sim = *step;
1878 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
1879 flags_dfh,&state->dfhist,NULL);
1885 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
1891 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1896 if( gmx_fio_close(fp) != 0)
1898 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1907 /* If the user wants to append to output files,
1908 * we use the file pointer positions of the output files stored
1909 * in the checkpoint file and truncate the files such that any frames
1910 * written after the checkpoint time are removed.
1911 * All files are md5sum checked such that we can be sure that
1912 * we do not truncate other (maybe imprortant) files.
1914 if (bAppendOutputFiles)
1916 if (fn2ftp(outputfiles[0].filename)!=efLOG)
1918 /* make sure first file is log file so that it is OK to use it for
1921 gmx_fatal(FARGS,"The first output file should always be the log "
1922 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
1924 for(i=0;i<nfiles;i++)
1926 if (outputfiles[i].offset < 0)
1928 gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
1929 "is larger than 2 GB, but mdrun did not support large file"
1930 " offsets. Can not append. Run mdrun with -noappend",
1931 outputfiles[i].filename);
1934 chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
1937 chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
1942 /* Note that there are systems where the lock operation
1943 * will succeed, but a second process can also lock the file.
1944 * We should probably try to detect this.
1946 #ifndef GMX_NATIVE_WINDOWS
1947 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
1950 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
1953 if (errno == ENOSYS)
1957 gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
1961 fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1964 fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1968 else if (errno == EACCES || errno == EAGAIN)
1970 gmx_fatal(FARGS,"Failed to lock: %s. Already running "
1971 "simulation?", outputfiles[i].filename);
1975 gmx_fatal(FARGS,"Failed to lock: %s. %s.",
1976 outputfiles[i].filename, strerror(errno));
1981 /* compute md5 chksum */
1982 if (outputfiles[i].chksum_size != -1)
1984 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
1985 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
1987 gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
1988 outputfiles[i].chksum_size,
1989 outputfiles[i].filename);
1992 if (i==0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
1994 if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
1996 gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
2001 if (i==0) /*open log file here - so that lock is never lifted
2002 after chksum is calculated */
2004 *pfplog = gmx_fio_getfp(chksum_file);
2008 gmx_fio_close(chksum_file);
2011 /* compare md5 chksum */
2012 if (outputfiles[i].chksum_size != -1 &&
2013 memcmp(digest,outputfiles[i].chksum,16)!=0)
2017 fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
2018 for (j=0; j<16; j++)
2020 fprintf(debug,"%02x",digest[j]);
2022 fprintf(debug,"\n");
2024 gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2025 outputfiles[i].filename);
2030 if (i!=0) /*log file is already seeked to correct position */
2032 #ifdef GMX_NATIVE_WINDOWS
2033 rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
2035 rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
2039 gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
2049 void load_checkpoint(const char *fn,FILE **fplog,
2050 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
2051 t_inputrec *ir,t_state *state,
2052 gmx_bool *bReadRNG,gmx_bool *bReadEkin,
2053 gmx_bool bAppend,gmx_bool bForceAppend)
2055 gmx_large_int_t step;
2058 if (SIMMASTER(cr)) {
2059 /* Read the state from the checkpoint file */
2060 read_checkpoint(fn,fplog,
2061 cr,bPartDecomp,dd_nc,
2062 ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
2063 &ir->simulation_part,bAppend,bForceAppend);
2066 gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
2067 gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
2068 gmx_bcast(sizeof(step),&step,cr);
2069 gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
2070 gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
2072 ir->bContinuation = TRUE;
2073 if (ir->nsteps >= 0)
2075 ir->nsteps += ir->init_step - step;
2077 ir->init_step = step;
2078 ir->simulation_part += 1;
2081 static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
2082 gmx_large_int_t *step,double *t,t_state *state,
2084 int *nfiles,gmx_file_position_t **outputfiles)
2087 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2092 int flags_eks,flags_enh,flags_dfh;
2094 gmx_file_position_t *files_loc=NULL;
2097 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2098 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2099 &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
2100 &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
2101 &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,NULL);
2103 do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
2108 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2109 flags_eks,&state->ekinstate,NULL);
2114 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2115 flags_enh,&state->enerhist,NULL);
2120 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2121 flags_dfh,&state->dfhist,NULL);
2127 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
2128 outputfiles != NULL ? outputfiles : &files_loc,
2129 outputfiles != NULL ? nfiles : &nfiles_loc,
2131 if (files_loc != NULL)
2141 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2155 read_checkpoint_state(const char *fn,int *simulation_part,
2156 gmx_large_int_t *step,double *t,t_state *state)
2160 fp = gmx_fio_open(fn,"r");
2161 read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
2162 if( gmx_fio_close(fp) != 0)
2164 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2168 void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
2171 int simulation_part;
2172 gmx_large_int_t step;
2175 init_state(&state,0,0,0,0,0);
2177 read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
2179 fr->natoms = state.natoms;
2182 fr->step = gmx_large_int_to_int(step,
2183 "conversion of checkpoint to trajectory");
2187 fr->lambda = state.lambda[efptFEP];
2188 fr->fep_state = state.fep_state;
2190 fr->bX = (state.flags & (1<<estX));
2196 fr->bV = (state.flags & (1<<estV));
2203 fr->bBox = (state.flags & (1<<estBOX));
2206 copy_mat(state.box,fr->box);
2211 void list_checkpoint(const char *fn,FILE *out)
2215 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2217 int eIntegrator,simulation_part,nppnodes,npme;
2218 gmx_large_int_t step;
2222 int flags_eks,flags_enh,flags_dfh;
2226 gmx_file_position_t *outputfiles;
2229 init_state(&state,-1,-1,-1,-1,0);
2231 fp = gmx_fio_open(fn,"r");
2232 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2233 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2234 &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
2235 &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
2236 &(state.dfhist.nlambda),&state.flags,
2237 &flags_eks,&flags_enh,&flags_dfh,out);
2238 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
2243 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2244 flags_eks,&state.ekinstate,out);
2249 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2250 flags_enh,&state.enerhist,out);
2254 init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
2255 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2256 flags_dfh,&state.dfhist,out);
2260 do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
2265 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2272 if( gmx_fio_close(fp) != 0)
2274 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2281 static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
2285 /* Check if the output file name stored in the checkpoint file
2286 * is one of the output file names of mdrun.
2290 !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
2295 return (i < nfile && gmx_fexist(fnm_cp));
2298 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2299 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2300 gmx_large_int_t *cpt_step,t_commrec *cr,
2301 gmx_bool bAppendReq,
2302 int nfile,const t_filenm fnm[],
2303 const char *part_suffix,gmx_bool *bAddPart)
2306 gmx_large_int_t step=0;
2310 gmx_file_position_t *outputfiles;
2313 char *fn,suf_up[STRLEN];
2317 if (SIMMASTER(cr)) {
2318 if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
2320 *simulation_part = 0;
2324 init_state(&state,0,0,0,0,0);
2326 read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
2327 &nfiles,&outputfiles);
2328 if( gmx_fio_close(fp) != 0)
2330 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2337 for(f=0; f<nfiles; f++)
2339 if (exist_output_file(outputfiles[f].filename,nfile,fnm))
2344 if (nexist == nfiles)
2346 bAppend = bAppendReq;
2348 else if (nexist > 0)
2351 "Output file appending has been requested,\n"
2352 "but some output files listed in the checkpoint file %s\n"
2353 "are not present or are named differently by the current program:\n",
2355 fprintf(stderr,"output files present:");
2356 for(f=0; f<nfiles; f++)
2358 if (exist_output_file(outputfiles[f].filename,
2361 fprintf(stderr," %s",outputfiles[f].filename);
2364 fprintf(stderr,"\n");
2365 fprintf(stderr,"output files not present or named differently:");
2366 for(f=0; f<nfiles; f++)
2368 if (!exist_output_file(outputfiles[f].filename,
2371 fprintf(stderr," %s",outputfiles[f].filename);
2374 fprintf(stderr,"\n");
2376 gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
2384 gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
2386 fn = outputfiles[0].filename;
2387 if (strlen(fn) < 4 ||
2388 gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
2390 gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
2392 /* Set bAddPart to whether the suffix string '.part' is present
2393 * in the log file name.
2395 strcpy(suf_up,part_suffix);
2397 *bAddPart = (strstr(fn,part_suffix) != NULL ||
2398 strstr(fn,suf_up) != NULL);
2406 gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
2408 if (*simulation_part > 0 && bAppendReq)
2410 gmx_bcast(sizeof(bAppend),&bAppend,cr);
2411 gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
2414 if (NULL != cpt_step)