2 * This file is part of the GROMACS molecular simulation package.
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
6 * Copyright (c) 2012, by the GROMACS development team, led by
7 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
8 * others, as listed in the AUTHORS file in the top-level source
9 * directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /* The source code in this file should be thread-safe.
39 Please keep it that way. */
45 #include "gmx_header_config.h"
50 #ifdef HAVE_SYS_TIME_H
55 #ifdef GMX_NATIVE_WINDOWS
58 #include <sys/locking.h>
72 #include "gmx_random.h"
73 #include "checkpoint.h"
78 #include "buildinfo.h"
85 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
87 gmx_ctime_r(const time_t *clock,char *buf, int n);
90 #define CPT_MAGIC1 171817
91 #define CPT_MAGIC2 171819
92 #define CPTSTRLEN 1024
95 #define GMX_CPT_BUILD_DP 1
97 #define GMX_CPT_BUILD_DP 0
100 /* cpt_version should normally only be changed
101 * when the header or footer format changes.
102 * The state data format itself is backward and forward compatible.
103 * But old code can not read a new entry that is present in the file
104 * (but can read a new format when new entries are not present).
106 static const int cpt_version = 15;
109 const char *est_names[estNR]=
112 "box", "box-rel", "box-v", "pres_prev",
113 "nosehoover-xi", "thermostat-integral",
114 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
115 "disre_initf", "disre_rm3tav",
116 "orire_initf", "orire_Dtav",
117 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
120 enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
122 const char *eeks_names[eeksNR]=
124 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
125 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
128 enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
129 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
130 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
131 eenhENERGY_DELTA_H_NN,
132 eenhENERGY_DELTA_H_LIST,
133 eenhENERGY_DELTA_H_STARTTIME,
134 eenhENERGY_DELTA_H_STARTLAMBDA,
137 const char *eenh_names[eenhNR]=
139 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
140 "energy_sum_sim", "energy_nsum_sim",
141 "energy_nsteps", "energy_nsteps_sim",
143 "energy_delta_h_list",
144 "energy_delta_h_start_time",
145 "energy_delta_h_start_lambda"
148 /* free energy history variables -- need to be preserved over checkpoint */
149 enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
150 edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
151 /* free energy history variable names */
152 const char *edfh_names[edfhNR]=
154 "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
155 "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
158 #ifdef GMX_NATIVE_WINDOWS
160 gmx_wintruncate(const char *filename, __int64 size)
163 /*we do this elsewhere*/
169 fp=fopen(filename,"rb+");
176 return _chsize_s( fileno(fp), size);
182 enum { ecprREAL, ecprRVEC, ecprMATRIX };
184 enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
185 /* enums for the different components of checkpoint variables, replacing the hard coded ones.
186 cptpEST - state variables.
187 cptpEEKS - Kinetic energy state variables.
188 cptpEENH - Energy history state variables.
189 cptpEDFH - free energy history variables.
193 static const char *st_names(int cptp,int ecpt)
197 case cptpEST: return est_names [ecpt]; break;
198 case cptpEEKS: return eeks_names[ecpt]; break;
199 case cptpEENH: return eenh_names[ecpt]; break;
200 case cptpEDFH: return edfh_names[ecpt]; break;
/* Print a notice on fp that the checkpoint file is corrupted or truncated. */
static void cp_warning(FILE *fp)
{
    fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
}
211 static void cp_error()
213 gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
216 static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
224 res = xdr_string(xd,s,CPTSTRLEN);
231 fprintf(list,"%s = %s\n",desc,*s);
236 static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
247 fprintf(list,"%s = %d\n",desc,*i);
252 static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
258 fprintf(list,"%s = ",desc);
260 for (j=0; j<n && res; j++)
262 res &= xdr_u_char(xd,&i[j]);
265 fprintf(list,"%02x",i[j]);
280 static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
282 if (do_cpt_int(xd,desc,i,list) < 0)
288 static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
291 char buf[STEPSTRSIZE];
293 res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
300 fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
304 static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
308 res = xdr_double(xd,f);
315 fprintf(list,"%s = %f\n",desc,*f);
319 static void do_cpt_real_err(XDR *xd,const char *desc,real *f)
324 res = xdr_double(xd,f);
326 res = xdr_float(xd,f);
334 static void do_cpt_n_rvecs_err(XDR *xd,const char *desc,int n, rvec f[],FILE *list)
340 for (j=0; j<DIM; j++)
342 do_cpt_real_err(xd, desc, &f[i][j]);
348 pr_rvecs(list,0,desc,f,n);
352 /* If nval >= 0, nval is used; on read this should match the passed value.
353 * If nval < 0, *nptr is used; on read the value is stored in *nptr
355 static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
356 int nval,int *nptr,real **v,
357 FILE *list,int erealtype)
361 int dtc=xdr_datatype_float;
363 int dtc=xdr_datatype_double;
380 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
385 res = xdr_int(xd,&nf);
396 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
405 res = xdr_int(xd,&dt);
412 fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
413 st_names(cptp,ecpt),xdr_datatype_names[dtc],
414 xdr_datatype_names[dt]);
416 if (list || !(sflags & (1<<ecpt)))
429 if (dt == xdr_datatype_float)
431 if (dtc == xdr_datatype_float)
439 res = xdr_vector(xd,(char *)vf,nf,
440 (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
445 if (dtc != xdr_datatype_float)
456 if (dtc == xdr_datatype_double)
464 res = xdr_vector(xd,(char *)vd,nf,
465 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
470 if (dtc != xdr_datatype_double)
485 pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
488 pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
491 gmx_incons("Unknown checkpoint real type");
503 /* This function stores n along with the reals for reading,
504 * but on reading it assumes that n matches the value in the checkpoint file,
505 * a fatal error is generated when this is not the case.
507 static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
508 int n,real **v,FILE *list)
510 return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
513 /* This function does the same as do_cpte_reals,
514 * except that on reading it ignores the passed value of *n
515 * and stored the value read from the checkpoint file in *n.
517 static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
518 int *n,real **v,FILE *list)
520 return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
523 static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
528 return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
531 static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
532 int n,int **v,FILE *list)
535 int dtc=xdr_datatype_int;
540 res = xdr_int(xd,&nf);
545 if (list == NULL && v != NULL && nf != n)
547 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
550 res = xdr_int(xd,&dt);
557 gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
558 st_names(cptp,ecpt),xdr_datatype_names[dtc],
559 xdr_datatype_names[dt]);
561 if (list || !(sflags & (1<<ecpt)) || v == NULL)
574 res = xdr_vector(xd,(char *)vp,nf,
575 (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
582 pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
592 static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
595 return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
598 static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
599 int n,double **v,FILE *list)
602 int dtc=xdr_datatype_double;
607 res = xdr_int(xd,&nf);
612 if (list == NULL && nf != n)
614 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
617 res = xdr_int(xd,&dt);
624 gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
625 st_names(cptp,ecpt),xdr_datatype_names[dtc],
626 xdr_datatype_names[dt]);
628 if (list || !(sflags & (1<<ecpt)))
641 res = xdr_vector(xd,(char *)vp,nf,
642 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
649 pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
659 static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
660 double *r,FILE *list)
662 return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
666 static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
667 int n,rvec **v,FILE *list)
671 return do_cpte_reals_low(xd,cptp,ecpt,sflags,
672 n*DIM,NULL,(real **)v,list,ecprRVEC);
675 static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
681 vr = (real *)&(v[0][0]);
682 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
683 DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
685 if (list && ret == 0)
687 pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
694 static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
695 int n, real **v,FILE *list)
700 char name[CPTSTRLEN];
711 reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
712 if (list && reti == 0)
714 sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
715 pr_reals(list,0,name,v[i],n);
725 static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
726 int n,matrix **v,FILE *list)
735 res = xdr_int(xd,&nf);
740 if (list == NULL && nf != n)
742 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
744 if (list || !(sflags & (1<<ecpt)))
764 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
768 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
769 nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
776 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
782 if (list && ret == 0)
786 pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
797 static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
798 char **version,char **btime,char **buser,char **bhost,
800 char **fprog,char **ftime,
801 int *eIntegrator,int *simulation_part,
802 gmx_large_int_t *step,double *t,
803 int *nnodes,int *dd_nc,int *npme,
804 int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
805 int *nlambda, int *flags_state,
806 int *flags_eks,int *flags_enh, int *flags_dfh,
824 res = xdr_int(xd,&magic);
827 gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
829 if (magic != CPT_MAGIC1)
831 gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
832 "The checkpoint file is corrupted or not a checkpoint file",
839 if (gethostname(fhost,255) != 0)
841 sprintf(fhost,"unknown");
844 sprintf(fhost,"unknown");
847 do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
848 do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
849 do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
850 do_cpt_string_err(xd,bRead,"GROMACS build host" ,bhost,list);
851 do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
852 do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
853 *file_version = cpt_version;
854 do_cpt_int_err(xd,"checkpoint file version",file_version,list);
855 if (*file_version > cpt_version)
857 gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
859 if (*file_version >= 13)
861 do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
867 if (*file_version >= 12)
869 do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
875 do_cpt_int_err(xd,"#atoms" ,natoms ,list);
876 do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
877 if (*file_version >= 10)
879 do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
885 if (*file_version >= 11)
887 do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
893 if (*file_version >= 14)
895 do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
901 do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
902 if (*file_version >= 3)
904 do_cpt_int_err(xd,"simulation part #", simulation_part,list);
908 *simulation_part = 1;
910 if (*file_version >= 5)
912 do_cpt_step_err(xd,"step" ,step ,list);
916 do_cpt_int_err(xd,"step" ,&idum ,list);
919 do_cpt_double_err(xd,"t" ,t ,list);
920 do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
922 do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
923 do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
924 do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
925 do_cpt_int_err(xd,"#PME-only nodes",npme,list);
926 do_cpt_int_err(xd,"state flags",flags_state,list);
927 if (*file_version >= 4)
929 do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
930 do_cpt_int_err(xd,"energy history flags",flags_enh,list);
935 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
936 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
937 (1<<(estORIRE_DTAV+2)) |
938 (1<<(estORIRE_DTAV+3))));
940 if (*file_version >= 14)
942 do_cpt_int_err(xd,"df history flags",flags_dfh,list);
947 if (*file_version >= 15)
949 do_cpt_int_err(xd,"ED data sets",nED,list);
957 static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
962 if (file_version >= 2)
965 res = xdr_int(xd,&magic);
970 if (magic != CPT_MAGIC2)
979 static int do_cpt_state(XDR *xd,gmx_bool bRead,
980 int fflags,t_state *state,
981 gmx_bool bReadRNG,FILE *list)
984 int **rng_p,**rngi_p;
991 nnht = state->nhchainlength*state->ngtc;
992 nnhtp = state->nhchainlength*state->nnhpres;
996 rng_p = (int **)&state->ld_rng;
997 rngi_p = &state->ld_rngi;
1001 /* Do not read the RNG data */
1005 /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
1007 sflags = state->flags;
1008 for(i=0; (i<estNR && ret == 0); i++)
1010 if (fflags & (1<<i))
1014 case estLAMBDA: ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
1015 case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
1016 case estBOX: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
1017 case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
1018 case estBOXV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
1019 case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
1020 case estSVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
1021 case estFVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
1022 case estNH_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
1023 case estNH_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
1024 case estNHPRES_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
1025 case estNHPRES_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
1026 case estTC_INT: ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
1027 case estVETA: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
1028 case estVOL0: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
1029 case estX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
1030 case estV: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
1031 case estSDX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
1032 case estLD_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
1033 case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
1034 case estMC_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
1035 case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
1036 case estDISRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
1037 case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
1038 case estORIRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
1039 case estORIRE_DTAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
1041 gmx_fatal(FARGS,"Unknown state entry %d\n"
1042 "You are probably reading a new checkpoint file with old code",i);
1050 static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
1051 int fflags,ekinstate_t *ekins,
1059 for(i=0; (i<eeksNR && ret == 0); i++)
1061 if (fflags & (1<<i))
1066 case eeksEKIN_N: ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
1067 case eeksEKINH : ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
1068 case eeksEKINF: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
1069 case eeksEKINO: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
1070 case eeksEKINTOTAL: ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
1071 case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
1072 case eeksVSCALE: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
1073 case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
1074 case eeksDEKINDL : ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->dekindl,list); break;
1075 case eeksMVCOS: ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->mvcos,list); break;
1077 gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
1078 "You are probably reading a new checkpoint file with old code",i);
1087 static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
1088 int fflags,energyhistory_t *enerhist,
1099 enerhist->nsteps = 0;
1101 enerhist->nsteps_sim = 0;
1102 enerhist->nsum_sim = 0;
1103 enerhist->dht = NULL;
1105 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
1107 snew(enerhist->dht,1);
1108 enerhist->dht->ndh = NULL;
1109 enerhist->dht->dh = NULL;
1110 enerhist->dht->start_lambda_set=FALSE;
1114 for(i=0; (i<eenhNR && ret == 0); i++)
1116 if (fflags & (1<<i))
1120 case eenhENERGY_N: ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
1121 case eenhENERGY_AVER: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
1122 case eenhENERGY_SUM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
1123 case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
1124 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
1125 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
1126 case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
1127 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
1128 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
1129 if (bRead) /* now allocate memory for it */
1131 snew(enerhist->dht->dh, enerhist->dht->nndh);
1132 snew(enerhist->dht->ndh, enerhist->dht->nndh);
1133 for(j=0;j<enerhist->dht->nndh;j++)
1135 enerhist->dht->ndh[j] = 0;
1136 enerhist->dht->dh[j] = NULL;
1140 case eenhENERGY_DELTA_H_LIST:
1141 for(j=0;j<enerhist->dht->nndh;j++)
1143 ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
1146 case eenhENERGY_DELTA_H_STARTTIME:
1147 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
1148 case eenhENERGY_DELTA_H_STARTLAMBDA:
1149 ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
1151 gmx_fatal(FARGS,"Unknown energy history entry %d\n"
1152 "You are probably reading a new checkpoint file with old code",i);
1157 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1159 /* Assume we have an old file format and copy sum to sum_sim */
1160 srenew(enerhist->ener_sum_sim,enerhist->nener);
1161 for(i=0; i<enerhist->nener; i++)
1163 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1165 fflags |= (1<<eenhENERGY_SUM_SIM);
1168 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1169 !(fflags & (1<<eenhENERGY_NSTEPS)))
1171 /* Assume we have an old file format and copy nsum to nsteps */
1172 enerhist->nsteps = enerhist->nsum;
1173 fflags |= (1<<eenhENERGY_NSTEPS);
1175 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1176 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1178 /* Assume we have an old file format and copy nsum_sim to nsteps_sim */
1179 enerhist->nsteps_sim = enerhist->nsum_sim;
1180 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1186 static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
1191 nlambda = dfhist->nlambda;
1194 for(i=0; (i<edfhNR && ret == 0); i++)
1196 if (fflags & (1<<i))
1200 case edfhBEQUIL: ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
1201 case edfhNATLAMBDA: ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
1202 case edfhWLHISTO: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
1203 case edfhWLDELTA: ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
1204 case edfhSUMWEIGHTS: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
1205 case edfhSUMDG: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
1206 case edfhSUMMINVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
1207 case edfhSUMVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
1208 case edfhACCUMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
1209 case edfhACCUMM: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
1210 case edfhACCUMP2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
1211 case edfhACCUMM2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
1212 case edfhTIJ: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
1213 case edfhTIJEMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
1216 gmx_fatal(FARGS,"Unknown df history entry %d\n"
1217 "You are probably reading a new checkpoint file with old code",i);
1226 /* This function stores the last whole configuration of the reference and
1227 * average structure in the .cpt file
1229 static int do_cpt_EDstate(XDR *xd,gmx_bool bRead,
1230 edsamstate_t *EDstate, FILE *list)
1237 EDstate->bFromCpt = bRead;
1239 if (EDstate->nED <= 0)
1244 /* When reading, init_edsam has not been called yet,
1245 * so we have to allocate memory first. */
1248 snew(EDstate->nref , EDstate->nED);
1249 snew(EDstate->old_sref, EDstate->nED);
1250 snew(EDstate->nav , EDstate->nED);
1251 snew(EDstate->old_sav , EDstate->nED);
1254 /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
1255 for (i=0; i< EDstate->nED; i++)
1257 /* Reference structure SREF */
1258 sprintf(buf, "ED%d # of atoms in reference structure", i+1);
1259 do_cpt_int_err(xd, buf, &EDstate->nref[i],list);
1260 sprintf(buf, "ED%d x_ref", i+1);
1263 snew(EDstate->old_sref[i], EDstate->nref[i]);
1264 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
1268 do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
1271 /* Average structure SAV */
1272 sprintf(buf, "ED%d # of atoms in average structure", i+1);
1273 do_cpt_int_err(xd, buf, &EDstate->nav[i] ,list);
1274 sprintf(buf, "ED%d x_av", i+1);
1277 snew(EDstate->old_sav[i], EDstate->nav[i]);
1278 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
1282 do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
1290 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1291 gmx_file_position_t **p_outputfiles, int *nfiles,
1292 FILE *list, int file_version)
1296 gmx_off_t mask = 0xFFFFFFFFL;
1297 int offset_high,offset_low;
1299 gmx_file_position_t *outputfiles;
1301 if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
1308 snew(*p_outputfiles,*nfiles);
1311 outputfiles = *p_outputfiles;
1313 for(i=0;i<*nfiles;i++)
1315 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1318 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1319 strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
1325 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1329 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1333 #if (SIZEOF_GMX_OFF_T > 4)
1334 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1336 outputfiles[i].offset = offset_low;
1341 buf = outputfiles[i].filename;
1342 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1344 offset = outputfiles[i].offset;
1352 #if (SIZEOF_GMX_OFF_T > 4)
1353 offset_low = (int) (offset & mask);
1354 offset_high = (int) ((offset >> 32) & mask);
1356 offset_low = offset;
1360 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1364 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1369 if (file_version >= 8)
1371 if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
1376 if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
1383 outputfiles[i].chksum_size = -1;
1390 void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
1391 FILE *fplog,t_commrec *cr,
1392 int eIntegrator,int simulation_part,
1393 gmx_bool bExpanded, int elamstats,
1394 gmx_large_int_t step,double t,t_state *state)
1404 char *fntemp; /* the temporary checkpoint file name */
1406 char timebuf[STRLEN];
1407 int nppnodes,npmenodes,flag_64bit;
1408 char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
1409 gmx_file_position_t *outputfiles;
1412 int flags_eks,flags_enh,flags_dfh,i;
1417 if (DOMAINDECOMP(cr))
1419 nppnodes = cr->dd->nnodes;
1420 npmenodes = cr->npmenodes;
1424 nppnodes = cr->nnodes;
1434 /* make the new temporary filename */
1435 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1437 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1438 sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
1439 strcat(fntemp,suffix);
1440 strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1443 gmx_ctime_r(&now,timebuf,STRLEN);
1447 fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
1448 gmx_step_str(step,buf),timebuf);
1451 /* Get offsets for open files */
1452 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1454 fp = gmx_fio_open(fntemp,"w");
1456 if (state->ekinstate.bUpToDate)
1459 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1460 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1461 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1469 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1471 flags_enh |= (1<<eenhENERGY_N);
1472 if (state->enerhist.nsum > 0)
1474 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1475 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1477 if (state->enerhist.nsum_sim > 0)
1479 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1480 (1<<eenhENERGY_NSUM_SIM));
1482 if (state->enerhist.dht)
1484 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1485 (1<< eenhENERGY_DELTA_H_LIST) |
1486 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1487 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1493 flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
1494 (1<<edfhTIJ) | (1<<edfhTIJEMP));
1497 flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
1499 if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
1501 flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
1502 | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
1508 /* We can check many more things now (CPU, acceleration, etc), but
1509 * it is highly unlikely to have two separate builds with exactly
1510 * the same version, user, time, and build host!
1513 version = gmx_strdup(VERSION);
1514 btime = gmx_strdup(BUILD_TIME);
1515 buser = gmx_strdup(BUILD_USER);
1516 bhost = gmx_strdup(BUILD_HOST);
1518 double_prec = GMX_CPT_BUILD_DP;
1519 fprog = gmx_strdup(Program());
1521 ftime = &(timebuf[0]);
1523 do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
1524 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1525 &eIntegrator,&simulation_part,&step,&t,&nppnodes,
1526 DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
1527 &state->natoms,&state->ngtc,&state->nnhpres,
1528 &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
1529 &state->edsamstate.nED,
1538 if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
1539 (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
1540 (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
1541 (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0) ||
1542 (do_cpt_EDstate(gmx_fio_getxdr(fp),FALSE,&state->edsamstate,NULL) < 0) ||
1543 (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
1546 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1549 do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
1551 /* we really, REALLY, want to make sure to physically write the checkpoint,
1552 and all the files it depends on, out to disk. Because we've
1553 opened the checkpoint with gmx_fio_open(), it's in our list
1555 ret=gmx_fio_all_output_fsync();
1561 "Cannot fsync '%s'; maybe you are out of disk space?",
1562 gmx_fio_getname(ret));
1564 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
1574 if( gmx_fio_close(fp) != 0)
1576 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1579 /* we don't move the checkpoint if the user specified they didn't want it,
1580 or if the fsyncs failed */
1581 if (!bNumberAndKeep && !ret)
1585 /* Rename the previous checkpoint file */
1587 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1588 strcat(buf,"_prev");
1589 strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1591 /* we copy here so that if something goes wrong between now and
1592 * the rename below, there's always a state.cpt.
1593 * If renames are atomic (such as in POSIX systems),
1594 * this copying should be unnecessary.
1596 gmx_file_copy(fn, buf, FALSE);
1597 /* We don't really care if this fails:
1598 * there's already a new checkpoint.
1601 gmx_file_rename(fn, buf);
1604 if (gmx_file_rename(fntemp, fn) != 0)
1606 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1614 /*code for alternate checkpointing scheme. moved from top of loop over
1616 fcRequestCheckPoint();
1617 if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
1618 gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
1620 #endif /* end GMX_FAHCORE block */
/* Write to fplog a table of state entries, one row per bit i in
 * 0 <= i < estNR that is set in sflags or in fflags, marking for each
 * whether it is present on the simulation side (sflags) and on the
 * checkpoint side (fflags).
 * fplog is written to unconditionally.
 */
1623 static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
1627 fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
1628 fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
1629 fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
/* Entries that are absent on both sides produce no row */
1630 for(i=0; i<estNR; i++)
1632 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1634 fprintf(fplog," %24s %11s %11s\n",
1636 (sflags & (1<<i)) ? " present " : "not present",
1637 (fflags & (1<<i)) ? " present " : "not present");
/* Report a mismatch of an integer property between the running program
 * and the checkpoint file.
 *   fplog  log file; when NULL the report goes to stderr instead
 *   type   label printed in front of the word mismatch
 *   p      value of the current program
 *   f      value stored in the checkpoint file
 *   mm     NOTE(review): presumably set when p and f differ; confirm
 */
1642 static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
1644 FILE *fp = fplog ? fplog : stderr;
1648 fprintf(fp," %s mismatch,\n",type);
1649 fprintf(fp," current program: %d\n",p);
1650 fprintf(fp," checkpoint file: %d\n",f);
/* Report a mismatch of a string property between the running program
 * and the checkpoint file.
 *   fplog  log file; when NULL the report goes to stderr instead
 *   type   label printed in front of the word mismatch
 *   p      value of the current program
 *   f      value stored in the checkpoint file
 *   mm     NOTE(review): presumably set when the strings differ; confirm
 */
1656 static void check_string(FILE *fplog,const char *type,const char *p,
1657 const char *f,gmx_bool *mm)
1659 FILE *fp = fplog ? fplog : stderr;
/* Exact, case-sensitive comparison */
1661 if (strcmp(p,f) != 0)
1663 fprintf(fp," %s mismatch,\n",type);
1664 fprintf(fp," current program: %s\n",p);
1665 fprintf(fp," checkpoint file: %s\n",f);
/* Compare the identity and the parallel layout of the running program
 * with the values recorded in the checkpoint file, using check_string
 * and check_int for the individual items:
 *   - version, build time, build user, build host, precision and
 *     program name;
 *   - total number of nodes against npp_f + npme_f, and the number of
 *     PME nodes against npme_f;
 *   - the domain decomposition cell counts dd_nc against dd_nc_f for
 *     each of the three dimensions.
 * A note stating that continuation is exact but not guaranteed to be
 * binary identical is part of the output.
 * NOTE(review): the conditions guarding the parallel checks and the
 * final messages should be confirmed against the complete function.
 */
1671 static void check_match(FILE *fplog,
1673 char *btime,char *buser,char *bhost,int double_prec,
1675 t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
1676 ivec dd_nc,ivec dd_nc_f)
/* Build identity of this binary against the one stored in the file */
1683 check_string(fplog,"Version" ,VERSION ,version,&mm);
1684 check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
1685 check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
1686 check_string(fplog,"Build host" ,BUILD_HOST ,bhost ,&mm);
1687 check_int (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
1688 check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
/* Total node count against the PP plus PME nodes stored in the file */
1690 check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
1699 check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
/* Only a non-negative PME node count is subtracted from npp */
1702 if (cr->npmenodes >= 0)
1704 npp -= cr->npmenodes;
/* Domain decomposition grid, one check per dimension */
1708 check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
1709 check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
1710 check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
/* This variant of the note points to the log file when fplog is set */
1717 "Gromacs binary or parallel settings not identical to previous run.\n"
1718 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1719 fplog ? ",\n see the log file for details" : "");
1724 "Gromacs binary or parallel settings not identical to previous run.\n"
1725 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
/* Read the checkpoint file fn into state and validate it against the
 * current run.
 *
 * The header is read first. A different number of atoms, T-coupling
 * groups, NH pressure coupling variables or lambda states is fatal; a
 * different integrator produces the int_warn warning. After that the
 * sections are read in the order in which write_checkpoint stores them:
 * state, kinetic energy state, energy history, df history, ED state,
 * output file list and footer.
 *
 * When bAppendOutputFiles is set, every output file listed in the
 * checkpoint is opened, its MD5 checksum is compared with the stored
 * one, and files other than the log file are truncated to the stored
 * offset. The log file (entry 0) is seeked to its offset and its FILE
 * pointer is handed back through *pfplog.
 *
 *   fn                  checkpoint file name
 *   pfplog              in: current log file (may be NULL);
 *                       out: appended log file when appending
 *   cr                  communication record; cr->npmenodes is taken
 *                       from the file when it was negative and the
 *                       total node count matches
 *   dd_nc               DD cell counts, can be filled from the file
 *   eIntegrator         integrator of the current run
 *   init_fep_state      out: state->fep_state as read
 *   step, t             out: step and time from the header
 *   state               state to fill
 *   bReadRNG            in: forwarded to do_cpt_state
 *   bReadEkin           out: whether kinetic energy entries were present
 *   simulation_part     out: simulation part from the header
 *   bAppendOutputFiles  append to (and therefore verify) output files
 *   bForceAppend        NOTE(review): presumably chooses between the
 *                       fatal error and the note when file locking is
 *                       unsupported; confirm
 */
1730 static void read_checkpoint(const char *fn,FILE **pfplog,
1731 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
1732 int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
1733 t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
1734 int *simulation_part,
1735 gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
1740 char *version,*btime,*buser,*bhost,*fprog,*ftime;
1742 char filename[STRLEN],buf[STEPSTRSIZE];
1743 int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
1745 int natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
1748 gmx_file_position_t *outputfiles;
1750 t_fileio *chksum_file;
1751 FILE* fplog = *pfplog;
1752 unsigned char digest[16];
1753 #ifndef GMX_NATIVE_WINDOWS
1754 struct flock fl; /* don't initialize here: the struct order is OS
1758 const char *int_warn=
1759 "WARNING: The checkpoint file was generated with integrator %s,\n"
1760 " while the simulation uses integrator %s\n\n";
1761 const char *sd_note=
1762 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1763 " while the simulation uses %d SD or BD nodes,\n"
1764 " continuation will be exact, except for the random state\n\n";
1766 #ifndef GMX_NATIVE_WINDOWS
1768 fl.l_whence=SEEK_SET;
1777 "read_checkpoint not (yet) supported with particle decomposition");
1780 fp = gmx_fio_open(fn,"r");
1781 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1782 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
1783 &eIntegrator_f,simulation_part,step,t,
1784 &nppnodes_f,dd_nc_f,&npmenodes_f,
1785 &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
1786 &fflags,&flags_eks,&flags_enh,&flags_dfh,
1787 &state->edsamstate.nED,NULL);
1789 if (bAppendOutputFiles &&
1790 file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
1792 gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
1795 if (cr == NULL || MASTER(cr))
1797 fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
1801 /* This will not be written if we do appending, since fplog is still NULL then */
1804 fprintf(fplog,"\n");
1805 fprintf(fplog,"Reading checkpoint file %s\n",fn);
1806 fprintf(fplog," file generated by: %s\n",fprog);
1807 fprintf(fplog," file generated at: %s\n",ftime);
1808 fprintf(fplog," GROMACS build time: %s\n",btime);
1809 fprintf(fplog," GROMACS build user: %s\n",buser);
1810 fprintf(fplog," GROMACS build host: %s\n",bhost);
1811 fprintf(fplog," GROMACS double prec.: %d\n",double_prec);
1812 fprintf(fplog," simulation part #: %d\n",*simulation_part);
1813 fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
1814 fprintf(fplog," time: %f\n",*t);
1815 fprintf(fplog,"\n");
/* The system sizes stored in the file must equal those of the current
 * run; each of the following mismatches is fatal.
 */
1818 if (natoms != state->natoms)
1820 gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
1822 if (ngtc != state->ngtc)
1824 gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
1826 if (nnhpres != state->nnhpres)
1828 gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
1831 if (nlambda != state->dfhist.nlambda)
1833 gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
1836 init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
1837 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
/* The integrator differs from the one recorded in the checkpoint */
1839 if (eIntegrator_f != eIntegrator)
1843 fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1845 if(bAppendOutputFiles)
1848 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1849 "Stopping the run to prevent you from ruining all your data...\n"
1850 "If you _really_ know what you are doing, try with the -noappend option.\n");
1854 fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1863 else if (bPartDecomp)
1865 nppnodes = cr->nnodes;
/* Same total node count as in the checkpoint: a negative (unset) PME
 * node count is taken from the file, and when the PP node count then
 * matches as well, dd_nc entries can be filled from dd_nc_f.
 */
1868 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1870 if (cr->npmenodes < 0)
1872 cr->npmenodes = npmenodes_f;
1874 nppnodes = cr->nnodes - cr->npmenodes;
1875 if (nppnodes == nppnodes_f)
1877 for(d=0; d<DIM; d++)
1881 dd_nc[d] = dd_nc_f[d];
1888 /* The number of PP nodes has not been set yet */
1892 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1894 /* Correct the RNG state size for the number of PP nodes.
1895 * Such assignments should all be moved to one central function.
1897 state->nrng = nppnodes*gmx_rng_n();
1898 state->nrngi = nppnodes;
1902 if (fflags != state->flags)
1907 if(bAppendOutputFiles)
1910 "Output file appending requested, but input and checkpoint states are not identical.\n"
1911 "Stopping the run to prevent you from ruining all your data...\n"
1912 "You can try with the -noappend option, and get more info in the log file.\n");
1915 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1917 gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1922 "WARNING: The checkpoint state entries do not match the simulation,\n"
1923 " see the log file for details\n\n");
1929 print_flag_mismatch(fplog,state->flags,fflags);
1934 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1935 nppnodes != nppnodes_f)
1940 fprintf(stderr,sd_note,nppnodes_f,nppnodes);
1944 fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
1949 check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
1950 cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
1953 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
1954 *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
1955 Investigate for 5.0. */
1960 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1961 flags_eks,&state->ekinstate,NULL);
/* Kinetic energy data counts as read when any of the ekin or scaling
 * entries is flagged in the file. The bitwise | terms are folded into
 * the || chain, so any set bit yields a true value.
 */
1966 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1967 ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
1969 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1970 flags_enh,&state->enerhist,NULL);
/* Files older than version 6: the energy history sum counts are set to
 * the checkpoint step, and a warning about that assumption is printed.
 */
1982 if (file_version < 6)
1984 const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1986 fprintf(stderr,"\nWARNING: %s\n\n",warn);
1989 fprintf(fplog,"\nWARNING: %s\n\n",warn);
1991 state->enerhist.nsum = *step;
1992 state->enerhist.nsum_sim = *step;
1995 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
1996 flags_dfh,&state->dfhist,NULL);
/* The ED state is read after the df history. The stream is sequential
 * and write_checkpoint stores the df history ahead of the ED state, as
 * read_checkpoint_data and list_checkpoint also expect; reading the two
 * in the opposite order would misinterpret the data when both sections
 * are non-empty.
 */
1976 ret = do_cpt_EDstate(gmx_fio_getxdr(fp),TRUE,&state->edsamstate,NULL);
2002 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
2008 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
/* All sections have been read; release the checkpoint file */
2013 if( gmx_fio_close(fp) != 0)
2015 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2024 /* If the user wants to append to output files,
2025 * we use the file pointer positions of the output files stored
2026 * in the checkpoint file and truncate the files such that any frames
2027 * written after the checkpoint time are removed.
2028 * All files are md5sum checked such that we can be sure that
2029 * we do not truncate other (maybe important) files.
2031 if (bAppendOutputFiles)
2033 if (fn2ftp(outputfiles[0].filename)!=efLOG)
2035 /* make sure first file is log file so that it is OK to use it for
2038 gmx_fatal(FARGS,"The first output file should always be the log "
2039 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
2041 for(i=0;i<nfiles;i++)
2043 if (outputfiles[i].offset < 0)
2045 gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
2046 "is larger than 2 GB, but mdrun did not support large file"
2047 " offsets. Can not append. Run mdrun with -noappend",
2048 outputfiles[i].filename);
2051 chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
2054 chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
2059 /* Note that there are systems where the lock operation
2060 * will succeed, but a second process can also lock the file.
2061 * We should probably try to detect this.
2063 #ifndef GMX_NATIVE_WINDOWS
2064 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
2067 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
2070 if (errno == ENOSYS)
2074 gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
2078 fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
2081 fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
2085 else if (errno == EACCES || errno == EAGAIN)
2087 gmx_fatal(FARGS,"Failed to lock: %s. Already running "
2088 "simulation?", outputfiles[i].filename);
2092 gmx_fatal(FARGS,"Failed to lock: %s. %s.",
2093 outputfiles[i].filename, strerror(errno));
2098 /* compute md5 chksum */
2099 if (outputfiles[i].chksum_size != -1)
2101 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
2102 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
2104 gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2105 outputfiles[i].chksum_size,
2106 outputfiles[i].filename);
2109 if (i==0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
2111 if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
2113 gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
2118 if (i==0) /*open log file here - so that lock is never lifted
2119 after chksum is calculated */
2121 *pfplog = gmx_fio_getfp(chksum_file);
2125 gmx_fio_close(chksum_file);
2128 /* compare md5 chksum */
2129 if (outputfiles[i].chksum_size != -1 &&
2130 memcmp(digest,outputfiles[i].chksum,16)!=0)
2134 fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
2135 for (j=0; j<16; j++)
2137 fprintf(debug,"%02x",digest[j]);
2139 fprintf(debug,"\n");
2141 gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
2142 outputfiles[i].filename);
2147 if (i!=0) /*log file is already seeked to correct position */
2149 #ifdef GMX_NATIVE_WINDOWS
2150 rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
2152 rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
2156 gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
/* Load the checkpoint file fn and turn the input record into that of a
 * continuation run.
 *
 * The file is read with read_checkpoint on the simulation master
 * (SIMMASTER). The PME node count, the DD cell counts, the step and the
 * two read flags are then distributed with gmx_bcast. Finally ir is
 * updated: bContinuation is set, nsteps (when non-negative) is adjusted
 * by init_step - step, init_step becomes the checkpoint step and
 * simulation_part is incremented.
 *
 *   fn            checkpoint file name
 *   fplog         log file pointer, forwarded to read_checkpoint
 *   cr            communication record
 *   dd_nc         DD cell counts, may be set from the checkpoint
 *   ir            input record to update
 *   state         state to fill
 *   bReadRNG      forwarded to read_checkpoint and broadcast
 *   bReadEkin     forwarded to read_checkpoint and broadcast
 *   bAppend       append to the output files of the previous part
 *   bForceAppend  forwarded to read_checkpoint
 */
2166 void load_checkpoint(const char *fn,FILE **fplog,
2167 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
2168 t_inputrec *ir,t_state *state,
2169 gmx_bool *bReadRNG,gmx_bool *bReadEkin,
2170 gmx_bool bAppend,gmx_bool bForceAppend)
2172 gmx_large_int_t step;
2175 if (SIMMASTER(cr)) {
2176 /* Read the state from the checkpoint file */
2177 read_checkpoint(fn,fplog,
2178 cr,bPartDecomp,dd_nc,
2179 ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
2180 &ir->simulation_part,bAppend,bForceAppend);
/* Distribute the values read on the master */
2183 gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
2184 gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
2185 gmx_bcast(sizeof(step),&step,cr);
2186 gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
2187 gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
2189 ir->bContinuation = TRUE;
/* A negative nsteps is left untouched */
2190 if (ir->nsteps >= 0)
2192 ir->nsteps += ir->init_step - step;
2194 ir->init_step = step;
2195 ir->simulation_part += 1;
/* Read all sections of the already opened checkpoint file fp.
 *
 * The header values are stored directly in state (natoms, ngtc,
 * nnhpres, nhchainlength, nlambda, flags, nED), so no comparison with
 * an existing setup is made here. The sections are read in the order
 * state, kinetic energy state, energy history, df history, ED state,
 * output file list, footer.
 *
 *   simulation_part, step, t  out: values from the header
 *   state                     out: filled from the file
 *   nfiles, outputfiles       out: output file list; when outputfiles
 *                             is NULL the list is read into the local
 *                             files_loc and nfiles_loc instead
 *
 * fp is not closed here; the callers in this file close it themselves.
 */
2198 static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
2199 gmx_large_int_t *step,double *t,t_state *state,
2201 int *nfiles,gmx_file_position_t **outputfiles)
2204 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2209 int flags_eks,flags_enh,flags_dfh;
2211 gmx_file_position_t *files_loc=NULL;
2214 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2215 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2216 &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
2217 &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
2218 &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
2219 &state->edsamstate.nED,NULL);
/* The flags just read from the header select which entries are read */
2221 do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
2226 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2227 flags_eks,&state->ekinstate,NULL);
2232 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2233 flags_enh,&state->enerhist,NULL);
2238 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2239 flags_dfh,&state->dfhist,NULL);
2245 ret = do_cpt_EDstate(gmx_fio_getxdr(fp),TRUE,&state->edsamstate,NULL);
/* Without a caller-supplied destination the file list goes to locals */
2251 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
2252 outputfiles != NULL ? outputfiles : &files_loc,
2253 outputfiles != NULL ? nfiles : &nfiles_loc,
2255 if (files_loc != NULL)
2265 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
/* Open the checkpoint file fn for reading, read it completely with
 * read_checkpoint_data (no output file list requested) and close it.
 * simulation_part, step, t and state receive the values from the file.
 * A failing close is reported through gmx_file.
 */
2279 read_checkpoint_state(const char *fn,int *simulation_part,
2280 gmx_large_int_t *step,double *t,t_state *state)
2284 fp = gmx_fio_open(fn,"r");
2285 read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
2286 if( gmx_fio_close(fp) != 0)
2288 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Fill the trajectory frame fr from the checkpoint open on fp.
 *
 * The checkpoint is read into a local state with read_checkpoint_data
 * (no output file list requested). From it the frame receives the atom
 * count, the step (converted to int by gmx_large_int_to_int), the FEP
 * lambda, the fep_state, and the bX, bV and bBox indicators derived
 * from the estX, estV and estBOX bits of the state flags. The box is
 * copied with copy_mat.
 */
2292 void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
2295 int simulation_part;
2296 gmx_large_int_t step;
2299 init_state(&state,0,0,0,0,0);
2301 read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
2303 fr->natoms = state.natoms;
/* The frame stores the step as int, hence the checked narrowing */
2306 fr->step = gmx_large_int_to_int(step,
2307 "conversion of checkpoint to trajectory");
2311 fr->lambda = state.lambda[efptFEP];
2312 fr->fep_state = state.fep_state;
/* Presence indicators follow the state flags read from the file */
2314 fr->bX = (state.flags & (1<<estX));
2320 fr->bV = (state.flags & (1<<estV));
2327 fr->bBox = (state.flags & (1<<estBOX));
2330 copy_mat(state.box,fr->box);
/* Read the checkpoint file fn section by section, passing out to every
 * do_cpt_* reader.
 * NOTE(review): the readers presumably print what they read to out;
 * confirm against their definitions.
 *
 * The sections are read in the order header, state, kinetic energy
 * state, energy history, df history, ED state, output file list and
 * footer. The result of every section read, including the output file
 * list, is kept in ret so that a failure there is not silently ignored.
 * The file is closed at the end; a failing close is reported through
 * gmx_file.
 */
2335 void list_checkpoint(const char *fn,FILE *out)
2339 char *version,*btime,*buser,*bhost,*fprog,*ftime;
2341 int eIntegrator,simulation_part,nppnodes,npme;
2342 gmx_large_int_t step;
2346 int flags_eks,flags_enh,flags_dfh;
2350 gmx_file_position_t *outputfiles;
2353 init_state(&state,-1,-1,-1,-1,0);
2355 fp = gmx_fio_open(fn,"r");
2356 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2357 &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
2358 &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
2359 &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
2360 &(state.dfhist.nlambda),&state.flags,
2361 &flags_eks,&flags_enh,&flags_dfh,&state.edsamstate.nED,out);
2362 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
2367 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2368 flags_eks,&state.ekinstate,out);
2373 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2374 flags_enh,&state.enerhist,out);
2378 init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
2379 ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
2380 flags_dfh,&state.dfhist,out);
2385 ret = do_cpt_EDstate(gmx_fio_getxdr(fp),TRUE,&state.edsamstate,out);
/* Keep the status of the file list read, as the other readers of the
 * checkpoint in this file do, so that a failure is noticed.
 */
2390 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
2395 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2402 if( gmx_fio_close(fp) != 0)
2404 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
/* Tell whether the file name fnm_cp, taken from a checkpoint file,
 * belongs to the current run and exists on disk.
 *   fnm_cp  file name stored in the checkpoint
 *   nfile   number of entries in fnm
 *   fnm     file name table of the current program
 * The result is true only when an index below nfile was found and
 * gmx_fexist reports the file as present. An entry matches when it is
 * an output file (is_output) and its first name equals fnm_cp exactly
 * (strcmp).
 */
2411 static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
2415 /* Check if the output file name stored in the checkpoint file
2416 * is one of the output file names of mdrun.
2420 !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
2425 return (i < nfile && gmx_fexist(fnm_cp));
2428 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2429 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2430 gmx_large_int_t *cpt_step,t_commrec *cr,
2431 gmx_bool bAppendReq,
2432 int nfile,const t_filenm fnm[],
2433 const char *part_suffix,gmx_bool *bAddPart)
2436 gmx_large_int_t step=0;
2440 gmx_file_position_t *outputfiles;
2443 char *fn,suf_up[STRLEN];
2447 if (SIMMASTER(cr)) {
2448 if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
2450 *simulation_part = 0;
2454 init_state(&state,0,0,0,0,0);
2456 read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
2457 &nfiles,&outputfiles);
2458 if( gmx_fio_close(fp) != 0)
2460 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2467 for(f=0; f<nfiles; f++)
2469 if (exist_output_file(outputfiles[f].filename,nfile,fnm))
2474 if (nexist == nfiles)
2476 bAppend = bAppendReq;
2478 else if (nexist > 0)
2481 "Output file appending has been requested,\n"
2482 "but some output files listed in the checkpoint file %s\n"
2483 "are not present or are named differently by the current program:\n",
2485 fprintf(stderr,"output files present:");
2486 for(f=0; f<nfiles; f++)
2488 if (exist_output_file(outputfiles[f].filename,
2491 fprintf(stderr," %s",outputfiles[f].filename);
2494 fprintf(stderr,"\n");
2495 fprintf(stderr,"output files not present or named differently:");
2496 for(f=0; f<nfiles; f++)
2498 if (!exist_output_file(outputfiles[f].filename,
2501 fprintf(stderr," %s",outputfiles[f].filename);
2504 fprintf(stderr,"\n");
2506 gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
2514 gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
2516 fn = outputfiles[0].filename;
2517 if (strlen(fn) < 4 ||
2518 gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
2520 gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
2522 /* Set bAddPart to whether the suffix string '.part' is present
2523 * in the log file name.
2525 strcpy(suf_up,part_suffix);
2527 *bAddPart = (strstr(fn,part_suffix) != NULL ||
2528 strstr(fn,suf_up) != NULL);
2536 gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
2538 if (*simulation_part > 0 && bAppendReq)
2540 gmx_bcast(sizeof(bAppend),&bAppend,cr);
2541 gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
2544 if (NULL != cpt_step)