1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- */
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
38 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
40 * \author Jochen Hub <jhub@gwdg.de>
58 #include "gmx_random.h"
65 //! longest file names allowed in input files
66 #define WHAM_MAXFILELEN 2048
69 * enum for energy units
71 enum { enSel, en_kJ, en_kCal, en_kT, enNr };
73 * enum for type of input files (pdos, tpr, or pullf)
75 enum { whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo };
77 /*! \brief enum for bootstrapping method
79 * These bootstrap methods are supported:
80 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
81 * (bsMethod_BayesianHist)
82 * - bootstrap complete histograms (bsMethod_hist)
83 * - bootstrap trajectories from given umbrella histograms. This generates new
84 * "synthetic" histograms (bsMethod_traj)
85 * - bootstrap trajectories from Gaussian with mu/sigma computed from
86 * the respective histogram (bsMethod_trajGauss). This gives very similar
87 * results compared to bsMethod_traj.
89 * ********************************************************************
90 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
91 * JS Hub, BL de Groot, D van der Spoel
92 * g_wham - A free weighted histogram analysis implementation including
93 * robust error and autocorrelation estimates,
94 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
95 * ********************************************************************
97 enum { bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
98 bsMethod_traj, bsMethod_trajGauss };
101 //! Parameters of the umbrella potentials
105 * \name Using umbrella pull code since gromacs 4.x
108 int npullgrps; //!< nr of pull groups in tpr file
109 int pull_geometry; //!< such as distance, position
110 ivec pull_dim; //!< pull dimension with geometry distance
111 int pull_ndim; //!< nr of pull_dim != 0
112 real *k; //!< force constants in tpr file
113 rvec *init_dist; //!< reference displacements
114 real *umbInitDist; //!< reference displacement in umbrella direction
117 * \name Using PDO files common until gromacs 3.x
125 char PullName[4][256];
127 double UmbCons[4][3];
131 //! Data in the umbrella histograms
134 int nPull; //!< nr of pull groups in this pdo or pullf/x file
135 double **Histo; //!< nPull histograms
136 double **cum; //!< nPull cumulative distribution functions
137 int nBin; //!< nr of bins. identical to opt->bins
138 double *k; //!< force constants for the nPull groups
139 double *pos; //!< umbrella positions for the nPull groups
140 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
141 int *N; //!< nr of data points in nPull histograms.
142 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
144 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
146 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
147 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
150 double *tau; //!< intetrated autocorrelation time (IACT)
151 double *tausmooth; //!< smoothed IACT
153 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
155 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
157 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
159 /*! \brief average force estimated from average displacement, fAv=dzAv*k
161 * Used for integration to guess the potential.
164 real *aver; //!< average of histograms
165 real *sigma; //!< stddev of histograms
166 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
169 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
172 int n; //!< total nr of pull groups in this tpr file
173 int nUse; //!< nr of pull groups used
174 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
177 //! Parameters of WHAM
184 const char *fnTpr,*fnPullf,*fnGroupsel;
185 const char *fnPdo,*fnPullx; //!< file names of input
186 gmx_bool bTpr,bPullf,bPdo,bPullx;//!< input file types given?
187 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
189 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
190 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
191 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
192 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
195 * \name Basic WHAM options
198 int bins; //!< nr of bins, min, max, and dz of profile
200 real Temperature,Tolerance; //!< temperature, converged when probability changes less than Tolerance
201 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
204 * \name Output control
207 gmx_bool bLog; //!< energy output (instead of probability) for profile
208 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
209 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
210 /*! \brief after wham, set prof to zero at this z-position.
211 * When bootstrapping, set zProf0 to a "stable" reference position.
214 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
216 gmx_bool bBoundsOnly,bHistOnly; //!< determine min and max, or write histograms and exit
217 gmx_bool bAuto; //!< determine min and max automatically but do not exit
219 gmx_bool verbose; //!< more noisy wham mode
220 int stepchange; //!< print maximum change in prof after how many interations
221 output_env_t oenv; //!< xvgr options
224 * \name Autocorrelation stuff
227 gmx_bool bTauIntGiven,bCalcTauInt; //!< IACT given or should be calculated?
228 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
229 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
230 real acTrestart; //!< when computing ACT, time between restarting points
232 /* \brief Enforce the same weight for each umbella window, that is
233 * calculate with the same number of data points for
234 * each window. That can be reasonable, if the histograms
235 * have different length, but due to autocorrelation,
236 * a longer simulation should not have larger weightin wham.
242 * \name Bootstrapping stuff
245 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
247 /* \brief bootstrap method
249 * if == bsMethod_hist, consider complete histograms as independent
250 * data points and, hence, only mix complete histograms.
251 * if == bsMethod_BayesianHist, consider complete histograms
252 * as independent data points, but assign random weights
253 * to the histograms during the bootstrapping ("Bayesian bootstrap")
254 * In case of long correlations (e.g., inside a channel), these
255 * will yield a more realistic error.
256 * if == bsMethod_traj(Gauss), generate synthetic histograms
258 * histogram by generating an autocorrelated random sequence
259 * that is distributed according to the respective given
260 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
261 * (instead of from the umbrella histogram) to generate a new
266 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
269 /* \brief when mixing histograms, mix only histograms withing blocks
270 long the reaction coordinate xi. Avoids gaps along xi. */
271 int histBootStrapBlockLength;
273 int bsSeed; //!< random seed for bootstrapping
275 /* \brief Write cumulative distribution functions (CDFs) of histograms
276 and write the generated histograms for each bootstrap */
280 * \name tabulated umbrella potential stuff
284 double *tabX,*tabY,tabMin,tabMax,tabDz;
287 gmx_rng_t rng; //!< gromacs random number generator
290 //! Make an umbrella window (may contain several histograms)
291 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
293 t_UmbrellaWindow *win;
296 for (i=0; i<nwin; i++)
298 win[i].Histo = win[i].cum = 0;
299 win[i].k = win[i].pos = win[i].z =0;
300 win[i].N = win[i].Ntot = 0;
301 win[i].g = win[i].tau = win[i].tausmooth = 0;
305 win[i].aver = win[i].sigma = 0;
311 //! Delete an umbrella window (may contain several histograms)
312 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
315 for (i=0; i<nwin; i++)
318 for (j=0;j<win[i].nPull;j++)
319 sfree(win[i].Histo[j]);
321 for (j=0;j<win[i].nPull;j++)
322 sfree(win[i].cum[j]);
324 for (j=0;j<win[i].nPull;j++)
325 sfree(win[i].bContrib[j]);
335 sfree(win[i].tausmooth);
336 sfree(win[i].bContrib);
338 sfree(win[i].forceAv);
341 sfree(win[i].bsWeight);
347 * Read and setup tabulated umbrella potential
349 void setup_tab(const char *fn,t_UmbrellaOptions *opt)
354 printf("Setting up tabulated potential from file %s\n",fn);
355 nl=read_xvg(fn,&y,&ny);
358 gmx_fatal(FARGS,"Found %d columns in %s. Expected 2.\n",ny,fn);
360 opt->tabMax=y[0][nl-1];
361 opt->tabDz=(opt->tabMax-opt->tabMin)/(nl-1);
363 gmx_fatal(FARGS,"The tabulated potential in %s must be provided in \n"
364 "ascending z-direction",fn);
366 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
367 gmx_fatal(FARGS,"z-values in %s are not equally spaced.\n",ny,fn);
372 opt->tabX[i]=y[0][i];
373 opt->tabY[i]=y[1][i];
375 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
376 opt->tabMin,opt->tabMax,opt->tabDz);
379 //! Read the header of an PDO file (position, force const, nr of groups)
380 void read_pdo_header(FILE * file,t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
383 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
385 std::istringstream ist;
388 if (fgets(line,2048,file) == NULL)
389 gmx_fatal(FARGS,"Error reading header from pdo file\n");
391 ist >> Buffer0 >> Buffer1 >> Buffer2;
392 if(strcmp(Buffer1,"UMBRELLA"))
393 gmx_fatal(FARGS,"This does not appear to be a valid pdo file. Found %s, expected %s\n"
394 "(Found in first line: `%s')\n",
395 Buffer1, "UMBRELLA",line);
396 if(strcmp(Buffer2,"3.0"))
397 gmx_fatal(FARGS,"This does not appear to be a version 3.0 pdo file");
400 if (fgets(line,2048,file) == NULL)
401 gmx_fatal(FARGS,"Error reading header from pdo file\n");
403 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
404 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
406 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
408 gmx_fatal(FARGS,"Currently only supports one dimension");
411 if (fgets(line,2048,file) == NULL)
412 gmx_fatal(FARGS,"Error reading header from pdo file\n");
414 ist >> Buffer0 >> Buffer1 >> header->nSkip;
417 if (fgets(line,2048,file) == NULL)
418 gmx_fatal(FARGS,"Error reading header from pdo file\n");
420 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
423 if (fgets(line,2048,file) == NULL)
424 gmx_fatal(FARGS,"Error reading header from pdo file\n");
426 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
429 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n",header->nPull,header->nSkip,
432 for(i=0;i<header->nPull;++i)
434 if (fgets(line,2048,file) == NULL)
435 gmx_fatal(FARGS,"Error reading header from pdo file\n");
437 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
438 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
439 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
442 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
443 i,header->PullName[i],header->UmbPos[i][0],header->UmbCons[i][0]);
446 if (fgets(line,2048,file) == NULL)
447 gmx_fatal(FARGS,"Cannot read from file\n");
450 if (strcmp(Buffer3,"#####") != 0)
451 gmx_fatal(FARGS,"Expected '#####', found %s. Hick.\n",Buffer3);
455 static char *fgets3(FILE *fp,char ptr[],int *len)
460 if (fgets(ptr,*len-1,fp) == NULL)
463 while ((strchr(ptr,'\n') == NULL) && (!feof(fp)))
465 /* This line is longer than len characters, let's increase len! */
469 if (fgets(p-1,STRLEN,fp) == NULL)
473 if (ptr[slen-1] == '\n')
479 /*! \brief Read the data columns of and PDO file.
481 * TO DO: Get rid of the scanf function to avoid the clang warning.
482 * At the moment, this warning is avoided by hiding the format string
483 * the variable fmtlf.
485 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
486 int fileno, t_UmbrellaWindow * win,
487 t_UmbrellaOptions *opt,
488 gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
490 int i,inttemp,bins,count,ntot;
491 real min,max,minfound=1e20,maxfound=-1e20;
492 double temp,time,time0=0,dt;
494 t_UmbrellaWindow * window=0;
495 gmx_bool timeok,dt_ok=1;
496 char *tmpbuf=0,fmt[256],fmtign[256],fmtlf[5]="%lf";
497 int len=STRLEN,dstep=1;
498 const int blocklen=4096;
508 /* Need to alocate memory and set up structure */
509 window->nPull=header->nPull;
512 snew(window->Histo,window->nPull);
513 snew(window->z,window->nPull);
514 snew(window->k,window->nPull);
515 snew(window->pos,window->nPull);
516 snew(window->N, window->nPull);
517 snew(window->Ntot, window->nPull);
518 snew(window->g, window->nPull);
519 snew(window->bsWeight, window->nPull);
523 if (opt->bCalcTauInt)
524 snew(window->ztime, window->nPull);
527 snew(lennow,window->nPull);
529 for(i=0;i<window->nPull;++i)
532 window->bsWeight[i]=1.;
533 snew(window->Histo[i],bins);
534 window->k[i]=header->UmbCons[i][0];
535 window->pos[i]=header->UmbPos[i][0];
539 if (opt->bCalcTauInt)
543 /* Done with setup */
549 min=max=bins=0; /* Get rid of warnings */
554 while ( (ptr=fgets3(file,tmpbuf,&len)) != NULL)
558 if (ptr[0] == '#' || strlen(ptr)<2)
561 /* Initiate format string */
563 strcat(fmtign,"%*s");
565 sscanf(ptr,fmtlf,&time); /* printf("Time %f\n",time); */
566 /* Round time to fs */
567 time=1.0/1000*( static_cast<int> (time*1000+0.5) );
569 /* get time step of pdo file */
577 dstep=static_cast<int>(opt->dt/dt+0.5);
586 dt_ok=((count-1)%dstep == 0);
587 timeok=(dt_ok && time >= opt->tmin && time <= opt->tmax);
589 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
590 time,opt->tmin, opt->tmax, dt_ok,timeok); */
594 for(i=0;i<header->nPull;++i)
597 strcat(fmt,"%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
598 strcat(fmtign,"%*s"); /* ignoring one more entry in the next loop */
599 if(sscanf(ptr,fmt,&temp))
601 temp+=header->UmbPos[i][0];
610 if (opt->bCalcTauInt)
612 /* save time series for autocorrelation analysis */
613 ntot=window->Ntot[i];
617 srenew(window->ztime[i],lennow[i]);
619 window->ztime[i][ntot]=temp;
627 inttemp = static_cast<int> (temp);
632 else if (inttemp >= bins)
636 if(inttemp >= 0 && inttemp < bins)
638 window->Histo[i][inttemp]+=1.;
649 printf("time %f larger than tmax %f, stop reading pdo file\n",time,opt->tmax);
664 /*! \brief Set identical weights for all histograms
666 * Normally, the weight is given by the number data points in each
667 * histogram, together with the autocorrelation time. This can be overwritten
668 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
669 * an appropriate autocorrelation times instead of using this function.
671 void enforceEqualWeights(t_UmbrellaWindow * window,int nWindows)
676 NEnforced=window[0].Ntot[0];
677 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
678 "non-weighted histogram analysis method. Ndata = %d\n",NEnforced);
679 /* enforce all histograms to have the same weight as the very first histogram */
681 for(j=0;j<nWindows;++j)
683 for(k=0;k<window[j].nPull;++k)
685 ratio=1.0*NEnforced/window[j].Ntot[k];
686 for(i=0;i<window[0].nBin;++i)
688 window[j].Histo[k][i]*=ratio;
690 window[j].N[k]=static_cast<int>(ratio*window[j].N[k] + 0.5);
695 /*! \brief Simple linear interpolation between two given tabulated points
697 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
702 jl=static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
704 if (jl<0 || ju>=opt->tabNbins)
706 gmx_fatal(FARGS,"Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
707 "Provide an extended table.",dist,jl,ju);
711 dz=dist-opt->tabX[jl];
712 dp=(pu-pl)*dz/opt->tabDz;
718 * Check which bins substiantially contribute (accelerates WHAM)
720 * Don't worry, that routine does not mean we compute the PMF in limited precision.
721 * After rapid convergence (using only substiantal contributions), we always switch to
724 void setup_acc_wham(double *profile,t_UmbrellaWindow * window,int nWindows,
725 t_UmbrellaOptions *opt)
727 int i,j,k,nGrptot=0,nContrib=0,nTot=0;
728 double U,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,contrib1,contrib2;
729 gmx_bool bAnyContrib;
731 static double wham_contrib_lim;
735 for(i=0;i<nWindows;++i)
737 nGrptot+=window[i].nPull;
739 wham_contrib_lim=opt->Tolerance/nGrptot;
742 ztot=opt->max-opt->min;
745 for(i=0;i<nWindows;++i)
747 if ( ! window[i].bContrib)
749 snew(window[i].bContrib,window[i].nPull);
751 for(j=0;j<window[i].nPull;++j)
753 if ( ! window[i].bContrib[j])
754 snew(window[i].bContrib[j],opt->bins);
756 for(k=0;k<opt->bins;++k)
758 temp=(1.0*k+0.5)*dz+min;
759 distance = temp - window[i].pos[j]; /* distance to umbrella center */
761 { /* in cyclic wham: */
762 if (distance > ztot_half) /* |distance| < ztot_half */
764 else if (distance < -ztot_half)
767 /* Note: there are two contributions to bin k in the wham equations:
768 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
769 ii) exp(- U/(8.314e-3*opt->Temperature))
770 where U is the umbrella potential
771 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
775 U=0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
777 U=tabulated_pot(distance,opt); /* Use tabulated potential */
779 contrib1=profile[k]*exp(- U/(8.314e-3*opt->Temperature));
780 contrib2=window[i].N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j]);
781 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
782 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
783 if (window[i].bContrib[j][k])
787 /* If this histo is far outside min and max all bContrib may be FALSE,
788 causing a floating point exception later on. To avoid that, switch
791 for(k=0;k<opt->bins;++k)
792 window[i].bContrib[j][k]=TRUE;
796 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
797 "Evaluating only %d of %d expressions.\n\n",wham_contrib_lim,nContrib, nTot);
800 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
805 //! Compute the PMF (one of the two main WHAM routines)
806 void calc_profile(double *profile,t_UmbrellaWindow * window, int nWindows,
807 t_UmbrellaOptions *opt, gmx_bool bExact)
810 double num,ztot_half,ztot,distance,min=opt->min,dz=opt->dz;
811 double denom,U=0,temp=0,invg;
813 ztot=opt->max-opt->min;
816 for(i=0;i<opt->bins;++i)
819 for(j=0;j<nWindows;++j)
821 for(k=0;k<window[j].nPull;++k)
823 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
824 temp = (1.0*i+0.5)*dz+min;
825 num += invg*window[j].Histo[k][i];
827 if (! (bExact || window[j].bContrib[k][i]))
829 distance = temp - window[j].pos[k]; /* distance to umbrella center */
831 { /* in cyclic wham: */
832 if (distance > ztot_half) /* |distance| < ztot_half */
834 else if (distance < -ztot_half)
839 U=0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
841 U=tabulated_pot(distance,opt); /* Use tabulated potential */
842 denom+=invg*window[j].N[k]*exp(- U/(8.314e-3*opt->Temperature) + window[j].z[k]);
845 profile[i]=num/denom;
849 //! Compute the free energy offsets z (one of the two main WHAM routines)
850 double calc_z(double * profile,t_UmbrellaWindow * window, int nWindows,
851 t_UmbrellaOptions *opt, gmx_bool bExact)
854 double U=0,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot;
855 double MAX=-1e20, total=0;
857 ztot=opt->max-opt->min;
860 for(i=0;i<nWindows;++i)
862 for(j=0;j<window[i].nPull;++j)
865 for(k=0;k<window[i].nBin;++k)
867 if (! (bExact || window[i].bContrib[j][k]))
869 temp=(1.0*k+0.5)*dz+min;
870 distance = temp - window[i].pos[j]; /* distance to umbrella center */
872 { /* in cyclic wham: */
873 if (distance > ztot_half) /* |distance| < ztot_half */
875 else if (distance < -ztot_half)
880 U=0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
882 U=tabulated_pot(distance,opt); /* Use tabulated potential */
884 total+=profile[k]*exp(-U/(8.314e-3*opt->Temperature));
886 /* Avoid floating point exception if window is far outside min and max */
891 temp = fabs(total - window[i].z[j]);
895 window[i].z[j] = total;
901 //! Make PMF symmetric around 0 (useful e.g. for membranes)
902 void symmetrizeProfile(double* profile,t_UmbrellaOptions *opt)
904 int i,j,bins=opt->bins;
905 double *prof2,min=opt->min,max=opt->max,dz=opt->dz,zsym,deltaz,profsym;
908 if (min>0. || max<0.)
909 gmx_fatal(FARGS,"Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
918 /* bin left of zsym */
919 j=static_cast<int> (floor((zsym-min)/dz-0.5));
920 if (j>=0 && (j+1)<bins)
922 /* interpolate profile linearly between bins j and j+1 */
925 profsym=profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
926 /* average between left and right */
927 prof2[i]=0.5*(profsym+profile[i]);
935 memcpy(profile,prof2,bins*sizeof(double));
939 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
940 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
943 double unit_factor=1., R_MolarGasConst, diff;
945 R_MolarGasConst=8.314472e-3; /* in kJ/(mol*K) */
948 /* Not log? Nothing to do! */
952 /* Get profile in units of kT, kJ/mol, or kCal/mol */
953 if (opt->unit == en_kT)
955 else if (opt->unit == en_kJ)
956 unit_factor=R_MolarGasConst*opt->Temperature;
957 else if (opt->unit == en_kCal)
958 unit_factor=R_MolarGasConst*opt->Temperature/4.1868;
960 gmx_fatal(FARGS,"Sorry, I don't know this energy unit.");
964 profile[i]=-log(profile[i])*unit_factor;
966 /* shift to zero at z=opt->zProf0 */
973 /* Get bin with shortest distance to opt->zProf0
974 (-0.5 from bin position and +0.5 from rounding cancel) */
975 imin=static_cast<int>((opt->zProf0-opt->min)/opt->dz);
988 //! Make an array of random integers (used for bootstrapping)
989 void getRandomIntArray(int nPull,int blockLength,int* randomArray,gmx_rng_t rng)
991 int ipull,blockBase,nr,ipullRandom;
996 for (ipull=0; ipull<nPull; ipull++)
998 blockBase=(ipull/blockLength)*blockLength;
1000 { /* make sure nothing bad happens in the last block */
1001 nr=static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1002 ipullRandom = blockBase + nr;
1003 } while (ipullRandom >= nPull);
1004 if (ipullRandom<0 || ipullRandom>=nPull)
1005 gmx_fatal(FARGS,"Ups, random iWin = %d, nPull = %d, nr = %d, "
1006 "blockLength = %d, blockBase = %d\n",
1007 ipullRandom,nPull,nr,blockLength,blockBase);
1008 randomArray[ipull]=ipullRandom;
1010 /*for (ipull=0; ipull<nPull; ipull++)
1011 printf("%d ",randomArray[ipull]); printf("\n"); */
1014 /*! \brief Set pull group information of a synthetic histogram
1016 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1017 * but it is not required if we bootstrap complete histograms.
1019 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1020 t_UmbrellaWindow *thisWindow,int pullid)
1022 synthWindow->N [0]=thisWindow->N [pullid];
1023 synthWindow->Histo [0]=thisWindow->Histo [pullid];
1024 synthWindow->pos [0]=thisWindow->pos [pullid];
1025 synthWindow->z [0]=thisWindow->z [pullid];
1026 synthWindow->k [0]=thisWindow->k [pullid];
1027 synthWindow->bContrib[0]=thisWindow->bContrib [pullid];
1028 synthWindow->g [0]=thisWindow->g [pullid];
1029 synthWindow->bsWeight[0]=thisWindow->bsWeight [pullid];
1032 /*! \brief Calculate cumulative distribution function of of all histograms.
1034 * This allow to create random number sequences
1035 * which are distributed according to the histograms. Required to generate
1036 * the "synthetic" histograms for the Bootstrap method
1038 void calc_cumulatives(t_UmbrellaWindow *window,int nWindows,
1039 t_UmbrellaOptions *opt,const char *fnhist)
1046 if (opt->bs_verbose)
1048 snew(fn,strlen(fnhist)+10);
1049 snew(buf,strlen(fnhist)+10);
1050 sprintf(fn,"%s_cumul.xvg",strncpy(buf,fnhist,strlen(fnhist)-4));
1051 fp=xvgropen(fn,"CDFs of umbrella windows","z","CDF",opt->oenv);
1055 for (i=0; i<nWindows; i++)
1057 snew(window[i].cum,window[i].nPull);
1058 for (j=0; j<window[i].nPull; j++)
1060 snew(window[i].cum[j],nbin+1);
1061 window[i].cum[j][0]=0.;
1062 for (k=1; k<=nbin; k++)
1063 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1065 /* normalize CDFs. Ensure cum[nbin]==1 */
1066 last = window[i].cum[j][nbin];
1067 for (k=0; k<=nbin; k++)
1068 window[i].cum[j][k] /= last;
1072 printf("Cumulative distriubtion functions of all histograms created.\n");
1073 if (opt->bs_verbose)
1075 for (k=0; k<=nbin; k++)
1077 fprintf(fp,"%g\t",opt->min+k*opt->dz);
1078 for (i=0; i<nWindows; i++)
1079 for (j=0; j<window[i].nPull; j++)
1080 fprintf(fp,"%g\t",window[i].cum[j][k]);
1083 printf("Wrote cumulative distribution functions to %s\n",fn);
1091 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1093 * This is used to generate a random sequence distributed according to a histogram
1095 void searchCumulative(double xx[], int n, double x, int *j)
1111 else if (x==xx[n-1])
1117 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1118 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1119 int pullid,t_UmbrellaOptions *opt)
1121 int N,i,nbins,r_index,ibin;
1122 double r,tausteps=0.0,a,ap,dt,x,invsqrt2,g,y,sig=0.,z,mu=0.;
1125 N=thisWindow->N[pullid];
1127 nbins=thisWindow->nBin;
1129 /* tau = autocorrelation time */
1130 if (opt->tauBootStrap>0.0)
1131 tausteps=opt->tauBootStrap/dt;
1132 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1134 /* calc tausteps from g=1+2tausteps */
1135 g=thisWindow->g[pullid];
1141 "When generating hypothetical trajctories from given umbrella histograms,\n"
1142 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1143 "cannot be predicted. You have 3 options:\n"
1144 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1145 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1147 " with option -iiact for all umbrella windows.\n"
1148 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1149 " Use option (3) only if you are sure what you're doing, you may severely\n"
1150 " underestimate the error if a too small ACT is given.\n");
1151 gmx_fatal(FARGS,errstr);
1154 synthWindow->N [0]=N;
1155 synthWindow->pos [0]=thisWindow->pos[pullid];
1156 synthWindow->z [0]=thisWindow->z[pullid];
1157 synthWindow->k [0]=thisWindow->k[pullid];
1158 synthWindow->bContrib[0]=thisWindow->bContrib[pullid];
1159 synthWindow->g [0]=thisWindow->g [pullid];
1160 synthWindow->bsWeight[0]=thisWindow->bsWeight[pullid];
1162 for (i=0;i<nbins;i++)
1163 synthWindow->Histo[0][i]=0.;
1165 if (opt->bsMethod==bsMethod_trajGauss)
1167 sig = thisWindow->sigma [pullid];
1168 mu = thisWindow->aver [pullid];
1171 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1173 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1175 z = a*x + sqrt(1-a^2)*y
1176 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1177 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1179 C(t) = exp(-t/tau) with tau=-1/ln(a)
1181 Then, use error function to turn the Gaussian random variable into a uniformly
1182 distributed one in [0,1]. Eventually, use cumulative distribution function of
1183 histogram to get random variables distributed according to histogram.
1184 Note: The ACT of the flat distribution and of the generated histogram is not
1185 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1187 a=exp(-1.0/tausteps);
1189 invsqrt2=1./sqrt(2.0);
1191 /* init random sequence */
1192 x=gmx_rng_gaussian_table(opt->rng);
1194 if (opt->bsMethod==bsMethod_traj)
1196 /* bootstrap points from the umbrella histograms */
1199 y=gmx_rng_gaussian_table(opt->rng);
1201 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1202 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1204 r=0.5*(1+gmx_erf(x*invsqrt2));
1205 searchCumulative(thisWindow->cum[pullid],nbins+1 ,r,&r_index);
1206 synthWindow->Histo[0][r_index]+=1.;
1209 else if (opt->bsMethod==bsMethod_trajGauss)
1211 /* bootstrap points from a Gaussian with the same average and sigma
1212 as the respective umbrella histogram. The idea was, that -given
1213 limited sampling- the bootstrapped histograms are otherwise biased
1214 from the limited sampling of the US histos. However, bootstrapping from
1215 the Gaussian seems to yield a similar estimate. */
1219 y=gmx_rng_gaussian_table(opt->rng);
1222 ibin=static_cast<int> (floor((z-opt->min)/opt->dz));
1226 while ( (ibin+=nbins) < 0);
1227 else if (ibin>=nbins)
1228 while ( (ibin-=nbins) >= nbins);
1231 if (ibin>=0 && ibin<nbins)
1233 synthWindow->Histo[0][ibin]+=1.;
1240 gmx_fatal(FARGS,"Unknown bsMethod (id %d). That should not happen.\n",opt->bsMethod);
1244 /*! \brief Write all histograms to a file
1246 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1247 * sets of bootstrapped histograms.
1249 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1250 int bs_index,t_UmbrellaOptions *opt)
1252 char *fn=0,*buf=0,title[256];
1258 snew(fn,strlen(fnhist)+10);
1259 snew(buf,strlen(fnhist)+1);
1260 sprintf(fn,"%s_bs%d.xvg",strncpy(buf,fnhist,strlen(fnhist)-4),bs_index);
1261 sprintf(title,"Umbrella histograms. Bootstrap #%d",bs_index);
1266 strcpy(title,"Umbrella histograms");
1269 fp=xvgropen(fn,title,"z","count",opt->oenv);
1272 /* Write histograms */
1275 fprintf(fp,"%e\t",(double)(l+0.5)*opt->dz+opt->min);
1276 for(i=0;i<nWindows;++i)
1278 for(j=0;j<window[i].nPull;++j)
1280 fprintf(fp,"%e\t",window[i].Histo[j][l]);
1287 printf("Wrote %s\n",fn);
1295 //! Used for qsort to sort random numbers
1296 int func_wham_is_larger(const void *a, const void *b)
1309 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1310 void setRandomBsWeights(t_UmbrellaWindow *synthwin,int nAllPull, t_UmbrellaOptions *opt)
1317 /* generate ordered random numbers between 0 and nAllPull */
1318 for (i=0; i<nAllPull-1; i++)
1320 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1322 qsort((void *)r,nAllPull-1, sizeof(double), &func_wham_is_larger);
1323 r[nAllPull-1]=1.0*nAllPull;
1325 synthwin[0].bsWeight[0]=r[0];
1326 for (i=1; i<nAllPull; i++)
1328 synthwin[i].bsWeight[0]=r[i]-r[i-1];
1331 /* avoid to have zero weight by adding a tiny value */
1332 for (i=0; i<nAllPull; i++)
1333 if (synthwin[i].bsWeight[0] < 1e-5)
1334 synthwin[i].bsWeight[0] = 1e-5;
1339 //! The main bootstrapping routine
1340 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1341 char* ylabel, double *profile,
1342 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1344 t_UmbrellaWindow * synthWindow;
1345 double *bsProfile,*bsProfiles_av, *bsProfiles_av2,maxchange=1e20,tmp,stddev;
1346 int i,j,*randomArray=0,winid,pullid,ib;
1347 int iAllPull,nAllPull,*allPull_winId,*allPull_pullId;
1349 gmx_bool bExact=FALSE;
1351 /* init random generator */
1352 if (opt->bsSeed==-1)
1353 opt->rng=gmx_rng_init(gmx_rng_make_seed());
1355 opt->rng=gmx_rng_init(opt->bsSeed);
1357 snew(bsProfile, opt->bins);
1358 snew(bsProfiles_av, opt->bins);
1359 snew(bsProfiles_av2,opt->bins);
1361 /* Create array of all pull groups. Note that different windows
1362 may have different nr of pull groups
1363 First: Get total nr of pull groups */
1365 for (i=0;i<nWindows;i++)
1366 nAllPull+=window[i].nPull;
1367 snew(allPull_winId,nAllPull);
1368 snew(allPull_pullId,nAllPull);
1370 /* Setup one array of all pull groups */
1371 for (i=0;i<nWindows;i++)
1373 for (j=0;j<window[i].nPull;j++)
1375 allPull_winId[iAllPull]=i;
1376 allPull_pullId[iAllPull]=j;
1381 /* setup stuff for synthetic windows */
1382 snew(synthWindow,nAllPull);
1383 for (i=0;i<nAllPull;i++)
1385 synthWindow[i].nPull=1;
1386 synthWindow[i].nBin=opt->bins;
1387 snew(synthWindow[i].Histo,1);
1388 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1389 snew(synthWindow[i].Histo[0],opt->bins);
1390 snew(synthWindow[i].N,1);
1391 snew(synthWindow[i].pos,1);
1392 snew(synthWindow[i].z,1);
1393 snew(synthWindow[i].k,1);
1394 snew(synthWindow[i].bContrib,1);
1395 snew(synthWindow[i].g,1);
1396 snew(synthWindow[i].bsWeight,1);
1399 switch(opt->bsMethod)
1402 snew(randomArray,nAllPull);
1403 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1404 please_cite(stdout,"Hub2006");
1406 case bsMethod_BayesianHist :
1407 /* just copy all histogams into synthWindow array */
1408 for (i=0;i<nAllPull;i++)
1410 winid =allPull_winId [i];
1411 pullid=allPull_pullId[i];
1412 copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
1416 case bsMethod_trajGauss:
1417 calc_cumulatives(window,nWindows,opt,fnhist);
1420 gmx_fatal(FARGS,"Unknown bootstrap method. That should not have happened.\n");
1423 /* do bootstrapping */
1424 fp=xvgropen(fnprof,"Boot strap profiles","z",ylabel,opt->oenv);
1425 for (ib=0;ib<opt->nBootStrap;ib++)
1427 printf(" *******************************************\n"
1428 " ******** Start bootstrap nr %d ************\n"
1429 " *******************************************\n",ib+1);
1431 switch(opt->bsMethod)
1434 /* bootstrap complete histograms from given histograms */
1435 getRandomIntArray(nAllPull,opt->histBootStrapBlockLength,randomArray,opt->rng);
1436 for (i=0;i<nAllPull;i++){
1437 winid =allPull_winId [randomArray[i]];
1438 pullid=allPull_pullId[randomArray[i]];
1439 copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
1442 case bsMethod_BayesianHist:
1443 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1444 setRandomBsWeights(synthWindow,nAllPull,opt);
1447 case bsMethod_trajGauss:
1448 /* create new histos from given histos, that is generate new hypothetical
1450 for (i=0;i<nAllPull;i++)
1452 winid=allPull_winId[i];
1453 pullid=allPull_pullId[i];
1454 create_synthetic_histo(synthWindow+i,window+winid,pullid,opt);
1459 /* write histos in case of verbose output */
1460 if (opt->bs_verbose)
1461 print_histograms(fnhist,synthWindow,nAllPull,ib,opt);
1467 memcpy(bsProfile,profile,opt->bins*sizeof(double)); /* use profile as guess */
1470 if ( (i%opt->stepUpdateContrib) == 0)
1471 setup_acc_wham(bsProfile,synthWindow,nAllPull,opt);
1472 if (maxchange<opt->Tolerance)
1474 if (((i%opt->stepchange) == 0 || i==1) && !i==0)
1475 printf("\t%4d) Maximum change %e\n",i,maxchange);
1476 calc_profile(bsProfile,synthWindow,nAllPull,opt,bExact);
1478 } while( (maxchange=calc_z(bsProfile, synthWindow, nAllPull, opt,bExact)) > opt->Tolerance || !bExact);
1479 printf("\tConverged in %d iterations. Final maximum change %g\n",i,maxchange);
1482 prof_normalization_and_unit(bsProfile,opt);
1484 /* symmetrize profile around z=0 */
1486 symmetrizeProfile(bsProfile,opt);
1488 /* save stuff to get average and stddev */
1489 for (i=0;i<opt->bins;i++)
1492 bsProfiles_av[i]+=tmp;
1493 bsProfiles_av2[i]+=tmp*tmp;
1494 fprintf(fp,"%e\t%e\n",(i+0.5)*opt->dz+opt->min,tmp);
1500 /* write average and stddev */
1501 fp=xvgropen(fnres,"Average and stddev from bootstrapping","z",ylabel,opt->oenv);
1502 fprintf(fp,"@TYPE xydy\n");
1503 for (i=0;i<opt->bins;i++)
1505 bsProfiles_av [i]/=opt->nBootStrap;
1506 bsProfiles_av2[i]/=opt->nBootStrap;
1507 tmp=bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1508 stddev=(tmp>=0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1509 fprintf(fp,"%e\t%e\t%e\n",(i+0.5)*opt->dz+opt->min,bsProfiles_av [i],stddev);
1512 printf("Wrote boot strap result to %s\n",fnres);
1515 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1516 int whaminFileType(char *fn)
1520 if (strcmp(fn+len-3,"tpr")==0)
1522 else if (strcmp(fn+len-3,"xvg")==0 || strcmp(fn+len-6,"xvg.gz")==0)
1523 return whamin_pullxf;
1524 else if (strcmp(fn+len-3,"pdo")==0 || strcmp(fn+len-6,"pdo.gz")==0)
1527 gmx_fatal(FARGS,"Unknown file type of %s. Should be tpr, xvg, or pdo.\n",fn);
1528 return whamin_unknown;
1531 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1532 void read_wham_in(const char *fn,char ***filenamesRet, int *nfilesRet,
1533 t_UmbrellaOptions *opt)
1535 char **filename=0,tmp[WHAM_MAXFILELEN+2];
1536 int nread,sizenow,i,block=1;
1542 while (fgets(tmp,sizeof(tmp),fp) != NULL)
1544 if (strlen(tmp)>=WHAM_MAXFILELEN)
1545 gmx_fatal(FARGS,"Filename too long in %s. Only %d characters allowed.\n",fn,WHAM_MAXFILELEN);
1549 srenew(filename,sizenow);
1550 for (i=sizenow-block;i<sizenow;i++)
1551 snew(filename[i],WHAM_MAXFILELEN);
1553 /* remove newline if there is one */
1554 if (tmp[strlen(tmp)-1] == '\n')
1556 tmp[strlen(tmp)-1]='\0';
1558 strcpy(filename[nread],tmp);
1560 printf("Found file %s in %s\n",filename[nread],fn);
1563 *filenamesRet=filename;
1567 //! Open a file or a pipe to a gzipped file
1568 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt,gmx_bool *bPipeOpen)
1570 char Buffer[1024],gunzip[1024],*Path=0;
1572 static gmx_bool bFirst=1;
1574 /* gzipped pdo file? */
1575 if ((strcmp(fn+strlen(fn)-3,".gz")==0))
1577 /* search gunzip executable */
1578 if(!(Path=getenv("GMX_PATH_GZIP")))
1580 if (gmx_fexist("/bin/gunzip"))
1581 sprintf(gunzip,"%s","/bin/gunzip");
1582 else if (gmx_fexist("/usr/bin/gunzip"))
1583 sprintf(gunzip,"%s","/usr/bin/gunzip");
1585 gmx_fatal(FARGS,"Cannot find executable gunzip in /bin or /usr/bin.\n"
1586 "You may want to define the path to gunzip "
1587 "with the environment variable GMX_PATH_GZIP.",gunzip);
1591 sprintf(gunzip,"%s/gunzip",Path);
1592 if (!gmx_fexist(gunzip))
1593 gmx_fatal(FARGS,"Cannot find executable %s. Please define the path to gunzip"
1594 " in the environmental varialbe GMX_PATH_GZIP.",gunzip);
1598 printf("Using gunzig executable %s\n",gunzip);
1601 if (!gmx_fexist(fn))
1603 gmx_fatal(FARGS,"File %s does not exist.\n",fn);
1605 sprintf(Buffer,"%s -c < %s",gunzip,fn);
1607 printf("Executing command '%s'\n",Buffer);
1609 if((pipe=popen(Buffer,"r"))==NULL)
1611 gmx_fatal(FARGS,"Unable to open pipe to `%s'\n",Buffer);
1614 gmx_fatal(FARGS,"Cannot open a compressed file on platform without pipe support");
1619 pipe=ffopen(fn,"r");
1626 //! Close file or pipe
1627 void pdo_close_file(FILE *fp)
1636 //! Reading all pdo files
1637 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1638 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1641 real mintmp,maxtmp,done=0.;
1644 /* char Buffer0[1000]; */
1647 gmx_fatal(FARGS,"No files found. Hick.");
1649 /* if min and max are not given, get min and max from the input files */
1652 printf("Automatic determination of boundaries from %d pdo files...\n",nfiles);
1655 for(i=0;i<nfiles;++i)
1657 file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
1658 /*fgets(Buffer0,999,file);
1659 fprintf(stderr,"First line '%s'\n",Buffer0); */
1660 done=100.0*(i+1)/nfiles;
1661 printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
1664 read_pdo_header(file,header,opt);
1665 /* here only determine min and max of this window */
1666 read_pdo_data(file,header,i,NULL,opt,TRUE,&mintmp,&maxtmp);
1667 if (maxtmp>opt->max)
1669 if (mintmp<opt->min)
1672 pdo_close_file(file);
1677 printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
1678 if (opt->bBoundsOnly)
1680 printf("Found option -boundsonly, now exiting.\n");
1684 /* store stepsize in profile */
1685 opt->dz=(opt->max-opt->min)/opt->bins;
1687 /* Having min and max, we read in all files */
1688 /* Loop over all files */
1689 for(i=0;i<nfiles;++i)
1691 done=100.0*(i+1)/nfiles;
1692 printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
1695 file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
1696 read_pdo_header(file,header,opt);
1697 /* load data into window */
1698 read_pdo_data(file,header,i,window,opt,FALSE,NULL,NULL);
1699 if ((window+i)->Ntot[0] == 0)
1700 fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1702 pdo_close_file(file);
1707 for(i=0;i<nfiles;++i)
1712 //! translate 0/1 to N/Y to write pull dimensions
1713 #define int2YN(a) (((a)==0)?("N"):("Y"))
1715 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
1716 void read_tpr_header(const char *fn,t_UmbrellaHeader* header,t_UmbrellaOptions *opt)
1723 /* printf("Reading %s \n",fn); */
1724 read_tpx_state(fn,&ir,&state,NULL,NULL);
1726 if (ir.ePull != epullUMBRELLA)
1727 gmx_fatal(FARGS,"This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
1728 " (ir.ePull = %d)\n", epull_names[ir.ePull],ir.ePull);
1730 /* nr of pull groups */
1733 gmx_fatal(FARGS,"This is not a tpr of umbrella simulation. Found only %d pull groups\n",ngrp);
1735 header->npullgrps=ir.pull->ngrp;
1736 header->pull_geometry=ir.pull->eGeom;
1737 copy_ivec(ir.pull->dim,header->pull_dim);
1738 header->pull_ndim=header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
1739 if (header->pull_geometry==epullgPOS && header->pull_ndim>1)
1741 gmx_fatal(FARGS,"Found pull geometry 'position' and more than 1 pull dimension (%d).\n"
1742 "Hence, the pull potential does not correspond to a one-dimensional umbrella potential.\n"
1743 "If you have some special umbrella setup you may want to write your own pdo files\n"
1744 "and feed them into g_wham. Check g_wham -h !\n",header->pull_ndim);
1746 snew(header->k,ngrp);
1747 snew(header->init_dist,ngrp);
1748 snew(header->umbInitDist,ngrp);
1750 /* only z-direction with epullgCYL? */
1751 if (header->pull_geometry == epullgCYL)
1753 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
1754 gmx_fatal(FARGS,"With pull geometry 'cylinder', expected pulling in Z direction only.\n"
1755 "However, found dimensions [%s %s %s]\n",
1756 int2YN(header->pull_dim[XX]),int2YN(header->pull_dim[YY]),
1757 int2YN(header->pull_dim[ZZ]));
1760 for (i=0;i<ngrp;i++)
1762 header->k[i]=ir.pull->grp[i+1].k;
1763 if (header->k[i]==0.0)
1764 gmx_fatal(FARGS,"Pull group %d has force constant of of 0.0 in %s.\n"
1765 "That doesn't seem to be an Umbrella tpr.\n",
1767 copy_rvec(ir.pull->grp[i+1].init,header->init_dist[i]);
1769 /* initial distance to reference */
1770 switch(header->pull_geometry)
1774 if (header->pull_dim[d])
1775 header->umbInitDist[i]=header->init_dist[i][d];
1778 /* umbrella distance stored in init_dist[i][0] for geometry cylinder (not in ...[i][ZZ]) */
1782 header->umbInitDist[i]=header->init_dist[i][0];
1785 gmx_fatal(FARGS,"Pull geometry %s not supported\n",epullg_names[header->pull_geometry]);
1789 if (opt->verbose || first)
1791 printf("File %s, %d groups, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n",
1792 fn,header->npullgrps,epullg_names[header->pull_geometry],
1793 int2YN(header->pull_dim[0]),int2YN(header->pull_dim[1]),int2YN(header->pull_dim[2]),
1795 for (i=0;i<ngrp;i++)
1796 printf("\tgrp %d) k = %-5g position = %g\n",i,header->k[i],header->umbInitDist[i]);
1798 if (!opt->verbose && first)
1799 printf("\tUse option -v to see this output for all input tpr files\n");
1804 //! 2-norm in a ndim-dimensional space
1805 double dist_ndim(double **dx,int ndim,int line)
1809 for (i=0;i<ndim;i++)
1810 r2+=sqr(dx[i][line]);
1814 //! Read pullx.xvg or pullf.xvg
1815 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
1816 t_UmbrellaWindow * window,
1817 t_UmbrellaOptions *opt,
1818 gmx_bool bGetMinMax,real *mintmp,real *maxtmp,
1819 t_groupselection *groupsel)
1821 double **y=0,pos=0.,t,force,time0=0.,dt;
1822 int ny,nt,bins,ibin,i,g,gUsed,dstep=1,nColPerGrp,nColRefOnce,nColRefEachGrp,nColExpect,ntot;
1823 real min,max,minfound=1e20,maxfound=-1e20;
1824 gmx_bool dt_ok,timeok,bHaveForce;
1825 const char *quantity;
1826 const int blocklen=4096;
1828 static gmx_bool bFirst=TRUE;
1831 in force output pullf.xvg:
1832 No reference, one column per pull group
1833 in position output pullx.xvg (not cylinder)
1834 ndim reference, ndim columns per pull group
1835 in position output pullx.xvg (in geometry cylinder):
1836 ndim*2 columns per pull group (ndim for ref, ndim for group)
1839 nColPerGrp = opt->bPullx ? header->pull_ndim : 1;
1840 quantity = opt->bPullx ? "position" : "force";
1844 if (header->pull_geometry == epullgCYL)
1846 /* Geometry cylinder -> reference group before each pull group */
1847 nColRefEachGrp=header->pull_ndim;
1852 /* Geometry NOT cylinder -> reference group only once after time column */
1854 nColRefOnce=header->pull_ndim;
1857 else /* read forces, no reference groups */
1863 nColExpect = 1 + nColRefOnce + header->npullgrps*(nColRefEachGrp+nColPerGrp);
1864 bHaveForce = opt->bPullf;
1866 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
1867 That avoids the somewhat tedious extraction of the right columns from the pullx files
1868 to compute the distances projection on the vector. Sorry for the laziness. */
1869 if ( (header->pull_geometry==epullgDIR || header->pull_geometry==epullgDIRPBC)
1872 gmx_fatal(FARGS,"With pull geometries \"direction\" and \"direction_periodic\", only pull force "
1873 "reading \n(option -if) is supported at present, "
1874 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
1875 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
1876 epullg_names[header->pull_geometry]);
1879 nt=read_xvg(fn,&y,&ny);
1881 /* Check consistency */
1884 gmx_fatal(FARGS,"Empty pull %s file %s\n",quantity,fn);
1888 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
1889 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
1891 printf("Expecting these columns in pull file:\n"
1892 "\t%d reference columns for all pull groups together\n"
1893 "\t%d reference columns for each individual pull group\n"
1894 "\t%d data columns for each pull group\n", nColRefOnce, nColRefEachGrp, nColPerGrp);
1895 printf("With %d pull groups, expect %d columns (including the time column)\n",header->npullgrps,nColExpect);
1898 if (ny != nColExpect)
1900 gmx_fatal(FARGS,"Found %d pull groups in %s,\n but %d data columns in %s (expected %d)\n"
1901 "\nMaybe you confused options -ix and -if ?\n",
1902 header->npullgrps,fntpr,ny-1,fn,nColExpect-1);
1906 printf("Found %d times and %d %s sets %s\n",nt,(ny-1)/nColPerGrp,quantity,fn);
1915 window->dt=y[0][1]-y[0][0];
1917 else if (opt->nBootStrap && opt->tauBootStrap!=0.0)
1919 fprintf(stderr,"\n *** WARNING, Could not determine time step in %s\n",fn);
1922 /* Need to alocate memory and set up structure */
1926 /* Use only groups selected with option -is file */
1927 if (header->npullgrps != groupsel->n)
1928 gmx_fatal(FARGS,"tpr file contains %d pull groups, but expected %d from group selection file\n",
1929 header->npullgrps,groupsel->n);
1930 window->nPull = groupsel->nUse;
1934 window->nPull = header->npullgrps;
1938 snew(window->Histo,window->nPull);
1939 snew(window->z,window->nPull);
1940 snew(window->k,window->nPull);
1941 snew(window->pos,window->nPull);
1942 snew(window->N, window->nPull);
1943 snew(window->Ntot, window->nPull);
1944 snew(window->g, window->nPull);
1945 snew(window->bsWeight, window->nPull);
1948 if (opt->bCalcTauInt)
1949 snew(window->ztime,window->nPull);
1952 snew(lennow,window->nPull);
1954 for(g=0;g<window->nPull;++g)
1957 window->bsWeight[g]=1.;
1958 snew(window->Histo[g],bins);
1962 if (opt->bCalcTauInt)
1963 window->ztime[g]=NULL;
1966 /* Copying umbrella center and force const is more involved since not
1967 all pull groups from header (tpr file) may be used in window variable */
1968 for(g=0, gUsed=0 ;g<header->npullgrps; ++g)
1970 if (groupsel && (groupsel->bUse[g] == FALSE))
1972 window->k[gUsed]=header->k[g];
1973 window->pos[gUsed]=header->umbInitDist[g];
1978 { /* only determine min and max */
1981 min=max=bins=0; /* Get rid of warnings */
1987 /* Do you want that time frame? */
1988 t=1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
1990 /* get time step of pdo file and get dstep from opt->dt */
2000 dstep=static_cast<int>(opt->dt/dt+0.5);
2005 window->dt=dt*dstep;
2008 dt_ok=(i%dstep == 0);
2009 timeok=(dt_ok && t >= opt->tmin && t <= opt->tmax);
2011 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2012 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2016 /* Note: if groupsel == NULL:
2017 * all groups in pullf/x file are stored in this window, and gUsed == g
2018 * if groupsel != NULL:
2019 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2022 for(g=0;g<header->npullgrps;++g)
2024 /* was this group selected for application in WHAM? */
2025 if (groupsel && (groupsel->bUse[g] == FALSE))
2034 /* y has 1 time column y[0] and one column per force y[1],...,y[nGrps] */
2036 pos= -force/header->k[g] + header->umbInitDist[g];
2040 switch (header->pull_geometry)
2043 /* y has 1 time column y[0] and nColPerGrps columns per pull group;
2044 Distance to reference: */
2045 /* pos=dist_ndim(y+1+nColRef+g*nColPerGrp,header->pull_ndim,i); gmx 4.0 */
2046 pos=dist_ndim(y + 1 + nColRefOnce + g*nColPerGrp,header->pull_ndim,i);
2050 Time ref[ndim] group1[ndim] group2[ndim] ... */
2053 Time ref1[ndim] group1[ndim] ref2[ndim] group2[ndim] ... */
2055 /* * with geometry==position, we have the reference once (nColRefOnce==ndim), but
2056 no extra reference group columns before each group (nColRefEachGrp==0)
2058 * with geometry==cylinder, we have no initial ref group column (nColRefOnce==0),
2059 but ndim ref group colums before every group (nColRefEachGrp==ndim)
2060 Distance to reference: */
2061 pos=y[1 + nColRefOnce + nColRefEachGrp + g*(nColRefEachGrp+nColPerGrp)][i];
2064 gmx_fatal(FARGS,"Bad error, this error should have been catched before. Ups.\n");
2068 /* printf("grp %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2078 if (gUsed>=window->nPull)
2079 gmx_fatal(FARGS,"gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2080 gUsed,window->nPull);
2082 if (opt->bCalcTauInt && !bGetMinMax)
2084 /* save time series for autocorrelation analysis */
2085 ntot=window->Ntot[gUsed];
2086 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2087 if (ntot>=lennow[gUsed])
2089 lennow[gUsed]+=blocklen;
2090 srenew(window->ztime[gUsed],lennow[gUsed]);
2092 window->ztime[gUsed][ntot]=pos;
2095 ibin=static_cast<int> (floor((pos-min)/(max-min)*bins));
2099 while ( (ibin+=bins) < 0);
2100 else if (ibin>=bins)
2101 while ( (ibin-=bins) >= bins);
2103 if(ibin >= 0 && ibin < bins)
2105 window->Histo[gUsed][ibin]+=1.;
2108 window->Ntot[gUsed]++;
2112 else if (t>opt->tmax)
2115 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n",t,opt->tmax);
2130 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2131 void read_tpr_pullxf_files(char **fnTprs,char **fnPull,int nfiles,
2132 t_UmbrellaHeader* header,
2133 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2138 printf("Reading %d tpr and pullf files\n",nfiles/2);
2140 /* min and max not given? */
2143 printf("Automatic determination of boundaries...\n");
2146 for (i=0;i<nfiles; i++)
2148 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2149 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a tpr file\n",i);
2150 read_tpr_header(fnTprs[i],header,opt);
2151 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2152 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
2153 read_pull_xf(fnPull[i],fnTprs[i],header,NULL,opt,TRUE,&mintmp,&maxtmp,
2154 (opt->nGroupsel>0) ? &opt->groupsel[i] : NULL);
2155 if (maxtmp>opt->max)
2157 if (mintmp<opt->min)
2160 printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
2161 if (opt->bBoundsOnly)
2163 printf("Found option -boundsonly, now exiting.\n");
2167 /* store stepsize in profile */
2168 opt->dz=(opt->max-opt->min)/opt->bins;
2170 for (i=0;i<nfiles; i++)
2172 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2173 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a tpr file\n",i);
2174 read_tpr_header(fnTprs[i],header,opt);
2175 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2176 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
2177 read_pull_xf(fnPull[i],fnTprs[i],header,window+i,opt,FALSE,NULL,NULL,
2178 (opt->nGroupsel>0) ? &opt->groupsel[i] : NULL);
2179 if (window[i].Ntot[0] == 0)
2180 fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2183 for (i=0;i<nfiles; i++)
2192 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2194 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2195 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2197 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt,
2203 printf("Readging Integrated autocorrelation times from %s ...\n",fn);
2204 nlines=read_xvg(fn,&iact,&ny);
2206 gmx_fatal(FARGS,"Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2208 for (i=0;i<nlines;i++){
2209 if (window[i].nPull != ny)
2210 gmx_fatal(FARGS,"You are providing autocorrelation times with option -iiact and the\n"
2211 "number of pull groups is different in different simulations. That is not\n"
2212 "supported yet. Sorry.\n");
2213 for (ig=0;ig<window[i].nPull;ig++){
2214 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2215 window[i].g[ig]=1+2*iact[ig][i]/window[i].dt;
2217 if (iact[ig][i] <= 0.0)
2218 fprintf(stderr,"\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i],i,ig);
2224 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2227 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2228 * that -in case of limited sampling- the ACT may be severely underestimated.
2229 * Note: the g=1+2tau are overwritten.
2230 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2233 void smoothIact(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
2236 double pos,dpos2,siglim,siglim2,gaufact,invtwosig2,w,weight,tausm;
2238 /* only evaluate within +- 3sigma of the Gausian */
2239 siglim=3.0*opt->sigSmoothIact;
2240 siglim2=dsqr(siglim);
2241 /* pre-factor of Gaussian */
2242 gaufact=1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2243 invtwosig2=0.5/dsqr(opt->sigSmoothIact);
2245 for (i=0;i<nwins;i++)
2247 snew(window[i].tausmooth,window[i].nPull);
2248 for (ig=0;ig<window[i].nPull;ig++)
2252 pos=window[i].pos[ig];
2253 for (j=0;j<nwins;j++)
2255 for (jg=0;jg<window[j].nPull;jg++)
2257 dpos2=dsqr(window[j].pos[jg]-pos);
2259 w=gaufact*exp(-dpos2*invtwosig2);
2261 tausm+=w*window[j].tau[jg];
2262 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2263 w,dpos2,pos,gaufact,invtwosig2); */
2268 if (opt->bAllowReduceIact || tausm>window[i].tau[ig])
2269 window[i].tausmooth[ig]=tausm;
2271 window[i].tausmooth[ig]=window[i].tau[ig];
2272 window[i].g[ig] = 1+2*tausm/window[i].dt;
2277 //! Stop integrating autoccorelation function when ACF drops under this value
2278 #define WHAM_AC_ZERO_LIMIT 0.05
2280 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2282 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,
2283 t_UmbrellaOptions *opt, const char *fn)
2285 int i,ig,ncorr,ntot,j,k,*count,restart;
2286 real *corr,c0,dt,tmp;
2287 real *ztime,av,tausteps;
2291 fpcorr=xvgropen("hist_autocorr.xvg","Autocorrelation functions of umbrella windows",
2292 "time [ps]","autocorrelation function",opt->oenv);
2295 for (i=0;i<nwins;i++)
2297 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...",100.*(i+1)/nwins);
2299 ntot=window[i].Ntot[0];
2301 /* using half the maximum time as length of autocorrelation function */
2304 gmx_fatal(FARGS,"Tryig to estimtate autocorrelation time from only %d"
2305 " points. Provide more pull data!",ntot);
2307 /* snew(corrSq,ncorr); */
2310 snew(window[i].tau,window[i].nPull);
2311 restart=static_cast<int>(opt->acTrestart/dt+0.5);
2315 for (ig=0;ig<window[i].nPull;ig++)
2317 if (ntot != window[i].Ntot[ig])
2318 gmx_fatal(FARGS,"Encountered different nr of frames in different pull groups.\n"
2319 "That should not happen. (%d and %d)\n", ntot,window[i].Ntot[ig]);
2320 ztime=window[i].ztime[ig];
2322 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2323 for(j=0, av=0; (j<ntot); j++)
2326 for(k=0; (k<ncorr); k++)
2331 for(j=0; (j<ntot); j+=restart)
2332 for(k=0; (k<ncorr) && (j+k < ntot); k++)
2334 tmp=(ztime[j]-av)*(ztime[j+k]-av);
2336 /* corrSq[k] += tmp*tmp; */
2339 /* divide by nr of frames for each time displacement */
2340 for(k=0; (k<ncorr); k++)
2342 /* count probably = (ncorr-k+(restart-1))/restart; */
2343 corr[k] = corr[k]/count[k];
2344 /* variance of autocorrelation function */
2345 /* corrSq[k]=corrSq[k]/count[k]; */
2347 /* normalize such that corr[0] == 0 */
2349 for(k=0; (k<ncorr); k++)
2352 /* corrSq[k]*=c0*c0; */
2355 /* write ACFs in verbose mode */
2358 for(k=0; (k<ncorr); k++)
2359 fprintf(fpcorr,"%g %g\n",k*dt,corr[k]);
2360 fprintf(fpcorr,"&\n");
2363 /* esimate integrated correlation time, fitting is too unstable */
2364 tausteps = 0.5*corr[0];
2365 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2366 for(j=1; (j<ncorr) && (corr[j]>WHAM_AC_ZERO_LIMIT); j++)
2367 tausteps += corr[j];
2369 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2370 Kumar et al, eq. 28 ff. */
2371 window[i].tau[ig] = tausteps*dt;
2372 window[i].g[ig] = 1+2*tausteps;
2373 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2382 /* plot IACT along reaction coordinate */
2383 fp=xvgropen(fn,"Integrated autocorrelation times","z","IACT [ps]",opt->oenv);
2384 fprintf(fp,"@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2385 fprintf(fp,"# WIN tau(gr1) tau(gr2) ...\n");
2386 for (i=0;i<nwins;i++)
2388 fprintf(fp,"# %3d ",i);
2389 for (ig=0;ig<window[i].nPull;ig++)
2390 fprintf(fp," %11g",window[i].tau[ig]);
2393 for (i=0;i<nwins;i++)
2394 for (ig=0;ig<window[i].nPull;ig++)
2395 fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tau[ig]);
2396 if (opt->sigSmoothIact > 0.0)
2398 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2399 opt->sigSmoothIact);
2400 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2401 smoothIact(window,nwins,opt);
2402 fprintf(fp,"&\n@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2403 fprintf(fp,"@ s1 symbol color 2\n");
2404 for (i=0;i<nwins;i++)
2405 for (ig=0;ig<window[i].nPull;ig++)
2406 fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tausmooth[ig]);
2409 printf("Wrote %s\n",fn);
2413 * compute average and sigma of each umbrella histogram
2415 void averageSigma(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
2418 real av,sum2,sig,diff,*ztime,nSamplesIndep;
2420 for (i=0;i<nwins;i++)
2422 snew(window[i].aver, window[i].nPull);
2423 snew(window[i].sigma,window[i].nPull);
2425 ntot=window[i].Ntot[0];
2426 for (ig=0;ig<window[i].nPull;ig++)
2428 ztime=window[i].ztime[ig];
2429 for (k=0, av=0.; k<ntot; k++)
2432 for (k=0, sum2=0.; k<ntot; k++)
2437 sig=sqrt(sum2/ntot);
2438 window[i].aver[ig]=av;
2440 /* Note: This estimate for sigma is biased from the limited sampling.
2441 Correct sigma by n/(n-1) where n = number of independent
2442 samples. Only possible if IACT is known.
2446 nSamplesIndep=window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2447 window[i].sigma[ig]=sig * nSamplesIndep/(nSamplesIndep-1);
2450 window[i].sigma[ig]=sig;
2451 printf("win %d, aver = %f sig = %f\n",i,av,window[i].sigma[ig]);
2458 * Use histograms to compute average force on pull group.
2460 void computeAverageForce(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt)
2462 int i,j,bins=opt->bins,k;
2463 double dz,min=opt->min,max=opt->max,displAv,temp,distance,ztot,ztot_half,w,weight;
2470 /* Compute average displacement from histograms */
2471 for(j=0;j<nWindows;++j)
2473 snew(window[j].forceAv,window[j].nPull);
2474 for(k=0;k<window[j].nPull;++k)
2478 for(i=0;i<opt->bins;++i)
2480 temp=(1.0*i+0.5)*dz+min;
2481 distance = temp - window[j].pos[k];
2483 { /* in cyclic wham: */
2484 if (distance > ztot_half) /* |distance| < ztot_half */
2486 else if (distance < -ztot_half)
2489 w=window[j].Histo[k][i]/window[j].g[k];
2490 displAv += w*distance;
2492 /* Are we near min or max? We are getting wrong forces from the histgrams since
2493 the histograms are zero outside [min,max). Therefore, assume that the position
2494 on the other side of the histomgram center is equally likely. */
2497 posmirrored=window[j].pos[k]-distance;
2498 if (posmirrored>=max || posmirrored<min)
2500 displAv += -w*distance;
2507 /* average force from average displacement */
2508 window[j].forceAv[k] = displAv*window[j].k[k];
2509 /* sigma from average square displacement */
2510 /* window[j].sigma [k] = sqrt(displAv2); */
2511 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2517 * Check if the complete reaction coordinate is covered by the histograms
2519 void checkReactionCoordinateCovered(t_UmbrellaWindow *window,int nwins,
2520 t_UmbrellaOptions *opt)
2522 int i,ig,j,bins=opt->bins,bBoundary;
2523 real avcount=0,z,relcount,*count;
2524 snew(count,opt->bins);
2526 for(j=0;j<opt->bins;++j)
2528 for (i=0;i<nwins;i++){
2529 for (ig=0;ig<window[i].nPull;ig++)
2530 count[j]+=window[i].Histo[ig][j];
2532 avcount+=1.0*count[j];
2537 relcount=count[j]/avcount;
2538 z=(j+0.5)*opt->dz+opt->min;
2539 bBoundary=( j<bins/20 || (bins-j)>bins/20 );
2540 /* check for bins with no data */
2542 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2543 "You may not get a reasonable profile. Check your histograms!\n",j,z);
2544 /* and check for poor sampling */
2545 else if (relcount<0.005 && !bBoundary)
2546 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n",j,z);
2551 /*! \brief Compute initial potential by integrating the average force
2553 * This speeds up the convergence by roughly a factor of 2
2555 void guessPotByIntegration(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt,
2558 int i,j,ig,bins=opt->bins,nHist,winmin,groupmin;
2559 double dz,min=opt->min,*pot,pos,hispos,dist,diff,fAv,distmin,*f;
2562 dz=(opt->max-min)/bins;
2564 printf("Getting initial potential by integration.\n");
2566 /* Compute average displacement from histograms */
2567 computeAverageForce(window,nWindows,opt);
2569 /* Get force for each bin from all histograms in this bin, or, alternatively,
2570 if no histograms are inside this bin, from the closest histogram */
2573 for(j=0;j<opt->bins;++j)
2575 pos=(1.0*j+0.5)*dz+min;
2580 for (i=0;i<nWindows;i++)
2582 for (ig=0;ig<window[i].nPull;ig++)
2584 hispos=window[i].pos[ig];
2585 dist=fabs(hispos-pos);
2586 /* average force within bin */
2590 fAv+=window[i].forceAv[ig];
2592 /* at the same time, rememer closest histogram */
2600 /* if no histogram found in this bin, use closest histogram */
2604 fAv=window[winmin].forceAv[groupmin];
2608 for(j=1;j<opt->bins;++j)
2609 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
2611 /* cyclic wham: linearly correct possible offset */
2614 diff=(pot[bins-1]-pot[0])/(bins-1);
2615 for(j=1;j<opt->bins;++j)
2620 fp=xvgropen("pmfintegrated.xvg","PMF from force integration","z","PMF [kJ/mol]",opt->oenv);
2621 for(j=0;j<opt->bins;++j)
2622 fprintf(fp,"%g %g\n",(j+0.5)*dz+opt->min,pot[j]);
2624 printf("verbose mode: wrote %s with PMF from interated forces\n","pmfintegrated.xvg");
2627 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
2628 energy offsets which are usually determined by wham
2629 First: turn pot into probabilities:
2631 for(j=0;j<opt->bins;++j)
2632 pot[j]=exp(-pot[j]/(8.314e-3*opt->Temperature));
2633 calc_z(pot,window,nWindows,opt,TRUE);
2639 //! Count number of words in an line
2640 static int wordcount(char *ptr)
2645 if (strlen(ptr) == 0)
2647 /* fprintf(stderr,"ptr='%s'\n",ptr); */
2649 for(i=0; (ptr[i] != '\0'); i++) {
2650 is[cur] = isspace(ptr[i]);
2651 if ((i > 0) && (is[cur] && !is[1-cur]))
2658 /*! \brief Read input file for pull group selection (option -is)
2660 * TO DO: ptr=fgets(...) is never freed (small memory leak)
2662 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
2665 int i,iline,n,len=STRLEN,temp;
2666 char *ptr=0,*tmpbuf=0;
2667 char fmt[1024],fmtign[1024];
2668 int block=1,sizenow;
2670 fp=ffopen(opt->fnGroupsel,"r");
2676 while ( (ptr=fgets3(fp,tmpbuf,&len)) !=NULL)
2681 if (iline >= sizenow)
2684 srenew(opt->groupsel,sizenow);
2686 opt->groupsel[iline].n = n;
2687 opt->groupsel[iline].nUse = 0;
2688 snew(opt->groupsel[iline].bUse,n);
2695 if (sscanf(ptr,fmt,&temp))
2697 opt->groupsel[iline].bUse[i] = (temp > 0);
2698 if ( opt->groupsel[iline].bUse[i])
2699 opt->groupsel[iline].nUse++;
2701 strcat(fmtign,"%*s");
2705 opt->nGroupsel=iline;
2706 if (nTpr != opt->nGroupsel)
2707 gmx_fatal(FARGS,"Found %d tpr files but %d lines in %s\n",nTpr,opt->nGroupsel,
2710 printf("\nUse only these pull groups:\n");
2711 for (iline=0; iline<nTpr; iline++)
2713 printf("%s (%d of %d groups):",fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
2714 for (i=0; i < opt->groupsel[iline].n; i++)
2715 if (opt->groupsel[iline].bUse[i])
2725 #define WHAMBOOLXOR(a,b) ( ((!(a))&&(b)) || ((a)&&(!(b))))
2727 /*! Number of elements in fnm (used for command line parsing) */
2728 #define NFILE asize(fnm)
2730 //! The main g_wham routine
2731 int gmx_wham(int argc,char *argv[])
2733 const char *desc[] = {
2734 "This is an analysis program that implements the Weighted",
2735 "Histogram Analysis Method (WHAM). It is intended to analyze",
2736 "output files generated by umbrella sampling simulations to ",
2737 "compute a potential of mean force (PMF). [PAR] ",
2738 "At present, three input modes are supported.[BR]",
2739 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
2740 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
2741 " AND, with option [TT]-ix[tt], a file which contains file names of",
2742 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
2743 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
2744 " first pullx, etc.[BR]",
2745 "[TT]*[tt] Same as the previous input mode, except that the the user",
2746 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
2747 " From the pull force the position in the umbrella potential is",
2748 " computed. This does not work with tabulated umbrella potentials.[BR]"
2749 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
2750 " the GROMACS 3.3 umbrella output files. If you have some unusual"
2751 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
2752 " feed them with the [TT]-ip[tt] option into to [TT]g_wham[tt]. The [TT].pdo[tt] file header",
2753 " must be similar to the following:[PAR]",
2754 "[TT]# UMBRELLA 3.0[BR]",
2755 "# Component selection: 0 0 1[BR]",
2757 "# Ref. Group 'TestAtom'[BR]",
2758 "# Nr. of pull groups 2[BR]",
2759 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
2760 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
2762 "The number of pull groups, umbrella positions, force constants, and names ",
2763 "may (of course) differ. Following the header, a time column and ",
2764 "a data column for each pull group follows (i.e. the displacement",
2765 "with respect to the umbrella center). Up to four pull groups are possible ",
2766 "per [TT].pdo[tt] file at present.[PAR]",
2767 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
2768 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
2769 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
2770 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
2771 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
2772 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
2773 "used, groupsel.dat looks like this:[BR]",
2777 "By default, the output files are[BR]",
2778 " [TT]-o[tt] PMF output file[BR]",
2779 " [TT]-hist[tt] Histograms output file[BR]",
2780 "Always check whether the histograms sufficiently overlap.[PAR]",
2781 "The umbrella potential is assumed to be harmonic and the force constants are ",
2782 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
2783 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
2784 "WHAM OPTIONS[BR]------------[BR]",
2785 " [TT]-bins[tt] Number of bins used in analysis[BR]",
2786 " [TT]-temp[tt] Temperature in the simulations[BR]",
2787 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
2788 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
2789 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
2790 "The data points that are used to compute the profile",
2791 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
2792 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
2793 "umbrella window.[PAR]",
2794 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
2795 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
2796 "With energy output, the energy in the first bin is defined to be zero. ",
2797 "If you want the free energy at a different ",
2798 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
2799 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
2800 "without osmotic gradient), the option [TT]-cycl[tt] is useful. [TT]g_wham[tt] will make use of the ",
2801 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
2802 "reaction coordinate will assumed be be neighbors.[PAR]",
2803 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
2804 "which may be useful for, e.g. membranes.[PAR]",
2805 "AUTOCORRELATIONS[BR]----------------[BR]",
2806 "With [TT]-ac[tt], [TT]g_wham[tt] estimates the integrated autocorrelation ",
2807 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
2808 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
2809 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
2810 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
2811 "Because the IACTs can be severely underestimated in case of limited ",
2812 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
2813 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
2814 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
2815 "integration of the ACFs while the ACFs are larger 0.05.",
2816 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
2817 "less robust) method such as fitting to a double exponential, you can ",
2818 "compute the IACTs with [TT]g_analyze[tt] and provide them to [TT]g_wham[tt] with the file ",
2819 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
2820 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
2821 "ERROR ANALYSIS[BR]--------------[BR]",
2822 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
2823 "otherwise the statistical error may be substantially underestimated. ",
2824 "More background and examples for the bootstrap technique can be found in ",
2825 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
2826 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
2827 "Four bootstrapping methods are supported and ",
2828 "selected with [TT]-bs-method[tt].[BR]",
2829 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
2830 "data points, and the bootstrap is carried out by assigning random weights to the ",
2831 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
2832 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
2833 "statistical error is underestimated.[BR]",
2834 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
2835 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
2836 "(allowing duplication, i.e. sampling with replacement).",
2837 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
2838 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
2839 "divided into blocks and only histograms within each block are mixed. Note that ",
2840 "the histograms within each block must be representative for all possible histograms, ",
2841 "otherwise the statistical error is underestimated.[BR]",
2842 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
2843 "such that the generated data points are distributed according the given histograms ",
2844 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
2845 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
2846 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
2847 "Note that this method may severely underestimate the error in case of limited sampling, ",
2848 "that is if individual histograms do not represent the complete phase space at ",
2849 "the respective positions.[BR]",
2850 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
2851 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
2852 "and width of the umbrella histograms. That method yields similar error estimates ",
2853 "like method [TT]traj[tt].[PAR]"
2854 "Bootstrapping output:[BR]",
2855 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
2856 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
2857 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
2858 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
2862 const char *en_unit[]={NULL,"kJ","kCal","kT",NULL};
2863 const char *en_unit_label[]={"","E (kJ mol\\S-1\\N)","E (kcal mol\\S-1\\N)","E (kT)",NULL};
2864 const char *en_bsMethod[]={ NULL,"b-hist", "hist", "traj", "traj-gauss", NULL };
2866 static t_UmbrellaOptions opt;
2869 { "-min", FALSE, etREAL, {&opt.min},
2870 "Minimum coordinate in profile"},
2871 { "-max", FALSE, etREAL, {&opt.max},
2872 "Maximum coordinate in profile"},
2873 { "-auto", FALSE, etBOOL, {&opt.bAuto},
2874 "Determine min and max automatically"},
2875 { "-bins",FALSE, etINT, {&opt.bins},
2876 "Number of bins in profile"},
2877 { "-temp", FALSE, etREAL, {&opt.Temperature},
2879 { "-tol", FALSE, etREAL, {&opt.Tolerance},
2881 { "-v", FALSE, etBOOL, {&opt.verbose},
2883 { "-b", FALSE, etREAL, {&opt.tmin},
2884 "First time to analyse (ps)"},
2885 { "-e", FALSE, etREAL, {&opt.tmax},
2886 "Last time to analyse (ps)"},
2887 { "-dt", FALSE, etREAL, {&opt.dt},
2888 "Analyse only every dt ps"},
2889 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
2890 "Write histograms and exit"},
2891 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
2892 "Determine min and max and exit (with [TT]-auto[tt])"},
2893 { "-log", FALSE, etBOOL, {&opt.bLog},
2894 "Calculate the log of the profile before printing"},
2895 { "-unit", FALSE, etENUM, {en_unit},
2896 "Energy unit in case of log output" },
2897 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
2898 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
2899 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
2900 "Create cyclic/periodic profile. Assumes min and max are the same point."},
2901 { "-sym", FALSE, etBOOL, {&opt.bSym},
2902 "Symmetrize profile around z=0"},
2903 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
2904 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
2905 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
2906 "Calculate integrated autocorrelation times and use in wham"},
2907 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
2908 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
2909 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
2910 "When computing autocorrelation functions, restart computing every .. (ps)"},
2911 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
2912 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
2913 "during smoothing"},
2914 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
2915 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
2916 { "-bs-method", FALSE, etENUM, {en_bsMethod},
2917 "Bootstrap method" },
2918 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
2919 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
2920 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
2921 "Seed for bootstrapping. (-1 = use time)"},
2922 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
2923 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
2924 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
2925 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
2926 { "-stepout", FALSE, etINT, {&opt.stepchange},
2927 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
2928 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
2929 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
2933 { efDAT, "-ix","pullx-files",ffOPTRD}, /* wham input: pullf.xvg's and tprs */
2934 { efDAT, "-if","pullf-files",ffOPTRD}, /* wham input: pullf.xvg's and tprs */
2935 { efDAT, "-it","tpr-files",ffOPTRD}, /* wham input: tprs */
2936 { efDAT, "-ip","pdo-files",ffOPTRD}, /* wham input: pdo files (gmx3 style) */
2937 { efDAT, "-is","groupsel",ffOPTRD}, /* input: select pull groups to use */
2938 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
2939 { efXVG, "-hist","histo", ffWRITE}, /* output file for histograms */
2940 { efXVG, "-oiact","iact",ffOPTWR}, /* writing integrated autocorrelation times */
2941 { efDAT, "-iiact","iact-in",ffOPTRD}, /* reading integrated autocorrelation times */
2942 { efXVG, "-bsres","bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
2943 { efXVG, "-bsprof","bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
2944 { efDAT, "-tab","umb-pot",ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
2947 int i,j,l,nfiles,nwins,nfiles2;
2948 t_UmbrellaHeader header;
2949 t_UmbrellaWindow * window=NULL;
2950 double *profile,maxchange=1e20;
2951 gmx_bool bMinSet,bMaxSet,bAutoSet,bExact=FALSE;
2952 char **fninTpr,**fninPull,**fninPdo;
2954 FILE *histout,*profout;
2955 char ylabel[256],title[256];
2957 CopyRight(stderr,argv[0]);
2961 opt.bHistOnly=FALSE;
2971 /* bootstrapping stuff */
2973 opt.bsMethod=bsMethod_hist;
2974 opt.tauBootStrap=0.0;
2976 opt.histBootStrapBlockLength=8;
2977 opt.bs_verbose=FALSE;
2982 opt.Temperature=298;
2984 opt.bBoundsOnly=FALSE;
2986 opt.bCalcTauInt=FALSE;
2987 opt.sigSmoothIact=0.0;
2988 opt.bAllowReduceIact=TRUE;
2989 opt.bInitPotByIntegration=TRUE;
2992 opt.stepUpdateContrib=100;
2994 parse_common_args(&argc,argv,PCA_BE_NICE,
2995 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&opt.oenv);
2997 opt.unit=nenum(en_unit);
2998 opt.bsMethod=nenum(en_bsMethod);
3000 opt.bProf0Set=opt2parg_bSet("-zprof0", asize(pa), pa);
3002 opt.bTab=opt2bSet("-tab",NFILE,fnm);
3003 opt.bPdo=opt2bSet("-ip",NFILE,fnm);
3004 opt.bTpr=opt2bSet("-it",NFILE,fnm);
3005 opt.bPullx=opt2bSet("-ix",NFILE,fnm);
3006 opt.bPullf=opt2bSet("-if",NFILE,fnm);
3007 opt.bTauIntGiven=opt2bSet("-iiact",NFILE,fnm);
3008 if (opt.bTab && opt.bPullf)
3009 gmx_fatal(FARGS,"Force input does not work with tabulated potentials. "
3010 "Provide pullx.xvg or pdo files!");
3012 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx,opt.bPullf))
3013 gmx_fatal(FARGS,"Give either pullx (-ix) OR pullf (-if) data. Not both.");
3014 if ( !opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3015 gmx_fatal(FARGS,"g_wham supports three input modes, pullx, pullf, or pdo file input."
3016 "\n\n Check g_wham -h !");
3018 opt.fnPdo=opt2fn("-ip",NFILE,fnm);
3019 opt.fnTpr=opt2fn("-it",NFILE,fnm);
3020 opt.fnPullf=opt2fn("-if",NFILE,fnm);
3021 opt.fnPullx=opt2fn("-ix",NFILE,fnm);
3022 opt.fnGroupsel=opt2fn_null("-is",NFILE,fnm);
3024 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3025 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3026 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3027 if ( (bMinSet || bMaxSet) && bAutoSet)
3028 gmx_fatal(FARGS,"With -auto, do not give -min or -max\n");
3030 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3031 gmx_fatal(FARGS,"When giving -min, you must give -max (and vice versa), too\n");
3033 if (bMinSet && opt.bAuto)
3035 printf("Note: min and max given, switching off -auto.\n");
3039 if (opt.bTauIntGiven && opt.bCalcTauInt)
3040 gmx_fatal(FARGS,"Either read (option -iiact) or calculate (option -ac) the\n"
3041 "the autocorrelation times. Not both.");
3043 if (opt.tauBootStrap>0.0 && opt2parg_bSet("-ac",asize(pa), pa))
3044 gmx_fatal(FARGS,"Either compute autocorrelation times (ACTs) (option -ac) or "
3045 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3046 if (opt.tauBootStrap>0.0 && opt2bSet("-iiact",NFILE,fnm))
3047 gmx_fatal(FARGS,"Either provide autocorrelation times (ACTs) with file iact-in.dat "
3048 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3050 /* Reading gmx4 pull output and tpr files */
3051 if (opt.bTpr || opt.bPullf || opt.bPullx)
3053 read_wham_in(opt.fnTpr,&fninTpr,&nfiles,&opt);
3055 fnPull=opt.bPullf ? opt.fnPullf : opt.fnPullx;
3056 read_wham_in(fnPull,&fninPull,&nfiles2,&opt);
3057 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3058 nfiles,nfiles2,opt.bPullf ? "force" : "position",opt.fnTpr,fnPull);
3059 if (nfiles!=nfiles2)
3060 gmx_fatal(FARGS,"Found %d file names in %s, but %d in %s\n",nfiles,
3061 opt.fnTpr,nfiles2,fnPull);
3063 /* Read file that selects the pull group to be used */
3064 if (opt.fnGroupsel != NULL)
3065 readPullGroupSelection(&opt,fninTpr,nfiles);
3067 window=initUmbrellaWindows(nfiles);
3068 read_tpr_pullxf_files(fninTpr,fninPull,nfiles, &header, window, &opt);
3071 { /* reading pdo files */
3072 if (opt.fnGroupsel != NULL)
3073 gmx_fatal(FARGS,"Reading a -is file is not supported with PDO input files.\n"
3074 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3075 read_wham_in(opt.fnPdo,&fninPdo,&nfiles,&opt);
3076 printf("Found %d pdo files in %s\n",nfiles,opt.fnPdo);
3077 window=initUmbrellaWindows(nfiles);
3078 read_pdo_files(fninPdo,nfiles, &header, window, &opt);
3082 /* enforce equal weight for all histograms? */
3084 enforceEqualWeights(window,nwins);
3086 /* write histograms */
3087 histout=xvgropen(opt2fn("-hist",NFILE,fnm),"Umbrella histograms",
3088 "z","count",opt.oenv);
3089 for(l=0;l<opt.bins;++l)
3091 fprintf(histout,"%e\t",(double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3092 for(i=0;i<nwins;++i)
3094 for(j=0;j<window[i].nPull;++j)
3096 fprintf(histout,"%e\t",window[i].Histo[j][l]);
3099 fprintf(histout,"\n");
3102 printf("Wrote %s\n",opt2fn("-hist",NFILE,fnm));
3105 printf("Wrote histograms to %s, now exiting.\n",opt2fn("-hist",NFILE,fnm));
3109 /* Using tabulated umbrella potential */
3111 setup_tab(opt2fn("-tab",NFILE,fnm),&opt);
3113 /* Integrated autocorrelation times provided ? */
3114 if (opt.bTauIntGiven)
3115 readIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-iiact",NFILE,fnm));
3117 /* Compute integrated autocorrelation times */
3118 if (opt.bCalcTauInt)
3119 calcIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-oiact",NFILE,fnm));
3121 /* calc average and sigma for each histogram
3122 (maybe required for bootstrapping. If not, this is fast anyhow) */
3123 if (opt.nBootStrap && opt.bsMethod==bsMethod_trajGauss)
3124 averageSigma(window,nwins,&opt);
3126 /* Get initial potential by simple integration */
3127 if (opt.bInitPotByIntegration)
3128 guessPotByIntegration(window,nwins,&opt,0);
3130 /* Check if complete reaction coordinate is covered */
3131 checkReactionCoordinateCovered(window,nwins,&opt);
3133 /* Calculate profile */
3134 snew(profile,opt.bins);
3140 if ( (i%opt.stepUpdateContrib) == 0)
3141 setup_acc_wham(profile,window,nwins,&opt);
3142 if (maxchange<opt.Tolerance)
3145 /* if (opt.verbose) */
3146 printf("Switched to exact iteration in iteration %d\n",i);
3148 calc_profile(profile,window,nwins,&opt,bExact);
3149 if (((i%opt.stepchange) == 0 || i==1) && !i==0)
3150 printf("\t%4d) Maximum change %e\n",i,maxchange);
3152 } while ( (maxchange=calc_z(profile, window, nwins, &opt,bExact)) > opt.Tolerance || !bExact);
3153 printf("Converged in %d iterations. Final maximum change %g\n",i,maxchange);
3155 /* calc error from Kumar's formula */
3156 /* Unclear how the error propagates along reaction coordinate, therefore
3158 /* calc_error_kumar(profile,window, nwins,&opt); */
3160 /* Write profile in energy units? */
3163 prof_normalization_and_unit(profile,&opt);
3164 strcpy(ylabel,en_unit_label[opt.unit]);
3165 strcpy(title,"Umbrella potential");
3169 strcpy(ylabel,"Density of states");
3170 strcpy(title,"Density of states");
3173 /* symmetrize profile around z=0? */
3175 symmetrizeProfile(profile,&opt);
3177 /* write profile or density of states */
3178 profout=xvgropen(opt2fn("-o",NFILE,fnm),title,"z",ylabel,opt.oenv);
3179 for(i=0;i<opt.bins;++i)
3180 fprintf(profout,"%e\t%e\n",(double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min,profile[i]);
3182 printf("Wrote %s\n",opt2fn("-o",NFILE,fnm));
3184 /* Bootstrap Method */
3186 do_bootstrapping(opt2fn("-bsres",NFILE,fnm),opt2fn("-bsprof",NFILE,fnm),
3187 opt2fn("-hist",NFILE,fnm),
3188 ylabel, profile, window, nwins, &opt);
3191 freeUmbrellaWindows(window,nfiles);
3193 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3194 please_cite(stdout,"Hub2010");