1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- */
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
38 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
40 * \author Jochen Hub <jhub@gwdg.de>
58 #include "gmx_random.h"
65 //! longest file names allowed in input files
66 #define WHAM_MAXFILELEN 2048
69 * enum for energy units
72 enSel, en_kJ, en_kCal, en_kT, enNr
75 * enum for type of input files (pdos, tpr, or pullf)
78 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
81 /*! \brief enum for bootstrapping method
83 * These bootstrap methods are supported:
84 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
85 * (bsMethod_BayesianHist)
86 * - bootstrap complete histograms (bsMethod_hist)
87 * - bootstrap trajectories from given umbrella histograms. This generates new
88 * "synthetic" histograms (bsMethod_traj)
89 * - bootstrap trajectories from Gaussian with mu/sigma computed from
90 * the respective histogram (bsMethod_trajGauss). This gives very similar
91 * results compared to bsMethod_traj.
93 * ********************************************************************
94 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
95 * JS Hub, BL de Groot, D van der Spoel
96 * g_wham - A free weighted histogram analysis implementation including
97 * robust error and autocorrelation estimates,
98 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
99 * ********************************************************************
102 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
103 bsMethod_traj, bsMethod_trajGauss
107 //! Parameters of the umbrella potentials
111 * \name Using umbrella pull code since gromacs 4.x
114 int npullgrps; //!< nr of pull groups in tpr file
115 int pull_geometry; //!< such as distance, position
116 ivec pull_dim; //!< pull dimension with geometry distance
117 int pull_ndim; //!< nr of pull_dim != 0
118 real *k; //!< force constants in tpr file
119 rvec *init_dist; //!< reference displacements
120 real *umbInitDist; //!< reference displacement in umbrella direction
123 * \name Using PDO files common until gromacs 3.x
131 char PullName[4][256];
133 double UmbCons[4][3];
137 //! Data in the umbrella histograms
140 int nPull; //!< nr of pull groups in this pdo or pullf/x file
141 double **Histo; //!< nPull histograms
142 double **cum; //!< nPull cumulative distribution functions
143 int nBin; //!< nr of bins. identical to opt->bins
144 double *k; //!< force constants for the nPull groups
145 double *pos; //!< umbrella positions for the nPull groups
146 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
147 int *N; //!< nr of data points in nPull histograms.
148 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
150 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
152 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
153 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
156 double *tau; //!< intetrated autocorrelation time (IACT)
157 double *tausmooth; //!< smoothed IACT
159 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
161 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
163 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
165 /*! \brief average force estimated from average displacement, fAv=dzAv*k
167 * Used for integration to guess the potential.
170 real *aver; //!< average of histograms
171 real *sigma; //!< stddev of histograms
172 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
175 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
178 int n; //!< total nr of pull groups in this tpr file
179 int nUse; //!< nr of pull groups used
180 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
183 //! Parameters of WHAM
190 const char *fnTpr, *fnPullf, *fnGroupsel;
191 const char *fnPdo, *fnPullx; //!< file names of input
192 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
193 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
195 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
196 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
197 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
198 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
201 * \name Basic WHAM options
204 int bins; //!< nr of bins, min, max, and dz of profile
206 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
207 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
210 * \name Output control
213 gmx_bool bLog; //!< energy output (instead of probability) for profile
214 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
215 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
216 /*! \brief after wham, set prof to zero at this z-position.
217 * When bootstrapping, set zProf0 to a "stable" reference position.
220 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
222 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
223 gmx_bool bAuto; //!< determine min and max automatically but do not exit
225 gmx_bool verbose; //!< more noisy wham mode
226 int stepchange; //!< print maximum change in prof after how many interations
227 output_env_t oenv; //!< xvgr options
230 * \name Autocorrelation stuff
233 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
234 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
235 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
236 real acTrestart; //!< when computing ACT, time between restarting points
238 /* \brief Enforce the same weight for each umbella window, that is
239 * calculate with the same number of data points for
240 * each window. That can be reasonable, if the histograms
241 * have different length, but due to autocorrelation,
242 * a longer simulation should not have larger weightin wham.
248 * \name Bootstrapping stuff
251 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
253 /* \brief bootstrap method
255 * if == bsMethod_hist, consider complete histograms as independent
256 * data points and, hence, only mix complete histograms.
257 * if == bsMethod_BayesianHist, consider complete histograms
258 * as independent data points, but assign random weights
259 * to the histograms during the bootstrapping ("Bayesian bootstrap")
260 * In case of long correlations (e.g., inside a channel), these
261 * will yield a more realistic error.
262 * if == bsMethod_traj(Gauss), generate synthetic histograms
264 * histogram by generating an autocorrelated random sequence
265 * that is distributed according to the respective given
266 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
267 * (instead of from the umbrella histogram) to generate a new
272 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
275 /* \brief when mixing histograms, mix only histograms withing blocks
276 long the reaction coordinate xi. Avoids gaps along xi. */
277 int histBootStrapBlockLength;
279 int bsSeed; //!< random seed for bootstrapping
281 /* \brief Write cumulative distribution functions (CDFs) of histograms
282 and write the generated histograms for each bootstrap */
286 * \name tabulated umbrella potential stuff
290 double *tabX, *tabY, tabMin, tabMax, tabDz;
293 gmx_rng_t rng; //!< gromacs random number generator
296 //! Make an umbrella window (may contain several histograms)
297 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
299 t_UmbrellaWindow *win;
302 for (i = 0; i < nwin; i++)
304 win[i].Histo = win[i].cum = 0;
305 win[i].k = win[i].pos = win[i].z = 0;
306 win[i].N = win[i].Ntot = 0;
307 win[i].g = win[i].tau = win[i].tausmooth = 0;
311 win[i].aver = win[i].sigma = 0;
317 //! Delete an umbrella window (may contain several histograms)
318 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
321 for (i = 0; i < nwin; i++)
325 for (j = 0; j < win[i].nPull; j++)
327 sfree(win[i].Histo[j]);
332 for (j = 0; j < win[i].nPull; j++)
334 sfree(win[i].cum[j]);
339 for (j = 0; j < win[i].nPull; j++)
341 sfree(win[i].bContrib[j]);
353 sfree(win[i].tausmooth);
354 sfree(win[i].bContrib);
356 sfree(win[i].forceAv);
359 sfree(win[i].bsWeight);
365 * Read and setup tabulated umbrella potential
367 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
372 printf("Setting up tabulated potential from file %s\n", fn);
373 nl = read_xvg(fn, &y, &ny);
377 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
379 opt->tabMin = y[0][0];
380 opt->tabMax = y[0][nl-1];
381 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
384 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
385 "ascending z-direction", fn);
387 for (i = 0; i < nl-1; i++)
389 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
391 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
396 for (i = 0; i < nl; i++)
398 opt->tabX[i] = y[0][i];
399 opt->tabY[i] = y[1][i];
401 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
402 opt->tabMin, opt->tabMax, opt->tabDz);
405 //! Read the header of an PDO file (position, force const, nr of groups)
406 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
409 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
411 std::istringstream ist;
414 if (fgets(line, 2048, file) == NULL)
416 gmx_fatal(FARGS, "Error reading header from pdo file\n");
419 ist >> Buffer0 >> Buffer1 >> Buffer2;
420 if (strcmp(Buffer1, "UMBRELLA"))
422 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
423 "(Found in first line: `%s')\n",
424 Buffer1, "UMBRELLA", line);
426 if (strcmp(Buffer2, "3.0"))
428 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
432 if (fgets(line, 2048, file) == NULL)
434 gmx_fatal(FARGS, "Error reading header from pdo file\n");
437 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
438 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
440 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
441 if (header->nDim != 1)
443 gmx_fatal(FARGS, "Currently only supports one dimension");
447 if (fgets(line, 2048, file) == NULL)
449 gmx_fatal(FARGS, "Error reading header from pdo file\n");
452 ist >> Buffer0 >> Buffer1 >> header->nSkip;
455 if (fgets(line, 2048, file) == NULL)
457 gmx_fatal(FARGS, "Error reading header from pdo file\n");
460 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
463 if (fgets(line, 2048, file) == NULL)
465 gmx_fatal(FARGS, "Error reading header from pdo file\n");
468 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
472 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
476 for (i = 0; i < header->nPull; ++i)
478 if (fgets(line, 2048, file) == NULL)
480 gmx_fatal(FARGS, "Error reading header from pdo file\n");
483 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
484 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
485 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
489 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
490 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
494 if (fgets(line, 2048, file) == NULL)
496 gmx_fatal(FARGS, "Cannot read from file\n");
500 if (strcmp(Buffer3, "#####") != 0)
502 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
507 static char *fgets3(FILE *fp, char ptr[], int *len)
512 if (fgets(ptr, *len-1, fp) == NULL)
517 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
519 /* This line is longer than len characters, let's increase len! */
523 if (fgets(p-1, STRLEN, fp) == NULL)
529 if (ptr[slen-1] == '\n')
537 /*! \brief Read the data columns of and PDO file.
539 * TO DO: Get rid of the scanf function to avoid the clang warning.
540 * At the moment, this warning is avoided by hiding the format string
541 * the variable fmtlf.
543 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
544 int fileno, t_UmbrellaWindow * win,
545 t_UmbrellaOptions *opt,
546 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
548 int i, inttemp, bins, count, ntot;
549 real min, max, minfound = 1e20, maxfound = -1e20;
550 double temp, time, time0 = 0, dt;
552 t_UmbrellaWindow * window = 0;
553 gmx_bool timeok, dt_ok = 1;
554 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
555 int len = STRLEN, dstep = 1;
556 const int blocklen = 4096;
566 /* Need to alocate memory and set up structure */
567 window->nPull = header->nPull;
570 snew(window->Histo, window->nPull);
571 snew(window->z, window->nPull);
572 snew(window->k, window->nPull);
573 snew(window->pos, window->nPull);
574 snew(window->N, window->nPull);
575 snew(window->Ntot, window->nPull);
576 snew(window->g, window->nPull);
577 snew(window->bsWeight, window->nPull);
579 window->bContrib = 0;
581 if (opt->bCalcTauInt)
583 snew(window->ztime, window->nPull);
589 snew(lennow, window->nPull);
591 for (i = 0; i < window->nPull; ++i)
594 window->bsWeight[i] = 1.;
595 snew(window->Histo[i], bins);
596 window->k[i] = header->UmbCons[i][0];
597 window->pos[i] = header->UmbPos[i][0];
601 if (opt->bCalcTauInt)
603 window->ztime[i] = 0;
607 /* Done with setup */
613 min = max = bins = 0; /* Get rid of warnings */
618 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
622 if (ptr[0] == '#' || strlen(ptr) < 2)
627 /* Initiate format string */
629 strcat(fmtign, "%*s");
631 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
632 /* Round time to fs */
633 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
635 /* get time step of pdo file */
645 dstep = static_cast<int>(opt->dt/dt+0.5);
653 window->dt = dt*dstep;
658 dt_ok = ((count-1)%dstep == 0);
659 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
661 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
662 time,opt->tmin, opt->tmax, dt_ok,timeok); */
666 for (i = 0; i < header->nPull; ++i)
669 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
670 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
671 if (sscanf(ptr, fmt, &temp))
673 temp += header->UmbPos[i][0];
687 if (opt->bCalcTauInt)
689 /* save time series for autocorrelation analysis */
690 ntot = window->Ntot[i];
691 if (ntot >= lennow[i])
693 lennow[i] += blocklen;
694 srenew(window->ztime[i], lennow[i]);
696 window->ztime[i][ntot] = temp;
704 inttemp = static_cast<int> (temp);
711 else if (inttemp >= bins)
717 if (inttemp >= 0 && inttemp < bins)
719 window->Histo[i][inttemp] += 1.;
727 if (time > opt->tmax)
731 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
747 /*! \brief Set identical weights for all histograms
749 * Normally, the weight is given by the number data points in each
750 * histogram, together with the autocorrelation time. This can be overwritten
751 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
752 * an appropriate autocorrelation times instead of using this function.
754 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
756 int i, k, j, NEnforced;
759 NEnforced = window[0].Ntot[0];
760 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
761 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
762 /* enforce all histograms to have the same weight as the very first histogram */
764 for (j = 0; j < nWindows; ++j)
766 for (k = 0; k < window[j].nPull; ++k)
768 ratio = 1.0*NEnforced/window[j].Ntot[k];
769 for (i = 0; i < window[0].nBin; ++i)
771 window[j].Histo[k][i] *= ratio;
773 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
778 /*! \brief Simple linear interpolation between two given tabulated points
780 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
783 double pl, pu, dz, dp;
785 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
787 if (jl < 0 || ju >= opt->tabNbins)
789 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
790 "Provide an extended table.", dist, jl, ju);
794 dz = dist-opt->tabX[jl];
795 dp = (pu-pl)*dz/opt->tabDz;
801 * Check which bins substiantially contribute (accelerates WHAM)
803 * Don't worry, that routine does not mean we compute the PMF in limited precision.
804 * After rapid convergence (using only substiantal contributions), we always switch to
807 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
808 t_UmbrellaOptions *opt)
810 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
811 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
812 gmx_bool bAnyContrib;
813 static int bFirst = 1;
814 static double wham_contrib_lim;
818 for (i = 0; i < nWindows; ++i)
820 nGrptot += window[i].nPull;
822 wham_contrib_lim = opt->Tolerance/nGrptot;
825 ztot = opt->max-opt->min;
828 for (i = 0; i < nWindows; ++i)
830 if (!window[i].bContrib)
832 snew(window[i].bContrib, window[i].nPull);
834 for (j = 0; j < window[i].nPull; ++j)
836 if (!window[i].bContrib[j])
838 snew(window[i].bContrib[j], opt->bins);
841 for (k = 0; k < opt->bins; ++k)
843 temp = (1.0*k+0.5)*dz+min;
844 distance = temp - window[i].pos[j]; /* distance to umbrella center */
846 { /* in cyclic wham: */
847 if (distance > ztot_half) /* |distance| < ztot_half */
851 else if (distance < -ztot_half)
856 /* Note: there are two contributions to bin k in the wham equations:
857 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
858 ii) exp(- U/(8.314e-3*opt->Temperature))
859 where U is the umbrella potential
860 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
865 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
869 U = tabulated_pot(distance, opt); /* Use tabulated potential */
872 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
873 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
874 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
875 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
876 if (window[i].bContrib[j][k])
882 /* If this histo is far outside min and max all bContrib may be FALSE,
883 causing a floating point exception later on. To avoid that, switch
887 for (k = 0; k < opt->bins; ++k)
889 window[i].bContrib[j][k] = TRUE;
896 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
897 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
902 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
908 //! Compute the PMF (one of the two main WHAM routines)
909 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
910 t_UmbrellaOptions *opt, gmx_bool bExact)
913 double num, ztot_half, ztot, distance, min = opt->min, dz = opt->dz;
914 double denom, U = 0, temp = 0, invg;
916 ztot = opt->max-opt->min;
919 for (i = 0; i < opt->bins; ++i)
922 for (j = 0; j < nWindows; ++j)
924 for (k = 0; k < window[j].nPull; ++k)
926 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
927 temp = (1.0*i+0.5)*dz+min;
928 num += invg*window[j].Histo[k][i];
930 if (!(bExact || window[j].bContrib[k][i]))
934 distance = temp - window[j].pos[k]; /* distance to umbrella center */
936 { /* in cyclic wham: */
937 if (distance > ztot_half) /* |distance| < ztot_half */
941 else if (distance < -ztot_half)
949 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
953 U = tabulated_pot(distance, opt); /* Use tabulated potential */
955 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
958 profile[i] = num/denom;
962 //! Compute the free energy offsets z (one of the two main WHAM routines)
963 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
964 t_UmbrellaOptions *opt, gmx_bool bExact)
967 double U = 0, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot;
968 double MAX = -1e20, total = 0;
970 ztot = opt->max-opt->min;
973 for (i = 0; i < nWindows; ++i)
975 for (j = 0; j < window[i].nPull; ++j)
978 for (k = 0; k < window[i].nBin; ++k)
980 if (!(bExact || window[i].bContrib[j][k]))
984 temp = (1.0*k+0.5)*dz+min;
985 distance = temp - window[i].pos[j]; /* distance to umbrella center */
987 { /* in cyclic wham: */
988 if (distance > ztot_half) /* |distance| < ztot_half */
992 else if (distance < -ztot_half)
1000 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1004 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1007 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1009 /* Avoid floating point exception if window is far outside min and max */
1012 total = -log(total);
1018 temp = fabs(total - window[i].z[j]);
1023 window[i].z[j] = total;
1029 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1030 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1032 int i, j, bins = opt->bins;
1033 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1036 if (min > 0. || max < 0.)
1038 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1039 opt->min, opt->max);
1044 for (i = 0; i < bins; i++)
1048 /* bin left of zsym */
1049 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1050 if (j >= 0 && (j+1) < bins)
1052 /* interpolate profile linearly between bins j and j+1 */
1053 z1 = min+(j+0.5)*dz;
1055 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1056 /* average between left and right */
1057 prof2[i] = 0.5*(profsym+profile[i]);
1061 prof2[i] = profile[i];
1065 memcpy(profile, prof2, bins*sizeof(double));
1069 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1070 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1073 double unit_factor = 1., R_MolarGasConst, diff;
1075 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1078 /* Not log? Nothing to do! */
1084 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1085 if (opt->unit == en_kT)
1089 else if (opt->unit == en_kJ)
1091 unit_factor = R_MolarGasConst*opt->Temperature;
1093 else if (opt->unit == en_kCal)
1095 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1099 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1102 for (i = 0; i < bins; i++)
1104 if (profile[i] > 0.0)
1106 profile[i] = -log(profile[i])*unit_factor;
1110 /* shift to zero at z=opt->zProf0 */
1111 if (!opt->bProf0Set)
1117 /* Get bin with shortest distance to opt->zProf0
1118 (-0.5 from bin position and +0.5 from rounding cancel) */
1119 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1124 else if (imin >= bins)
1128 diff = profile[imin];
1132 for (i = 0; i < bins; i++)
1138 //! Make an array of random integers (used for bootstrapping)
1139 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1141 int ipull, blockBase, nr, ipullRandom;
1143 if (blockLength == 0)
1145 blockLength = nPull;
1148 for (ipull = 0; ipull < nPull; ipull++)
1150 blockBase = (ipull/blockLength)*blockLength;
1152 { /* make sure nothing bad happens in the last block */
1153 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1154 ipullRandom = blockBase + nr;
1156 while (ipullRandom >= nPull);
1157 if (ipullRandom < 0 || ipullRandom >= nPull)
1159 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1160 "blockLength = %d, blockBase = %d\n",
1161 ipullRandom, nPull, nr, blockLength, blockBase);
1163 randomArray[ipull] = ipullRandom;
1165 /*for (ipull=0; ipull<nPull; ipull++)
1166 printf("%d ",randomArray[ipull]); printf("\n"); */
1169 /*! \brief Set pull group information of a synthetic histogram
1171 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1172 * but it is not required if we bootstrap complete histograms.
1174 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1175 t_UmbrellaWindow *thisWindow, int pullid)
1177 synthWindow->N [0] = thisWindow->N [pullid];
1178 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1179 synthWindow->pos [0] = thisWindow->pos [pullid];
1180 synthWindow->z [0] = thisWindow->z [pullid];
1181 synthWindow->k [0] = thisWindow->k [pullid];
1182 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1183 synthWindow->g [0] = thisWindow->g [pullid];
1184 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1187 /*! \brief Calculate cumulative distribution function of of all histograms.
1189 * This allow to create random number sequences
1190 * which are distributed according to the histograms. Required to generate
1191 * the "synthetic" histograms for the Bootstrap method
1193 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1194 t_UmbrellaOptions *opt, const char *fnhist)
1198 char *fn = 0, *buf = 0;
1201 if (opt->bs_verbose)
1203 snew(fn, strlen(fnhist)+10);
1204 snew(buf, strlen(fnhist)+10);
1205 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1206 fp = xvgropen(fn, "CDFs of umbrella windows", "z", "CDF", opt->oenv);
1210 for (i = 0; i < nWindows; i++)
1212 snew(window[i].cum, window[i].nPull);
1213 for (j = 0; j < window[i].nPull; j++)
1215 snew(window[i].cum[j], nbin+1);
1216 window[i].cum[j][0] = 0.;
1217 for (k = 1; k <= nbin; k++)
1219 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1222 /* normalize CDFs. Ensure cum[nbin]==1 */
1223 last = window[i].cum[j][nbin];
1224 for (k = 0; k <= nbin; k++)
1226 window[i].cum[j][k] /= last;
1231 printf("Cumulative distriubtion functions of all histograms created.\n");
1232 if (opt->bs_verbose)
1234 for (k = 0; k <= nbin; k++)
1236 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1237 for (i = 0; i < nWindows; i++)
1239 for (j = 0; j < window[i].nPull; j++)
1241 fprintf(fp, "%g\t", window[i].cum[j][k]);
1246 printf("Wrote cumulative distribution functions to %s\n", fn);
1254 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1256 * This is used to generate a random sequence distributed according to a histogram
1258 void searchCumulative(double xx[], int n, double x, int *j)
1280 else if (x == xx[n-1])
1290 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1291 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1292 int pullid, t_UmbrellaOptions *opt)
1294 int N, i, nbins, r_index, ibin;
1295 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1298 N = thisWindow->N[pullid];
1299 dt = thisWindow->dt;
1300 nbins = thisWindow->nBin;
1302 /* tau = autocorrelation time */
1303 if (opt->tauBootStrap > 0.0)
1305 tausteps = opt->tauBootStrap/dt;
1307 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1309 /* calc tausteps from g=1+2tausteps */
1310 g = thisWindow->g[pullid];
1316 "When generating hypothetical trajctories from given umbrella histograms,\n"
1317 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1318 "cannot be predicted. You have 3 options:\n"
1319 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1320 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1322 " with option -iiact for all umbrella windows.\n"
1323 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1324 " Use option (3) only if you are sure what you're doing, you may severely\n"
1325 " underestimate the error if a too small ACT is given.\n");
1326 gmx_fatal(FARGS, errstr);
1329 synthWindow->N [0] = N;
1330 synthWindow->pos [0] = thisWindow->pos[pullid];
1331 synthWindow->z [0] = thisWindow->z[pullid];
1332 synthWindow->k [0] = thisWindow->k[pullid];
1333 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1334 synthWindow->g [0] = thisWindow->g [pullid];
1335 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1337 for (i = 0; i < nbins; i++)
1339 synthWindow->Histo[0][i] = 0.;
1342 if (opt->bsMethod == bsMethod_trajGauss)
1344 sig = thisWindow->sigma [pullid];
1345 mu = thisWindow->aver [pullid];
1348 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1350 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1352 z = a*x + sqrt(1-a^2)*y
1353 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1354 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1356 C(t) = exp(-t/tau) with tau=-1/ln(a)
1358 Then, use error function to turn the Gaussian random variable into a uniformly
1359 distributed one in [0,1]. Eventually, use cumulative distribution function of
1360 histogram to get random variables distributed according to histogram.
1361 Note: The ACT of the flat distribution and of the generated histogram is not
1362 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1364 a = exp(-1.0/tausteps);
1366 invsqrt2 = 1./sqrt(2.0);
1368 /* init random sequence */
1369 x = gmx_rng_gaussian_table(opt->rng);
1371 if (opt->bsMethod == bsMethod_traj)
1373 /* bootstrap points from the umbrella histograms */
1374 for (i = 0; i < N; i++)
1376 y = gmx_rng_gaussian_table(opt->rng);
1378 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1379 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1381 r = 0.5*(1+gmx_erf(x*invsqrt2));
1382 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1383 synthWindow->Histo[0][r_index] += 1.;
1386 else if (opt->bsMethod == bsMethod_trajGauss)
1388 /* bootstrap points from a Gaussian with the same average and sigma
1389 as the respective umbrella histogram. The idea was, that -given
1390 limited sampling- the bootstrapped histograms are otherwise biased
1391 from the limited sampling of the US histos. However, bootstrapping from
1392 the Gaussian seems to yield a similar estimate. */
1396 y = gmx_rng_gaussian_table(opt->rng);
1399 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1404 while ( (ibin += nbins) < 0)
1409 else if (ibin >= nbins)
1411 while ( (ibin -= nbins) >= nbins)
1418 if (ibin >= 0 && ibin < nbins)
1420 synthWindow->Histo[0][ibin] += 1.;
1427 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1431 /*! \brief Write all histograms to a file
1433 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1434 * sets of bootstrapped histograms.
1436 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1437 int bs_index, t_UmbrellaOptions *opt)
1439 char *fn = 0, *buf = 0, title[256];
1445 snew(fn, strlen(fnhist)+10);
1446 snew(buf, strlen(fnhist)+1);
1447 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1448 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1452 fn = strdup(fnhist);
1453 strcpy(title, "Umbrella histograms");
1456 fp = xvgropen(fn, title, "z", "count", opt->oenv);
1459 /* Write histograms */
1460 for (l = 0; l < bins; ++l)
1462 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1463 for (i = 0; i < nWindows; ++i)
1465 for (j = 0; j < window[i].nPull; ++j)
1467 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1474 printf("Wrote %s\n", fn);
1482 //! Used for qsort to sort random numbers
1483 int func_wham_is_larger(const void *a, const void *b)
1502 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1503 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1510 /* generate ordered random numbers between 0 and nAllPull */
1511 for (i = 0; i < nAllPull-1; i++)
1513 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1515 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1516 r[nAllPull-1] = 1.0*nAllPull;
1518 synthwin[0].bsWeight[0] = r[0];
1519 for (i = 1; i < nAllPull; i++)
1521 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1524 /* avoid to have zero weight by adding a tiny value */
1525 for (i = 0; i < nAllPull; i++)
1527 if (synthwin[i].bsWeight[0] < 1e-5)
1529 synthwin[i].bsWeight[0] = 1e-5;
1536 //! The main bootstrapping routine
1537 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1538 char* ylabel, double *profile,
1539 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1541 t_UmbrellaWindow * synthWindow;
1542 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1543 int i, j, *randomArray = 0, winid, pullid, ib;
1544 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1546 gmx_bool bExact = FALSE;
1548 /* init random generator */
1549 if (opt->bsSeed == -1)
1551 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1555 opt->rng = gmx_rng_init(opt->bsSeed);
1558 snew(bsProfile, opt->bins);
1559 snew(bsProfiles_av, opt->bins);
1560 snew(bsProfiles_av2, opt->bins);
1562 /* Create array of all pull groups. Note that different windows
1563 may have different nr of pull groups
1564 First: Get total nr of pull groups */
1566 for (i = 0; i < nWindows; i++)
1568 nAllPull += window[i].nPull;
1570 snew(allPull_winId, nAllPull);
1571 snew(allPull_pullId, nAllPull);
1573 /* Setup one array of all pull groups */
1574 for (i = 0; i < nWindows; i++)
1576 for (j = 0; j < window[i].nPull; j++)
1578 allPull_winId[iAllPull] = i;
1579 allPull_pullId[iAllPull] = j;
1584 /* setup stuff for synthetic windows */
1585 snew(synthWindow, nAllPull);
1586 for (i = 0; i < nAllPull; i++)
1588 synthWindow[i].nPull = 1;
1589 synthWindow[i].nBin = opt->bins;
1590 snew(synthWindow[i].Histo, 1);
1591 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1593 snew(synthWindow[i].Histo[0], opt->bins);
1595 snew(synthWindow[i].N, 1);
1596 snew(synthWindow[i].pos, 1);
1597 snew(synthWindow[i].z, 1);
1598 snew(synthWindow[i].k, 1);
1599 snew(synthWindow[i].bContrib, 1);
1600 snew(synthWindow[i].g, 1);
1601 snew(synthWindow[i].bsWeight, 1);
1604 switch (opt->bsMethod)
1607 snew(randomArray, nAllPull);
1608 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1609 please_cite(stdout, "Hub2006");
1611 case bsMethod_BayesianHist:
1612 /* just copy all histogams into synthWindow array */
1613 for (i = 0; i < nAllPull; i++)
1615 winid = allPull_winId [i];
1616 pullid = allPull_pullId[i];
1617 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1621 case bsMethod_trajGauss:
1622 calc_cumulatives(window, nWindows, opt, fnhist);
1625 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1628 /* do bootstrapping */
1629 fp = xvgropen(fnprof, "Boot strap profiles", "z", ylabel, opt->oenv);
1630 for (ib = 0; ib < opt->nBootStrap; ib++)
1632 printf(" *******************************************\n"
1633 " ******** Start bootstrap nr %d ************\n"
1634 " *******************************************\n", ib+1);
1636 switch (opt->bsMethod)
1639 /* bootstrap complete histograms from given histograms */
1640 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1641 for (i = 0; i < nAllPull; i++)
1643 winid = allPull_winId [randomArray[i]];
1644 pullid = allPull_pullId[randomArray[i]];
1645 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1648 case bsMethod_BayesianHist:
1649 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1650 setRandomBsWeights(synthWindow, nAllPull, opt);
1653 case bsMethod_trajGauss:
1654 /* create new histos from given histos, that is generate new hypothetical
1656 for (i = 0; i < nAllPull; i++)
1658 winid = allPull_winId[i];
1659 pullid = allPull_pullId[i];
1660 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1665 /* write histos in case of verbose output */
1666 if (opt->bs_verbose)
1668 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1675 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1678 if ( (i%opt->stepUpdateContrib) == 0)
1680 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1682 if (maxchange < opt->Tolerance)
1686 if (((i%opt->stepchange) == 0 || i == 1) && !i == 0)
1688 printf("\t%4d) Maximum change %e\n", i, maxchange);
1690 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1693 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1694 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1698 prof_normalization_and_unit(bsProfile, opt);
1701 /* symmetrize profile around z=0 */
1704 symmetrizeProfile(bsProfile, opt);
1707 /* save stuff to get average and stddev */
1708 for (i = 0; i < opt->bins; i++)
1711 bsProfiles_av[i] += tmp;
1712 bsProfiles_av2[i] += tmp*tmp;
1713 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1719 /* write average and stddev */
1720 fp = xvgropen(fnres, "Average and stddev from bootstrapping", "z", ylabel, opt->oenv);
1721 fprintf(fp, "@TYPE xydy\n");
1722 for (i = 0; i < opt->bins; i++)
1724 bsProfiles_av [i] /= opt->nBootStrap;
1725 bsProfiles_av2[i] /= opt->nBootStrap;
1726 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1727 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1728 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1731 printf("Wrote boot strap result to %s\n", fnres);
1734 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1735 int whaminFileType(char *fn)
1739 if (strcmp(fn+len-3, "tpr") == 0)
1743 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1745 return whamin_pullxf;
1747 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1753 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1755 return whamin_unknown;
1758 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1759 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1760 t_UmbrellaOptions *opt)
1762 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1763 int nread, sizenow, i, block = 1;
1766 fp = ffopen(fn, "r");
1769 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1771 if (strlen(tmp) >= WHAM_MAXFILELEN)
1773 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1775 if (nread >= sizenow)
1778 srenew(filename, sizenow);
1779 for (i = sizenow-block; i < sizenow; i++)
1781 snew(filename[i], WHAM_MAXFILELEN);
1784 /* remove newline if there is one */
1785 if (tmp[strlen(tmp)-1] == '\n')
1787 tmp[strlen(tmp)-1] = '\0';
1789 strcpy(filename[nread], tmp);
1792 printf("Found file %s in %s\n", filename[nread], fn);
1796 *filenamesRet = filename;
1800 //! Open a file or a pipe to a gzipped file
1801 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1803 char Buffer[1024], gunzip[1024], *Path = 0;
1805 static gmx_bool bFirst = 1;
1807 /* gzipped pdo file? */
1808 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1810 /* search gunzip executable */
1811 if (!(Path = getenv("GMX_PATH_GZIP")))
1813 if (gmx_fexist("/bin/gunzip"))
1815 sprintf(gunzip, "%s", "/bin/gunzip");
1817 else if (gmx_fexist("/usr/bin/gunzip"))
1819 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1823 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1824 "You may want to define the path to gunzip "
1825 "with the environment variable GMX_PATH_GZIP.", gunzip);
1830 sprintf(gunzip, "%s/gunzip", Path);
1831 if (!gmx_fexist(gunzip))
1833 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1834 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1839 printf("Using gunzig executable %s\n", gunzip);
1842 if (!gmx_fexist(fn))
1844 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1846 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1849 printf("Executing command '%s'\n", Buffer);
1852 if ((pipe = popen(Buffer, "r")) == NULL)
1854 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1857 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1863 pipe = ffopen(fn, "r");
1870 //! Close file or pipe
1871 void pdo_close_file(FILE *fp)
1880 //! Reading all pdo files
1881 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1882 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1885 real mintmp, maxtmp, done = 0.;
1888 /* char Buffer0[1000]; */
1892 gmx_fatal(FARGS, "No files found. Hick.");
1895 /* if min and max are not given, get min and max from the input files */
1898 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1901 for (i = 0; i < nfiles; ++i)
1903 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1904 /*fgets(Buffer0,999,file);
1905 fprintf(stderr,"First line '%s'\n",Buffer0); */
1906 done = 100.0*(i+1)/nfiles;
1907 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1912 read_pdo_header(file, header, opt);
1913 /* here only determine min and max of this window */
1914 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1915 if (maxtmp > opt->max)
1919 if (mintmp < opt->min)
1925 pdo_close_file(file);
1933 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1934 if (opt->bBoundsOnly)
1936 printf("Found option -boundsonly, now exiting.\n");
1940 /* store stepsize in profile */
1941 opt->dz = (opt->max-opt->min)/opt->bins;
1943 /* Having min and max, we read in all files */
1944 /* Loop over all files */
1945 for (i = 0; i < nfiles; ++i)
1947 done = 100.0*(i+1)/nfiles;
1948 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1953 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1954 read_pdo_header(file, header, opt);
1955 /* load data into window */
1956 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
1957 if ((window+i)->Ntot[0] == 0)
1959 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1963 pdo_close_file(file);
1971 for (i = 0; i < nfiles; ++i)
1978 //! translate 0/1 to N/Y to write pull dimensions
1979 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
1981 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
1982 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
1987 static int first = 1;
1989 /* printf("Reading %s \n",fn); */
1990 read_tpx_state(fn, &ir, &state, NULL, NULL);
1992 if (ir.ePull != epullUMBRELLA)
1994 gmx_fatal(FARGS, "This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
1995 " (ir.ePull = %d)\n", epull_names[ir.ePull], ir.ePull);
1998 /* nr of pull groups */
1999 ngrp = ir.pull->ngrp;
2002 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d pull groups\n", ngrp);
2005 header->npullgrps = ir.pull->ngrp;
2006 header->pull_geometry = ir.pull->eGeom;
2007 copy_ivec(ir.pull->dim, header->pull_dim);
2008 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2009 if (header->pull_geometry == epullgPOS && header->pull_ndim > 1)
2011 gmx_fatal(FARGS, "Found pull geometry 'position' and more than 1 pull dimension (%d).\n"
2012 "Hence, the pull potential does not correspond to a one-dimensional umbrella potential.\n"
2013 "If you have some special umbrella setup you may want to write your own pdo files\n"
2014 "and feed them into g_wham. Check g_wham -h !\n", header->pull_ndim);
2016 snew(header->k, ngrp);
2017 snew(header->init_dist, ngrp);
2018 snew(header->umbInitDist, ngrp);
2020 /* only z-direction with epullgCYL? */
2021 if (header->pull_geometry == epullgCYL)
2023 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2025 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2026 "However, found dimensions [%s %s %s]\n",
2027 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2028 int2YN(header->pull_dim[ZZ]));
2032 for (i = 0; i < ngrp; i++)
2034 header->k[i] = ir.pull->grp[i+1].k;
2035 if (header->k[i] == 0.0)
2037 gmx_fatal(FARGS, "Pull group %d has force constant of of 0.0 in %s.\n"
2038 "That doesn't seem to be an Umbrella tpr.\n",
2041 copy_rvec(ir.pull->grp[i+1].init, header->init_dist[i]);
2043 /* initial distance to reference */
2044 switch (header->pull_geometry)
2047 for (d = 0; d < DIM; d++)
2049 if (header->pull_dim[d])
2051 header->umbInitDist[i] = header->init_dist[i][d];
2056 /* umbrella distance stored in init_dist[i][0] for geometry cylinder (not in ...[i][ZZ]) */
2060 header->umbInitDist[i] = header->init_dist[i][0];
2063 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2067 if (opt->verbose || first)
2069 printf("File %s, %d groups, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n",
2070 fn, header->npullgrps, epullg_names[header->pull_geometry],
2071 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2073 for (i = 0; i < ngrp; i++)
2075 printf("\tgrp %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2078 if (!opt->verbose && first)
2080 printf("\tUse option -v to see this output for all input tpr files\n");
2086 //! 2-norm in a ndim-dimensional space
2087 double dist_ndim(double **dx, int ndim, int line)
2091 for (i = 0; i < ndim; i++)
2093 r2 += sqr(dx[i][line]);
2098 //! Read pullx.xvg or pullf.xvg
2099 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2100 t_UmbrellaWindow * window,
2101 t_UmbrellaOptions *opt,
2102 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2103 t_groupselection *groupsel)
2105 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2106 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerGrp, nColRefOnce, nColRefEachGrp, nColExpect, ntot;
2107 real min, max, minfound = 1e20, maxfound = -1e20;
2108 gmx_bool dt_ok, timeok, bHaveForce;
2109 const char *quantity;
2110 const int blocklen = 4096;
2112 static gmx_bool bFirst = TRUE;
2115 in force output pullf.xvg:
2116 No reference, one column per pull group
2117 in position output pullx.xvg (not cylinder)
2118 ndim reference, ndim columns per pull group
2119 in position output pullx.xvg (in geometry cylinder):
2120 ndim*2 columns per pull group (ndim for ref, ndim for group)
2123 nColPerGrp = opt->bPullx ? header->pull_ndim : 1;
2124 quantity = opt->bPullx ? "position" : "force";
2128 if (header->pull_geometry == epullgCYL)
2130 /* Geometry cylinder -> reference group before each pull group */
2131 nColRefEachGrp = header->pull_ndim;
2136 /* Geometry NOT cylinder -> reference group only once after time column */
2138 nColRefOnce = header->pull_ndim;
2141 else /* read forces, no reference groups */
2147 nColExpect = 1 + nColRefOnce + header->npullgrps*(nColRefEachGrp+nColPerGrp);
2148 bHaveForce = opt->bPullf;
2150 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2151 That avoids the somewhat tedious extraction of the right columns from the pullx files
2152 to compute the distances projection on the vector. Sorry for the laziness. */
2153 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2156 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2157 "reading \n(option -if) is supported at present, "
2158 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2159 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2160 epullg_names[header->pull_geometry]);
2163 nt = read_xvg(fn, &y, &ny);
2165 /* Check consistency */
2168 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2172 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2173 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2175 printf("Expecting these columns in pull file:\n"
2176 "\t%d reference columns for all pull groups together\n"
2177 "\t%d reference columns for each individual pull group\n"
2178 "\t%d data columns for each pull group\n", nColRefOnce, nColRefEachGrp, nColPerGrp);
2179 printf("With %d pull groups, expect %d columns (including the time column)\n", header->npullgrps, nColExpect);
2182 if (ny != nColExpect)
2184 gmx_fatal(FARGS, "Found %d pull groups in %s,\n but %d data columns in %s (expected %d)\n"
2185 "\nMaybe you confused options -ix and -if ?\n",
2186 header->npullgrps, fntpr, ny-1, fn, nColExpect-1);
2191 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerGrp, quantity, fn);
2201 window->dt = y[0][1]-y[0][0];
2203 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2205 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2208 /* Need to alocate memory and set up structure */
2212 /* Use only groups selected with option -is file */
2213 if (header->npullgrps != groupsel->n)
2215 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2216 header->npullgrps, groupsel->n);
2218 window->nPull = groupsel->nUse;
2222 window->nPull = header->npullgrps;
2225 window->nBin = bins;
2226 snew(window->Histo, window->nPull);
2227 snew(window->z, window->nPull);
2228 snew(window->k, window->nPull);
2229 snew(window->pos, window->nPull);
2230 snew(window->N, window->nPull);
2231 snew(window->Ntot, window->nPull);
2232 snew(window->g, window->nPull);
2233 snew(window->bsWeight, window->nPull);
2234 window->bContrib = 0;
2236 if (opt->bCalcTauInt)
2238 snew(window->ztime, window->nPull);
2242 window->ztime = NULL;
2244 snew(lennow, window->nPull);
2246 for (g = 0; g < window->nPull; ++g)
2249 window->bsWeight[g] = 1.;
2250 snew(window->Histo[g], bins);
2252 window->Ntot[g] = 0;
2254 if (opt->bCalcTauInt)
2256 window->ztime[g] = NULL;
2260 /* Copying umbrella center and force const is more involved since not
2261 all pull groups from header (tpr file) may be used in window variable */
2262 for (g = 0, gUsed = 0; g < header->npullgrps; ++g)
2264 if (groupsel && (groupsel->bUse[g] == FALSE))
2268 window->k[gUsed] = header->k[g];
2269 window->pos[gUsed] = header->umbInitDist[g];
2274 { /* only determine min and max */
2277 min = max = bins = 0; /* Get rid of warnings */
2281 for (i = 0; i < nt; i++)
2283 /* Do you want that time frame? */
2284 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2286 /* get time step of pdo file and get dstep from opt->dt */
2296 dstep = static_cast<int>(opt->dt/dt+0.5);
2304 window->dt = dt*dstep;
2308 dt_ok = (i%dstep == 0);
2309 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2311 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2312 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2316 /* Note: if groupsel == NULL:
2317 * all groups in pullf/x file are stored in this window, and gUsed == g
2318 * if groupsel != NULL:
2319 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2322 for (g = 0; g < header->npullgrps; ++g)
2324 /* was this group selected for application in WHAM? */
2325 if (groupsel && (groupsel->bUse[g] == FALSE))
2334 /* y has 1 time column y[0] and one column per force y[1],...,y[nGrps] */
2336 pos = -force/header->k[g] + header->umbInitDist[g];
2340 switch (header->pull_geometry)
2343 /* y has 1 time column y[0] and nColPerGrps columns per pull group;
2344 Distance to reference: */
2345 /* pos=dist_ndim(y+1+nColRef+g*nColPerGrp,header->pull_ndim,i); gmx 4.0 */
2346 pos = dist_ndim(y + 1 + nColRefOnce + g*nColPerGrp, header->pull_ndim, i);
2350 Time ref[ndim] group1[ndim] group2[ndim] ... */
2353 Time ref1[ndim] group1[ndim] ref2[ndim] group2[ndim] ... */
2355 /* * with geometry==position, we have the reference once (nColRefOnce==ndim), but
2356 no extra reference group columns before each group (nColRefEachGrp==0)
2358 * with geometry==cylinder, we have no initial ref group column (nColRefOnce==0),
2359 but ndim ref group colums before every group (nColRefEachGrp==ndim)
2360 Distance to reference: */
2361 pos = y[1 + nColRefOnce + nColRefEachGrp + g*(nColRefEachGrp+nColPerGrp)][i];
2364 gmx_fatal(FARGS, "Bad error, this error should have been catched before. Ups.\n");
2368 /* printf("grp %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2382 if (gUsed >= window->nPull)
2384 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2385 gUsed, window->nPull);
2388 if (opt->bCalcTauInt && !bGetMinMax)
2390 /* save time series for autocorrelation analysis */
2391 ntot = window->Ntot[gUsed];
2392 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2393 if (ntot >= lennow[gUsed])
2395 lennow[gUsed] += blocklen;
2396 srenew(window->ztime[gUsed], lennow[gUsed]);
2398 window->ztime[gUsed][ntot] = pos;
2401 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2406 while ( (ibin += bins) < 0)
2411 else if (ibin >= bins)
2413 while ( (ibin -= bins) >= bins)
2419 if (ibin >= 0 && ibin < bins)
2421 window->Histo[gUsed][ibin] += 1.;
2424 window->Ntot[gUsed]++;
2428 else if (t > opt->tmax)
2432 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2444 for (i = 0; i < ny; i++)
2450 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2451 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2452 t_UmbrellaHeader* header,
2453 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2456 real mintmp, maxtmp;
2458 printf("Reading %d tpr and pullf files\n", nfiles/2);
2460 /* min and max not given? */
2463 printf("Automatic determination of boundaries...\n");
2466 for (i = 0; i < nfiles; i++)
2468 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2470 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2472 read_tpr_header(fnTprs[i], header, opt);
2473 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2475 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2477 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2478 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2479 if (maxtmp > opt->max)
2483 if (mintmp < opt->min)
2488 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2489 if (opt->bBoundsOnly)
2491 printf("Found option -boundsonly, now exiting.\n");
2495 /* store stepsize in profile */
2496 opt->dz = (opt->max-opt->min)/opt->bins;
2498 for (i = 0; i < nfiles; i++)
2500 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2502 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2504 read_tpr_header(fnTprs[i], header, opt);
2505 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2507 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2509 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2510 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2511 if (window[i].Ntot[0] == 0)
2513 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2517 for (i = 0; i < nfiles; i++)
2526 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2528 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2529 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2531 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt,
2534 int nlines, ny, i, ig;
2537 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2538 nlines = read_xvg(fn, &iact, &ny);
2539 if (nlines != nwins)
2541 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2544 for (i = 0; i < nlines; i++)
2546 if (window[i].nPull != ny)
2548 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2549 "number of pull groups is different in different simulations. That is not\n"
2550 "supported yet. Sorry.\n");
2552 for (ig = 0; ig < window[i].nPull; ig++)
2554 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2555 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2557 if (iact[ig][i] <= 0.0)
2559 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2566 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2569 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2570 * that -in case of limited sampling- the ACT may be severely underestimated.
2571 * Note: the g=1+2tau are overwritten.
2572 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2575 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2578 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2580 /* only evaluate within +- 3sigma of the Gausian */
2581 siglim = 3.0*opt->sigSmoothIact;
2582 siglim2 = dsqr(siglim);
2583 /* pre-factor of Gaussian */
2584 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2585 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2587 for (i = 0; i < nwins; i++)
2589 snew(window[i].tausmooth, window[i].nPull);
2590 for (ig = 0; ig < window[i].nPull; ig++)
2594 pos = window[i].pos[ig];
2595 for (j = 0; j < nwins; j++)
2597 for (jg = 0; jg < window[j].nPull; jg++)
2599 dpos2 = dsqr(window[j].pos[jg]-pos);
2600 if (dpos2 < siglim2)
2602 w = gaufact*exp(-dpos2*invtwosig2);
2604 tausm += w*window[j].tau[jg];
2605 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2606 w,dpos2,pos,gaufact,invtwosig2); */
2611 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2613 window[i].tausmooth[ig] = tausm;
2617 window[i].tausmooth[ig] = window[i].tau[ig];
2619 window[i].g[ig] = 1+2*tausm/window[i].dt;
2624 //! Stop integrating autoccorelation function when ACF drops under this value
2625 #define WHAM_AC_ZERO_LIMIT 0.05
2627 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2629 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2630 t_UmbrellaOptions *opt, const char *fn)
2632 int i, ig, ncorr, ntot, j, k, *count, restart;
2633 real *corr, c0, dt, tmp;
2634 real *ztime, av, tausteps;
2635 FILE *fp, *fpcorr = 0;
2639 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2640 "time [ps]", "autocorrelation function", opt->oenv);
2644 for (i = 0; i < nwins; i++)
2646 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2648 ntot = window[i].Ntot[0];
2650 /* using half the maximum time as length of autocorrelation function */
2654 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2655 " points. Provide more pull data!", ntot);
2658 /* snew(corrSq,ncorr); */
2661 snew(window[i].tau, window[i].nPull);
2662 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2668 for (ig = 0; ig < window[i].nPull; ig++)
2670 if (ntot != window[i].Ntot[ig])
2672 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2673 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2675 ztime = window[i].ztime[ig];
2677 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2678 for (j = 0, av = 0; (j < ntot); j++)
2683 for (k = 0; (k < ncorr); k++)
2688 for (j = 0; (j < ntot); j += restart)
2690 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2692 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2694 /* corrSq[k] += tmp*tmp; */
2698 /* divide by nr of frames for each time displacement */
2699 for (k = 0; (k < ncorr); k++)
2701 /* count probably = (ncorr-k+(restart-1))/restart; */
2702 corr[k] = corr[k]/count[k];
2703 /* variance of autocorrelation function */
2704 /* corrSq[k]=corrSq[k]/count[k]; */
2706 /* normalize such that corr[0] == 0 */
2708 for (k = 0; (k < ncorr); k++)
2711 /* corrSq[k]*=c0*c0; */
2714 /* write ACFs in verbose mode */
2717 for (k = 0; (k < ncorr); k++)
2719 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2721 fprintf(fpcorr, "&\n");
2724 /* esimate integrated correlation time, fitting is too unstable */
2725 tausteps = 0.5*corr[0];
2726 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2727 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2729 tausteps += corr[j];
2732 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2733 Kumar et al, eq. 28 ff. */
2734 window[i].tau[ig] = tausteps*dt;
2735 window[i].g[ig] = 1+2*tausteps;
2736 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2747 /* plot IACT along reaction coordinate */
2748 fp = xvgropen(fn, "Integrated autocorrelation times", "z", "IACT [ps]", opt->oenv);
2749 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2750 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2751 for (i = 0; i < nwins; i++)
2753 fprintf(fp, "# %3d ", i);
2754 for (ig = 0; ig < window[i].nPull; ig++)
2756 fprintf(fp, " %11g", window[i].tau[ig]);
2760 for (i = 0; i < nwins; i++)
2762 for (ig = 0; ig < window[i].nPull; ig++)
2764 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2767 if (opt->sigSmoothIact > 0.0)
2769 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2770 opt->sigSmoothIact);
2771 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2772 smoothIact(window, nwins, opt);
2773 fprintf(fp, "&\n@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2774 fprintf(fp, "@ s1 symbol color 2\n");
2775 for (i = 0; i < nwins; i++)
2777 for (ig = 0; ig < window[i].nPull; ig++)
2779 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2784 printf("Wrote %s\n", fn);
2788 * compute average and sigma of each umbrella histogram
2790 void averageSigma(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2793 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2795 for (i = 0; i < nwins; i++)
2797 snew(window[i].aver, window[i].nPull);
2798 snew(window[i].sigma, window[i].nPull);
2800 ntot = window[i].Ntot[0];
2801 for (ig = 0; ig < window[i].nPull; ig++)
2803 ztime = window[i].ztime[ig];
2804 for (k = 0, av = 0.; k < ntot; k++)
2809 for (k = 0, sum2 = 0.; k < ntot; k++)
2814 sig = sqrt(sum2/ntot);
2815 window[i].aver[ig] = av;
2817 /* Note: This estimate for sigma is biased from the limited sampling.
2818 Correct sigma by n/(n-1) where n = number of independent
2819 samples. Only possible if IACT is known.
2823 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2824 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2828 window[i].sigma[ig] = sig;
2830 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2837 * Use histograms to compute average force on pull group.
2839 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2841 int i, j, bins = opt->bins, k;
2842 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2845 dz = (max-min)/bins;
2846 ztot = opt->max-min;
2849 /* Compute average displacement from histograms */
2850 for (j = 0; j < nWindows; ++j)
2852 snew(window[j].forceAv, window[j].nPull);
2853 for (k = 0; k < window[j].nPull; ++k)
2857 for (i = 0; i < opt->bins; ++i)
2859 temp = (1.0*i+0.5)*dz+min;
2860 distance = temp - window[j].pos[k];
2862 { /* in cyclic wham: */
2863 if (distance > ztot_half) /* |distance| < ztot_half */
2867 else if (distance < -ztot_half)
2872 w = window[j].Histo[k][i]/window[j].g[k];
2873 displAv += w*distance;
2875 /* Are we near min or max? We are getting wrong forces from the histgrams since
2876 the histograms are zero outside [min,max). Therefore, assume that the position
2877 on the other side of the histomgram center is equally likely. */
2880 posmirrored = window[j].pos[k]-distance;
2881 if (posmirrored >= max || posmirrored < min)
2883 displAv += -w*distance;
2890 /* average force from average displacement */
2891 window[j].forceAv[k] = displAv*window[j].k[k];
2892 /* sigma from average square displacement */
2893 /* window[j].sigma [k] = sqrt(displAv2); */
2894 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2900 * Check if the complete reaction coordinate is covered by the histograms
2902 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2903 t_UmbrellaOptions *opt)
2905 int i, ig, j, bins = opt->bins, bBoundary;
2906 real avcount = 0, z, relcount, *count;
2907 snew(count, opt->bins);
2909 for (j = 0; j < opt->bins; ++j)
2911 for (i = 0; i < nwins; i++)
2913 for (ig = 0; ig < window[i].nPull; ig++)
2915 count[j] += window[i].Histo[ig][j];
2918 avcount += 1.0*count[j];
2921 for (j = 0; j < bins; ++j)
2923 relcount = count[j]/avcount;
2924 z = (j+0.5)*opt->dz+opt->min;
2925 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2926 /* check for bins with no data */
2929 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2930 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2932 /* and check for poor sampling */
2933 else if (relcount < 0.005 && !bBoundary)
2935 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2941 /*! \brief Compute initial potential by integrating the average force
2943 * This speeds up the convergence by roughly a factor of 2
2945 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt,
2948 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
2949 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
2952 dz = (opt->max-min)/bins;
2954 printf("Getting initial potential by integration.\n");
2956 /* Compute average displacement from histograms */
2957 computeAverageForce(window, nWindows, opt);
2959 /* Get force for each bin from all histograms in this bin, or, alternatively,
2960 if no histograms are inside this bin, from the closest histogram */
2963 for (j = 0; j < opt->bins; ++j)
2965 pos = (1.0*j+0.5)*dz+min;
2969 groupmin = winmin = 0;
2970 for (i = 0; i < nWindows; i++)
2972 for (ig = 0; ig < window[i].nPull; ig++)
2974 hispos = window[i].pos[ig];
2975 dist = fabs(hispos-pos);
2976 /* average force within bin */
2980 fAv += window[i].forceAv[ig];
2982 /* at the same time, rememer closest histogram */
2991 /* if no histogram found in this bin, use closest histogram */
2998 fAv = window[winmin].forceAv[groupmin];
3002 for (j = 1; j < opt->bins; ++j)
3004 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3007 /* cyclic wham: linearly correct possible offset */
3010 diff = (pot[bins-1]-pot[0])/(bins-1);
3011 for (j = 1; j < opt->bins; ++j)
3018 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", "z", "PMF [kJ/mol]", opt->oenv);
3019 for (j = 0; j < opt->bins; ++j)
3021 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3024 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3027 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3028 energy offsets which are usually determined by wham
3029 First: turn pot into probabilities:
3031 for (j = 0; j < opt->bins; ++j)
3033 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
3035 calc_z(pot, window, nWindows, opt, TRUE);
3041 //! Count number of words in an line
3042 static int wordcount(char *ptr)
3047 if (strlen(ptr) == 0)
3051 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3053 for (i = 0; (ptr[i] != '\0'); i++)
3055 is[cur] = isspace(ptr[i]);
3056 if ((i > 0) && (is[cur] && !is[1-cur]))
3065 /*! \brief Read input file for pull group selection (option -is)
3067 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3069 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3072 int i, iline, n, len = STRLEN, temp;
3073 char *ptr = 0, *tmpbuf = 0;
3074 char fmt[1024], fmtign[1024];
3075 int block = 1, sizenow;
3077 fp = ffopen(opt->fnGroupsel, "r");
3078 opt->groupsel = NULL;
3083 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3088 if (iline >= sizenow)
3091 srenew(opt->groupsel, sizenow);
3093 opt->groupsel[iline].n = n;
3094 opt->groupsel[iline].nUse = 0;
3095 snew(opt->groupsel[iline].bUse, n);
3098 for (i = 0; i < n; i++)
3100 strcpy(fmt, fmtign);
3102 if (sscanf(ptr, fmt, &temp))
3104 opt->groupsel[iline].bUse[i] = (temp > 0);
3105 if (opt->groupsel[iline].bUse[i])
3107 opt->groupsel[iline].nUse++;
3110 strcat(fmtign, "%*s");
3114 opt->nGroupsel = iline;
3115 if (nTpr != opt->nGroupsel)
3117 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3121 printf("\nUse only these pull groups:\n");
3122 for (iline = 0; iline < nTpr; iline++)
3124 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3125 for (i = 0; i < opt->groupsel[iline].n; i++)
3127 if (opt->groupsel[iline].bUse[i])
3140 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3142 /*! Number of elements in fnm (used for command line parsing) */
3143 #define NFILE asize(fnm)
3145 //! The main g_wham routine
3146 int gmx_wham(int argc, char *argv[])
3148 const char *desc[] = {
3149 "This is an analysis program that implements the Weighted",
3150 "Histogram Analysis Method (WHAM). It is intended to analyze",
3151 "output files generated by umbrella sampling simulations to ",
3152 "compute a potential of mean force (PMF). [PAR] ",
3153 "At present, three input modes are supported.[BR]",
3154 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
3155 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
3156 " AND, with option [TT]-ix[tt], a file which contains file names of",
3157 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
3158 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
3159 " first pullx, etc.[BR]",
3160 "[TT]*[tt] Same as the previous input mode, except that the the user",
3161 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3162 " From the pull force the position in the umbrella potential is",
3163 " computed. This does not work with tabulated umbrella potentials.[BR]"
3164 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
3165 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3166 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
3167 " feed them with the [TT]-ip[tt] option into to [TT]g_wham[tt]. The [TT].pdo[tt] file header",
3168 " must be similar to the following:[PAR]",
3169 "[TT]# UMBRELLA 3.0[BR]",
3170 "# Component selection: 0 0 1[BR]",
3172 "# Ref. Group 'TestAtom'[BR]",
3173 "# Nr. of pull groups 2[BR]",
3174 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
3175 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
3177 "The number of pull groups, umbrella positions, force constants, and names ",
3178 "may (of course) differ. Following the header, a time column and ",
3179 "a data column for each pull group follows (i.e. the displacement",
3180 "with respect to the umbrella center). Up to four pull groups are possible ",
3181 "per [TT].pdo[tt] file at present.[PAR]",
3182 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3183 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3184 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3185 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3186 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3187 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3188 "used, groupsel.dat looks like this:[BR]",
3192 "By default, the output files are[BR]",
3193 " [TT]-o[tt] PMF output file[BR]",
3194 " [TT]-hist[tt] Histograms output file[BR]",
3195 "Always check whether the histograms sufficiently overlap.[PAR]",
3196 "The umbrella potential is assumed to be harmonic and the force constants are ",
3197 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
3198 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
3199 "WHAM OPTIONS[BR]------------[BR]",
3200 " [TT]-bins[tt] Number of bins used in analysis[BR]",
3201 " [TT]-temp[tt] Temperature in the simulations[BR]",
3202 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
3203 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
3204 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
3205 "The data points that are used to compute the profile",
3206 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3207 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3208 "umbrella window.[PAR]",
3209 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3210 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3211 "With energy output, the energy in the first bin is defined to be zero. ",
3212 "If you want the free energy at a different ",
3213 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3214 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3215 "without osmotic gradient), the option [TT]-cycl[tt] is useful. [TT]g_wham[tt] will make use of the ",
3216 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3217 "reaction coordinate will assumed be be neighbors.[PAR]",
3218 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3219 "which may be useful for, e.g. membranes.[PAR]",
3220 "AUTOCORRELATIONS[BR]----------------[BR]",
3221 "With [TT]-ac[tt], [TT]g_wham[tt] estimates the integrated autocorrelation ",
3222 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3223 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3224 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3225 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3226 "Because the IACTs can be severely underestimated in case of limited ",
3227 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3228 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3229 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3230 "integration of the ACFs while the ACFs are larger 0.05.",
3231 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3232 "less robust) method such as fitting to a double exponential, you can ",
3233 "compute the IACTs with [TT]g_analyze[tt] and provide them to [TT]g_wham[tt] with the file ",
3234 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3235 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
3236 "ERROR ANALYSIS[BR]--------------[BR]",
3237 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3238 "otherwise the statistical error may be substantially underestimated. ",
3239 "More background and examples for the bootstrap technique can be found in ",
3240 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
3241 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3242 "Four bootstrapping methods are supported and ",
3243 "selected with [TT]-bs-method[tt].[BR]",
3244 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3245 "data points, and the bootstrap is carried out by assigning random weights to the ",
3246 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3247 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3248 "statistical error is underestimated.[BR]",
3249 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
3250 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3251 "(allowing duplication, i.e. sampling with replacement).",
3252 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
3253 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3254 "divided into blocks and only histograms within each block are mixed. Note that ",
3255 "the histograms within each block must be representative for all possible histograms, ",
3256 "otherwise the statistical error is underestimated.[BR]",
3257 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3258 "such that the generated data points are distributed according the given histograms ",
3259 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3260 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3261 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3262 "Note that this method may severely underestimate the error in case of limited sampling, ",
3263 "that is if individual histograms do not represent the complete phase space at ",
3264 "the respective positions.[BR]",
3265 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3266 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3267 "and width of the umbrella histograms. That method yields similar error estimates ",
3268 "like method [TT]traj[tt].[PAR]"
3269 "Bootstrapping output:[BR]",
3270 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
3271 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
3272 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3273 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3277 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3278 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3279 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3281 static t_UmbrellaOptions opt;
3284 { "-min", FALSE, etREAL, {&opt.min},
3285 "Minimum coordinate in profile"},
3286 { "-max", FALSE, etREAL, {&opt.max},
3287 "Maximum coordinate in profile"},
3288 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3289 "Determine min and max automatically"},
3290 { "-bins", FALSE, etINT, {&opt.bins},
3291 "Number of bins in profile"},
3292 { "-temp", FALSE, etREAL, {&opt.Temperature},
3294 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3296 { "-v", FALSE, etBOOL, {&opt.verbose},
3298 { "-b", FALSE, etREAL, {&opt.tmin},
3299 "First time to analyse (ps)"},
3300 { "-e", FALSE, etREAL, {&opt.tmax},
3301 "Last time to analyse (ps)"},
3302 { "-dt", FALSE, etREAL, {&opt.dt},
3303 "Analyse only every dt ps"},
3304 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3305 "Write histograms and exit"},
3306 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3307 "Determine min and max and exit (with [TT]-auto[tt])"},
3308 { "-log", FALSE, etBOOL, {&opt.bLog},
3309 "Calculate the log of the profile before printing"},
3310 { "-unit", FALSE, etENUM, {en_unit},
3311 "Energy unit in case of log output" },
3312 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3313 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3314 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3315 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3316 { "-sym", FALSE, etBOOL, {&opt.bSym},
3317 "Symmetrize profile around z=0"},
3318 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3319 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3320 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3321 "Calculate integrated autocorrelation times and use in wham"},
3322 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3323 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3324 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3325 "When computing autocorrelation functions, restart computing every .. (ps)"},
3326 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3327 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3328 "during smoothing"},
3329 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3330 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3331 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3332 "Bootstrap method" },
3333 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3334 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3335 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3336 "Seed for bootstrapping. (-1 = use time)"},
3337 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3338 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3339 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3340 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3341 { "-stepout", FALSE, etINT, {&opt.stepchange},
3342 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3343 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3344 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3348 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3349 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3350 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3351 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3352 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3353 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3354 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3355 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3356 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3357 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3358 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3359 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3362 int i, j, l, nfiles, nwins, nfiles2;
3363 t_UmbrellaHeader header;
3364 t_UmbrellaWindow * window = NULL;
3365 double *profile, maxchange = 1e20;
3366 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3367 char **fninTpr, **fninPull, **fninPdo;
3369 FILE *histout, *profout;
3370 char ylabel[256], title[256];
3373 opt.verbose = FALSE;
3374 opt.bHistOnly = FALSE;
3384 /* bootstrapping stuff */
3386 opt.bsMethod = bsMethod_hist;
3387 opt.tauBootStrap = 0.0;
3389 opt.histBootStrapBlockLength = 8;
3390 opt.bs_verbose = FALSE;
3395 opt.Temperature = 298;
3396 opt.Tolerance = 1e-6;
3397 opt.bBoundsOnly = FALSE;
3399 opt.bCalcTauInt = FALSE;
3400 opt.sigSmoothIact = 0.0;
3401 opt.bAllowReduceIact = TRUE;
3402 opt.bInitPotByIntegration = TRUE;
3403 opt.acTrestart = 1.0;
3404 opt.stepchange = 100;
3405 opt.stepUpdateContrib = 100;
3407 parse_common_args(&argc, argv, PCA_BE_NICE,
3408 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv);
3410 opt.unit = nenum(en_unit);
3411 opt.bsMethod = nenum(en_bsMethod);
3413 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3415 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3416 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3417 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3418 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3419 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3420 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3421 if (opt.bTab && opt.bPullf)
3423 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3424 "Provide pullx.xvg or pdo files!");
3427 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3429 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3431 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3433 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3434 "\n\n Check g_wham -h !");
3437 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3438 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3439 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3440 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3441 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3443 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3444 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3445 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3446 if ( (bMinSet || bMaxSet) && bAutoSet)
3448 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3451 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3453 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3456 if (bMinSet && opt.bAuto)
3458 printf("Note: min and max given, switching off -auto.\n");
3462 if (opt.bTauIntGiven && opt.bCalcTauInt)
3464 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3465 "the autocorrelation times. Not both.");
3468 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3470 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3471 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3473 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3475 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3476 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3479 /* Reading gmx4 pull output and tpr files */
3480 if (opt.bTpr || opt.bPullf || opt.bPullx)
3482 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3484 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3485 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3486 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3487 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3488 if (nfiles != nfiles2)
3490 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3491 opt.fnTpr, nfiles2, fnPull);
3494 /* Read file that selects the pull group to be used */
3495 if (opt.fnGroupsel != NULL)
3497 readPullGroupSelection(&opt, fninTpr, nfiles);
3500 window = initUmbrellaWindows(nfiles);
3501 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3504 { /* reading pdo files */
3505 if (opt.fnGroupsel != NULL)
3507 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3508 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3510 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3511 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3512 window = initUmbrellaWindows(nfiles);
3513 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3517 /* enforce equal weight for all histograms? */
3520 enforceEqualWeights(window, nwins);
3523 /* write histograms */
3524 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3525 "z", "count", opt.oenv);
3526 for (l = 0; l < opt.bins; ++l)
3528 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3529 for (i = 0; i < nwins; ++i)
3531 for (j = 0; j < window[i].nPull; ++j)
3533 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3536 fprintf(histout, "\n");
3539 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3542 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3546 /* Using tabulated umbrella potential */
3549 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3552 /* Integrated autocorrelation times provided ? */
3553 if (opt.bTauIntGiven)
3555 readIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-iiact", NFILE, fnm));
3558 /* Compute integrated autocorrelation times */
3559 if (opt.bCalcTauInt)
3561 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3564 /* calc average and sigma for each histogram
3565 (maybe required for bootstrapping. If not, this is fast anyhow) */
3566 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3568 averageSigma(window, nwins, &opt);
3571 /* Get initial potential by simple integration */
3572 if (opt.bInitPotByIntegration)
3574 guessPotByIntegration(window, nwins, &opt, 0);
3577 /* Check if complete reaction coordinate is covered */
3578 checkReactionCoordinateCovered(window, nwins, &opt);
3580 /* Calculate profile */
3581 snew(profile, opt.bins);
3589 if ( (i%opt.stepUpdateContrib) == 0)
3591 setup_acc_wham(profile, window, nwins, &opt);
3593 if (maxchange < opt.Tolerance)
3596 /* if (opt.verbose) */
3597 printf("Switched to exact iteration in iteration %d\n", i);
3599 calc_profile(profile, window, nwins, &opt, bExact);
3600 if (((i%opt.stepchange) == 0 || i == 1) && !i == 0)
3602 printf("\t%4d) Maximum change %e\n", i, maxchange);
3606 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3607 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3609 /* calc error from Kumar's formula */
3610 /* Unclear how the error propagates along reaction coordinate, therefore
3612 /* calc_error_kumar(profile,window, nwins,&opt); */
3614 /* Write profile in energy units? */
3617 prof_normalization_and_unit(profile, &opt);
3618 strcpy(ylabel, en_unit_label[opt.unit]);
3619 strcpy(title, "Umbrella potential");
3623 strcpy(ylabel, "Density of states");
3624 strcpy(title, "Density of states");
3627 /* symmetrize profile around z=0? */
3630 symmetrizeProfile(profile, &opt);
3633 /* write profile or density of states */
3634 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, "z", ylabel, opt.oenv);
3635 for (i = 0; i < opt.bins; ++i)
3637 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3640 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3642 /* Bootstrap Method */
3645 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3646 opt2fn("-hist", NFILE, fnm),
3647 ylabel, profile, window, nwins, &opt);
3651 freeUmbrellaWindows(window, nfiles);
3653 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3654 please_cite(stdout, "Hub2010");