1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- */
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
38 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
40 * \author Jochen Hub <jhub@gwdg.de>
56 #include "gromacs/fileio/tpxio.h"
58 #include "gmx_random.h"
65 //! longest file names allowed in input files
66 #define WHAM_MAXFILELEN 2048
69 * enum for energy units
72 enSel, en_kJ, en_kCal, en_kT, enNr
75 * enum for type of input files (pdos, tpr, or pullf)
78 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
81 /*! \brief enum for bootstrapping method
83 * These bootstrap methods are supported:
84 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
85 * (bsMethod_BayesianHist)
86 * - bootstrap complete histograms (bsMethod_hist)
87 * - bootstrap trajectories from given umbrella histograms. This generates new
88 * "synthetic" histograms (bsMethod_traj)
89 * - bootstrap trajectories from Gaussian with mu/sigma computed from
90 * the respective histogram (bsMethod_trajGauss). This gives very similar
91 * results compared to bsMethod_traj.
93 * ********************************************************************
94 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
95 * JS Hub, BL de Groot, D van der Spoel
96 * g_wham - A free weighted histogram analysis implementation including
97 * robust error and autocorrelation estimates,
98 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
99 * ********************************************************************
102 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
103 bsMethod_traj, bsMethod_trajGauss
107 //! Parameters of the umbrella potentials
111 * \name Using umbrella pull code since gromacs 4.x
114 int npullgrps; //!< nr of pull groups in tpr file
115 int pull_geometry; //!< such as distance, position
116 ivec pull_dim; //!< pull dimension with geometry distance
117 int pull_ndim; //!< nr of pull_dim != 0
118 real *k; //!< force constants in tpr file
119 rvec *init_dist; //!< reference displacements
120 real *umbInitDist; //!< reference displacement in umbrella direction
123 * \name Using PDO files common until gromacs 3.x
131 char PullName[4][256];
133 double UmbCons[4][3];
137 //! Data in the umbrella histograms
140 int nPull; //!< nr of pull groups in this pdo or pullf/x file
141 double **Histo; //!< nPull histograms
142 double **cum; //!< nPull cumulative distribution functions
143 int nBin; //!< nr of bins. identical to opt->bins
144 double *k; //!< force constants for the nPull groups
145 double *pos; //!< umbrella positions for the nPull groups
146 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
147 int *N; //!< nr of data points in nPull histograms.
148 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
150 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
152 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
153 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
156 double *tau; //!< intetrated autocorrelation time (IACT)
157 double *tausmooth; //!< smoothed IACT
159 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
161 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
163 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
165 /*! \brief average force estimated from average displacement, fAv=dzAv*k
167 * Used for integration to guess the potential.
170 real *aver; //!< average of histograms
171 real *sigma; //!< stddev of histograms
172 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
175 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
178 int n; //!< total nr of pull groups in this tpr file
179 int nUse; //!< nr of pull groups used
180 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
183 //! Parameters of WHAM
190 const char *fnTpr, *fnPullf, *fnGroupsel;
191 const char *fnPdo, *fnPullx; //!< file names of input
192 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
193 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
195 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
196 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
197 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
198 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
201 * \name Basic WHAM options
204 int bins; //!< nr of bins, min, max, and dz of profile
206 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
207 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
210 * \name Output control
213 gmx_bool bLog; //!< energy output (instead of probability) for profile
214 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
215 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
216 /*! \brief after wham, set prof to zero at this z-position.
217 * When bootstrapping, set zProf0 to a "stable" reference position.
220 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
222 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
223 gmx_bool bAuto; //!< determine min and max automatically but do not exit
225 gmx_bool verbose; //!< more noisy wham mode
226 int stepchange; //!< print maximum change in prof after how many interations
227 output_env_t oenv; //!< xvgr options
230 * \name Autocorrelation stuff
233 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
234 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
235 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
236 real acTrestart; //!< when computing ACT, time between restarting points
238 /* \brief Enforce the same weight for each umbella window, that is
239 * calculate with the same number of data points for
240 * each window. That can be reasonable, if the histograms
241 * have different length, but due to autocorrelation,
242 * a longer simulation should not have larger weightin wham.
248 * \name Bootstrapping stuff
251 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
253 /* \brief bootstrap method
255 * if == bsMethod_hist, consider complete histograms as independent
256 * data points and, hence, only mix complete histograms.
257 * if == bsMethod_BayesianHist, consider complete histograms
258 * as independent data points, but assign random weights
259 * to the histograms during the bootstrapping ("Bayesian bootstrap")
260 * In case of long correlations (e.g., inside a channel), these
261 * will yield a more realistic error.
262 * if == bsMethod_traj(Gauss), generate synthetic histograms
264 * histogram by generating an autocorrelated random sequence
265 * that is distributed according to the respective given
266 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
267 * (instead of from the umbrella histogram) to generate a new
272 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
275 /* \brief when mixing histograms, mix only histograms withing blocks
276 long the reaction coordinate xi. Avoids gaps along xi. */
277 int histBootStrapBlockLength;
279 int bsSeed; //!< random seed for bootstrapping
281 /* \brief Write cumulative distribution functions (CDFs) of histograms
282 and write the generated histograms for each bootstrap */
286 * \name tabulated umbrella potential stuff
290 double *tabX, *tabY, tabMin, tabMax, tabDz;
293 gmx_rng_t rng; //!< gromacs random number generator
296 //! Make an umbrella window (may contain several histograms)
297 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
299 t_UmbrellaWindow *win;
302 for (i = 0; i < nwin; i++)
304 win[i].Histo = win[i].cum = 0;
305 win[i].k = win[i].pos = win[i].z = 0;
306 win[i].N = win[i].Ntot = 0;
307 win[i].g = win[i].tau = win[i].tausmooth = 0;
311 win[i].aver = win[i].sigma = 0;
317 //! Delete an umbrella window (may contain several histograms)
318 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
321 for (i = 0; i < nwin; i++)
325 for (j = 0; j < win[i].nPull; j++)
327 sfree(win[i].Histo[j]);
332 for (j = 0; j < win[i].nPull; j++)
334 sfree(win[i].cum[j]);
339 for (j = 0; j < win[i].nPull; j++)
341 sfree(win[i].bContrib[j]);
353 sfree(win[i].tausmooth);
354 sfree(win[i].bContrib);
356 sfree(win[i].forceAv);
359 sfree(win[i].bsWeight);
365 * Read and setup tabulated umbrella potential
367 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
372 printf("Setting up tabulated potential from file %s\n", fn);
373 nl = read_xvg(fn, &y, &ny);
377 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
379 opt->tabMin = y[0][0];
380 opt->tabMax = y[0][nl-1];
381 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
384 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
385 "ascending z-direction", fn);
387 for (i = 0; i < nl-1; i++)
389 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
391 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
396 for (i = 0; i < nl; i++)
398 opt->tabX[i] = y[0][i];
399 opt->tabY[i] = y[1][i];
401 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
402 opt->tabMin, opt->tabMax, opt->tabDz);
405 //! Read the header of an PDO file (position, force const, nr of groups)
406 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
409 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
411 std::istringstream ist;
414 if (fgets(line, 2048, file) == NULL)
416 gmx_fatal(FARGS, "Error reading header from pdo file\n");
419 ist >> Buffer0 >> Buffer1 >> Buffer2;
420 if (strcmp(Buffer1, "UMBRELLA"))
422 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
423 "(Found in first line: `%s')\n",
424 Buffer1, "UMBRELLA", line);
426 if (strcmp(Buffer2, "3.0"))
428 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
432 if (fgets(line, 2048, file) == NULL)
434 gmx_fatal(FARGS, "Error reading header from pdo file\n");
437 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
438 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
440 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
441 if (header->nDim != 1)
443 gmx_fatal(FARGS, "Currently only supports one dimension");
447 if (fgets(line, 2048, file) == NULL)
449 gmx_fatal(FARGS, "Error reading header from pdo file\n");
452 ist >> Buffer0 >> Buffer1 >> header->nSkip;
455 if (fgets(line, 2048, file) == NULL)
457 gmx_fatal(FARGS, "Error reading header from pdo file\n");
460 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
463 if (fgets(line, 2048, file) == NULL)
465 gmx_fatal(FARGS, "Error reading header from pdo file\n");
468 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
472 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
476 for (i = 0; i < header->nPull; ++i)
478 if (fgets(line, 2048, file) == NULL)
480 gmx_fatal(FARGS, "Error reading header from pdo file\n");
483 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
484 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
485 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
489 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
490 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
494 if (fgets(line, 2048, file) == NULL)
496 gmx_fatal(FARGS, "Cannot read from file\n");
500 if (strcmp(Buffer3, "#####") != 0)
502 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
507 static char *fgets3(FILE *fp, char ptr[], int *len)
512 if (fgets(ptr, *len-1, fp) == NULL)
517 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
519 /* This line is longer than len characters, let's increase len! */
523 if (fgets(p-1, STRLEN, fp) == NULL)
529 if (ptr[slen-1] == '\n')
537 /*! \brief Read the data columns of and PDO file.
539 * TO DO: Get rid of the scanf function to avoid the clang warning.
540 * At the moment, this warning is avoided by hiding the format string
541 * the variable fmtlf.
543 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
544 int fileno, t_UmbrellaWindow * win,
545 t_UmbrellaOptions *opt,
546 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
548 int i, inttemp, bins, count, ntot;
549 real min, max, minfound = 1e20, maxfound = -1e20;
550 double temp, time, time0 = 0, dt;
552 t_UmbrellaWindow * window = 0;
553 gmx_bool timeok, dt_ok = 1;
554 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
555 int len = STRLEN, dstep = 1;
556 const int blocklen = 4096;
566 /* Need to alocate memory and set up structure */
567 window->nPull = header->nPull;
570 snew(window->Histo, window->nPull);
571 snew(window->z, window->nPull);
572 snew(window->k, window->nPull);
573 snew(window->pos, window->nPull);
574 snew(window->N, window->nPull);
575 snew(window->Ntot, window->nPull);
576 snew(window->g, window->nPull);
577 snew(window->bsWeight, window->nPull);
579 window->bContrib = 0;
581 if (opt->bCalcTauInt)
583 snew(window->ztime, window->nPull);
589 snew(lennow, window->nPull);
591 for (i = 0; i < window->nPull; ++i)
594 window->bsWeight[i] = 1.;
595 snew(window->Histo[i], bins);
596 window->k[i] = header->UmbCons[i][0];
597 window->pos[i] = header->UmbPos[i][0];
601 if (opt->bCalcTauInt)
603 window->ztime[i] = 0;
607 /* Done with setup */
613 min = max = bins = 0; /* Get rid of warnings */
618 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
622 if (ptr[0] == '#' || strlen(ptr) < 2)
627 /* Initiate format string */
629 strcat(fmtign, "%*s");
631 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
632 /* Round time to fs */
633 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
635 /* get time step of pdo file */
645 dstep = static_cast<int>(opt->dt/dt+0.5);
653 window->dt = dt*dstep;
658 dt_ok = ((count-1)%dstep == 0);
659 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
661 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
662 time,opt->tmin, opt->tmax, dt_ok,timeok); */
666 for (i = 0; i < header->nPull; ++i)
669 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
670 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
671 if (sscanf(ptr, fmt, &temp))
673 temp += header->UmbPos[i][0];
687 if (opt->bCalcTauInt)
689 /* save time series for autocorrelation analysis */
690 ntot = window->Ntot[i];
691 if (ntot >= lennow[i])
693 lennow[i] += blocklen;
694 srenew(window->ztime[i], lennow[i]);
696 window->ztime[i][ntot] = temp;
704 inttemp = static_cast<int> (temp);
711 else if (inttemp >= bins)
717 if (inttemp >= 0 && inttemp < bins)
719 window->Histo[i][inttemp] += 1.;
727 if (time > opt->tmax)
731 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
747 /*! \brief Set identical weights for all histograms
749 * Normally, the weight is given by the number data points in each
750 * histogram, together with the autocorrelation time. This can be overwritten
751 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
752 * an appropriate autocorrelation times instead of using this function.
754 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
756 int i, k, j, NEnforced;
759 NEnforced = window[0].Ntot[0];
760 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
761 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
762 /* enforce all histograms to have the same weight as the very first histogram */
764 for (j = 0; j < nWindows; ++j)
766 for (k = 0; k < window[j].nPull; ++k)
768 ratio = 1.0*NEnforced/window[j].Ntot[k];
769 for (i = 0; i < window[0].nBin; ++i)
771 window[j].Histo[k][i] *= ratio;
773 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
778 /*! \brief Simple linear interpolation between two given tabulated points
780 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
783 double pl, pu, dz, dp;
785 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
787 if (jl < 0 || ju >= opt->tabNbins)
789 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
790 "Provide an extended table.", dist, jl, ju);
794 dz = dist-opt->tabX[jl];
795 dp = (pu-pl)*dz/opt->tabDz;
801 * Check which bins substiantially contribute (accelerates WHAM)
803 * Don't worry, that routine does not mean we compute the PMF in limited precision.
804 * After rapid convergence (using only substiantal contributions), we always switch to
807 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
808 t_UmbrellaOptions *opt)
810 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
811 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
812 gmx_bool bAnyContrib;
813 static int bFirst = 1;
814 static double wham_contrib_lim;
818 for (i = 0; i < nWindows; ++i)
820 nGrptot += window[i].nPull;
822 wham_contrib_lim = opt->Tolerance/nGrptot;
825 ztot = opt->max-opt->min;
828 for (i = 0; i < nWindows; ++i)
830 if (!window[i].bContrib)
832 snew(window[i].bContrib, window[i].nPull);
834 for (j = 0; j < window[i].nPull; ++j)
836 if (!window[i].bContrib[j])
838 snew(window[i].bContrib[j], opt->bins);
841 for (k = 0; k < opt->bins; ++k)
843 temp = (1.0*k+0.5)*dz+min;
844 distance = temp - window[i].pos[j]; /* distance to umbrella center */
846 { /* in cyclic wham: */
847 if (distance > ztot_half) /* |distance| < ztot_half */
851 else if (distance < -ztot_half)
856 /* Note: there are two contributions to bin k in the wham equations:
857 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
858 ii) exp(- U/(8.314e-3*opt->Temperature))
859 where U is the umbrella potential
860 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
865 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
869 U = tabulated_pot(distance, opt); /* Use tabulated potential */
872 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
873 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
874 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
875 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
876 if (window[i].bContrib[j][k])
882 /* If this histo is far outside min and max all bContrib may be FALSE,
883 causing a floating point exception later on. To avoid that, switch
887 for (k = 0; k < opt->bins; ++k)
889 window[i].bContrib[j][k] = TRUE;
896 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
897 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
902 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
908 //! Compute the PMF (one of the two main WHAM routines)
909 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
910 t_UmbrellaOptions *opt, gmx_bool bExact)
913 double num, ztot_half, ztot, distance, min = opt->min, dz = opt->dz;
914 double denom, U = 0, temp = 0, invg;
916 ztot = opt->max-opt->min;
919 for (i = 0; i < opt->bins; ++i)
922 for (j = 0; j < nWindows; ++j)
924 for (k = 0; k < window[j].nPull; ++k)
926 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
927 temp = (1.0*i+0.5)*dz+min;
928 num += invg*window[j].Histo[k][i];
930 if (!(bExact || window[j].bContrib[k][i]))
934 distance = temp - window[j].pos[k]; /* distance to umbrella center */
936 { /* in cyclic wham: */
937 if (distance > ztot_half) /* |distance| < ztot_half */
941 else if (distance < -ztot_half)
949 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
953 U = tabulated_pot(distance, opt); /* Use tabulated potential */
955 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
958 profile[i] = num/denom;
962 //! Compute the free energy offsets z (one of the two main WHAM routines)
963 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
964 t_UmbrellaOptions *opt, gmx_bool bExact)
967 double U = 0, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot;
968 double MAX = -1e20, total = 0;
970 ztot = opt->max-opt->min;
973 for (i = 0; i < nWindows; ++i)
975 for (j = 0; j < window[i].nPull; ++j)
978 for (k = 0; k < window[i].nBin; ++k)
980 if (!(bExact || window[i].bContrib[j][k]))
984 temp = (1.0*k+0.5)*dz+min;
985 distance = temp - window[i].pos[j]; /* distance to umbrella center */
987 { /* in cyclic wham: */
988 if (distance > ztot_half) /* |distance| < ztot_half */
992 else if (distance < -ztot_half)
1000 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1004 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1007 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1009 /* Avoid floating point exception if window is far outside min and max */
1012 total = -log(total);
1018 temp = fabs(total - window[i].z[j]);
1023 window[i].z[j] = total;
1029 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1030 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1032 int i, j, bins = opt->bins;
1033 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1036 if (min > 0. || max < 0.)
1038 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1039 opt->min, opt->max);
1044 for (i = 0; i < bins; i++)
1048 /* bin left of zsym */
1049 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1050 if (j >= 0 && (j+1) < bins)
1052 /* interpolate profile linearly between bins j and j+1 */
1053 z1 = min+(j+0.5)*dz;
1055 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1056 /* average between left and right */
1057 prof2[i] = 0.5*(profsym+profile[i]);
1061 prof2[i] = profile[i];
1065 memcpy(profile, prof2, bins*sizeof(double));
1069 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1070 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1073 double unit_factor = 1., R_MolarGasConst, diff;
1075 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1078 /* Not log? Nothing to do! */
1084 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1085 if (opt->unit == en_kT)
1089 else if (opt->unit == en_kJ)
1091 unit_factor = R_MolarGasConst*opt->Temperature;
1093 else if (opt->unit == en_kCal)
1095 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1099 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1102 for (i = 0; i < bins; i++)
1104 if (profile[i] > 0.0)
1106 profile[i] = -log(profile[i])*unit_factor;
1110 /* shift to zero at z=opt->zProf0 */
1111 if (!opt->bProf0Set)
1117 /* Get bin with shortest distance to opt->zProf0
1118 (-0.5 from bin position and +0.5 from rounding cancel) */
1119 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1124 else if (imin >= bins)
1128 diff = profile[imin];
1132 for (i = 0; i < bins; i++)
1138 //! Make an array of random integers (used for bootstrapping)
1139 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1141 int ipull, blockBase, nr, ipullRandom;
1143 if (blockLength == 0)
1145 blockLength = nPull;
1148 for (ipull = 0; ipull < nPull; ipull++)
1150 blockBase = (ipull/blockLength)*blockLength;
1152 { /* make sure nothing bad happens in the last block */
1153 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1154 ipullRandom = blockBase + nr;
1156 while (ipullRandom >= nPull);
1157 if (ipullRandom < 0 || ipullRandom >= nPull)
1159 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1160 "blockLength = %d, blockBase = %d\n",
1161 ipullRandom, nPull, nr, blockLength, blockBase);
1163 randomArray[ipull] = ipullRandom;
1165 /*for (ipull=0; ipull<nPull; ipull++)
1166 printf("%d ",randomArray[ipull]); printf("\n"); */
1169 /*! \brief Set pull group information of a synthetic histogram
1171 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1172 * but it is not required if we bootstrap complete histograms.
1174 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1175 t_UmbrellaWindow *thisWindow, int pullid)
1177 synthWindow->N [0] = thisWindow->N [pullid];
1178 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1179 synthWindow->pos [0] = thisWindow->pos [pullid];
1180 synthWindow->z [0] = thisWindow->z [pullid];
1181 synthWindow->k [0] = thisWindow->k [pullid];
1182 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1183 synthWindow->g [0] = thisWindow->g [pullid];
1184 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1187 /*! \brief Calculate cumulative distribution function of of all histograms.
1189 * This allow to create random number sequences
1190 * which are distributed according to the histograms. Required to generate
1191 * the "synthetic" histograms for the Bootstrap method
1193 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1194 t_UmbrellaOptions *opt, const char *fnhist)
1198 char *fn = 0, *buf = 0;
1201 if (opt->bs_verbose)
1203 snew(fn, strlen(fnhist)+10);
1204 snew(buf, strlen(fnhist)+10);
1205 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1206 fp = xvgropen(fn, "CDFs of umbrella windows", "z", "CDF", opt->oenv);
1210 for (i = 0; i < nWindows; i++)
1212 snew(window[i].cum, window[i].nPull);
1213 for (j = 0; j < window[i].nPull; j++)
1215 snew(window[i].cum[j], nbin+1);
1216 window[i].cum[j][0] = 0.;
1217 for (k = 1; k <= nbin; k++)
1219 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1222 /* normalize CDFs. Ensure cum[nbin]==1 */
1223 last = window[i].cum[j][nbin];
1224 for (k = 0; k <= nbin; k++)
1226 window[i].cum[j][k] /= last;
1231 printf("Cumulative distriubtion functions of all histograms created.\n");
1232 if (opt->bs_verbose)
1234 for (k = 0; k <= nbin; k++)
1236 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1237 for (i = 0; i < nWindows; i++)
1239 for (j = 0; j < window[i].nPull; j++)
1241 fprintf(fp, "%g\t", window[i].cum[j][k]);
1246 printf("Wrote cumulative distribution functions to %s\n", fn);
1254 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1256 * This is used to generate a random sequence distributed according to a histogram
1258 void searchCumulative(double xx[], int n, double x, int *j)
1280 else if (x == xx[n-1])
1290 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1291 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1292 int pullid, t_UmbrellaOptions *opt)
1294 int N, i, nbins, r_index, ibin;
1295 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1298 N = thisWindow->N[pullid];
1299 dt = thisWindow->dt;
1300 nbins = thisWindow->nBin;
1302 /* tau = autocorrelation time */
1303 if (opt->tauBootStrap > 0.0)
1305 tausteps = opt->tauBootStrap/dt;
1307 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1309 /* calc tausteps from g=1+2tausteps */
1310 g = thisWindow->g[pullid];
1316 "When generating hypothetical trajctories from given umbrella histograms,\n"
1317 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1318 "cannot be predicted. You have 3 options:\n"
1319 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1320 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1322 " with option -iiact for all umbrella windows.\n"
1323 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1324 " Use option (3) only if you are sure what you're doing, you may severely\n"
1325 " underestimate the error if a too small ACT is given.\n");
1326 gmx_fatal(FARGS, errstr);
1329 synthWindow->N [0] = N;
1330 synthWindow->pos [0] = thisWindow->pos[pullid];
1331 synthWindow->z [0] = thisWindow->z[pullid];
1332 synthWindow->k [0] = thisWindow->k[pullid];
1333 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1334 synthWindow->g [0] = thisWindow->g [pullid];
1335 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1337 for (i = 0; i < nbins; i++)
1339 synthWindow->Histo[0][i] = 0.;
1342 if (opt->bsMethod == bsMethod_trajGauss)
1344 sig = thisWindow->sigma [pullid];
1345 mu = thisWindow->aver [pullid];
1348 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1350 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1352 z = a*x + sqrt(1-a^2)*y
1353 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1354 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1356 C(t) = exp(-t/tau) with tau=-1/ln(a)
1358 Then, use error function to turn the Gaussian random variable into a uniformly
1359 distributed one in [0,1]. Eventually, use cumulative distribution function of
1360 histogram to get random variables distributed according to histogram.
1361 Note: The ACT of the flat distribution and of the generated histogram is not
1362 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1364 a = exp(-1.0/tausteps);
1366 invsqrt2 = 1./sqrt(2.0);
1368 /* init random sequence */
1369 x = gmx_rng_gaussian_table(opt->rng);
1371 if (opt->bsMethod == bsMethod_traj)
1373 /* bootstrap points from the umbrella histograms */
1374 for (i = 0; i < N; i++)
1376 y = gmx_rng_gaussian_table(opt->rng);
1378 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1379 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1381 r = 0.5*(1+gmx_erf(x*invsqrt2));
1382 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1383 synthWindow->Histo[0][r_index] += 1.;
1386 else if (opt->bsMethod == bsMethod_trajGauss)
1388 /* bootstrap points from a Gaussian with the same average and sigma
1389 as the respective umbrella histogram. The idea was, that -given
1390 limited sampling- the bootstrapped histograms are otherwise biased
1391 from the limited sampling of the US histos. However, bootstrapping from
1392 the Gaussian seems to yield a similar estimate. */
1396 y = gmx_rng_gaussian_table(opt->rng);
1399 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1404 while ( (ibin += nbins) < 0)
1409 else if (ibin >= nbins)
1411 while ( (ibin -= nbins) >= nbins)
1418 if (ibin >= 0 && ibin < nbins)
1420 synthWindow->Histo[0][ibin] += 1.;
1427 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1431 /*! \brief Write all histograms to a file
1433 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1434 * sets of bootstrapped histograms.
1436 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1437 int bs_index, t_UmbrellaOptions *opt)
1439 char *fn = 0, *buf = 0, title[256];
1445 snew(fn, strlen(fnhist)+10);
1446 snew(buf, strlen(fnhist)+1);
1447 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1448 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1452 fn = strdup(fnhist);
1453 strcpy(title, "Umbrella histograms");
1456 fp = xvgropen(fn, title, "z", "count", opt->oenv);
1459 /* Write histograms */
1460 for (l = 0; l < bins; ++l)
1462 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1463 for (i = 0; i < nWindows; ++i)
1465 for (j = 0; j < window[i].nPull; ++j)
1467 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1474 printf("Wrote %s\n", fn);
1482 //! Used for qsort to sort random numbers
1483 int func_wham_is_larger(const void *a, const void *b)
1502 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1503 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1510 /* generate ordered random numbers between 0 and nAllPull */
1511 for (i = 0; i < nAllPull-1; i++)
1513 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1515 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1516 r[nAllPull-1] = 1.0*nAllPull;
1518 synthwin[0].bsWeight[0] = r[0];
1519 for (i = 1; i < nAllPull; i++)
1521 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1524 /* avoid to have zero weight by adding a tiny value */
1525 for (i = 0; i < nAllPull; i++)
1527 if (synthwin[i].bsWeight[0] < 1e-5)
1529 synthwin[i].bsWeight[0] = 1e-5;
1536 //! The main bootstrapping routine
1537 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1538 char* ylabel, double *profile,
1539 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1541 t_UmbrellaWindow * synthWindow;
1542 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1543 int i, j, *randomArray = 0, winid, pullid, ib;
1544 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1546 gmx_bool bExact = FALSE;
1548 /* init random generator */
1549 if (opt->bsSeed == -1)
1551 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1555 opt->rng = gmx_rng_init(opt->bsSeed);
1558 snew(bsProfile, opt->bins);
1559 snew(bsProfiles_av, opt->bins);
1560 snew(bsProfiles_av2, opt->bins);
1562 /* Create array of all pull groups. Note that different windows
1563 may have different nr of pull groups
1564 First: Get total nr of pull groups */
1566 for (i = 0; i < nWindows; i++)
1568 nAllPull += window[i].nPull;
1570 snew(allPull_winId, nAllPull);
1571 snew(allPull_pullId, nAllPull);
1573 /* Setup one array of all pull groups */
1574 for (i = 0; i < nWindows; i++)
1576 for (j = 0; j < window[i].nPull; j++)
1578 allPull_winId[iAllPull] = i;
1579 allPull_pullId[iAllPull] = j;
1584 /* setup stuff for synthetic windows */
1585 snew(synthWindow, nAllPull);
1586 for (i = 0; i < nAllPull; i++)
1588 synthWindow[i].nPull = 1;
1589 synthWindow[i].nBin = opt->bins;
1590 snew(synthWindow[i].Histo, 1);
1591 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1593 snew(synthWindow[i].Histo[0], opt->bins);
1595 snew(synthWindow[i].N, 1);
1596 snew(synthWindow[i].pos, 1);
1597 snew(synthWindow[i].z, 1);
1598 snew(synthWindow[i].k, 1);
1599 snew(synthWindow[i].bContrib, 1);
1600 snew(synthWindow[i].g, 1);
1601 snew(synthWindow[i].bsWeight, 1);
1604 switch (opt->bsMethod)
1607 snew(randomArray, nAllPull);
1608 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1609 please_cite(stdout, "Hub2006");
1611 case bsMethod_BayesianHist:
1612 /* just copy all histogams into synthWindow array */
1613 for (i = 0; i < nAllPull; i++)
1615 winid = allPull_winId [i];
1616 pullid = allPull_pullId[i];
1617 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1621 case bsMethod_trajGauss:
1622 calc_cumulatives(window, nWindows, opt, fnhist);
1625 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1628 /* do bootstrapping */
1629 fp = xvgropen(fnprof, "Boot strap profiles", "z", ylabel, opt->oenv);
1630 for (ib = 0; ib < opt->nBootStrap; ib++)
1632 printf(" *******************************************\n"
1633 " ******** Start bootstrap nr %d ************\n"
1634 " *******************************************\n", ib+1);
1636 switch (opt->bsMethod)
1639 /* bootstrap complete histograms from given histograms */
1640 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1641 for (i = 0; i < nAllPull; i++)
1643 winid = allPull_winId [randomArray[i]];
1644 pullid = allPull_pullId[randomArray[i]];
1645 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1648 case bsMethod_BayesianHist:
1649 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1650 setRandomBsWeights(synthWindow, nAllPull, opt);
1653 case bsMethod_trajGauss:
1654 /* create new histos from given histos, that is generate new hypothetical
1656 for (i = 0; i < nAllPull; i++)
1658 winid = allPull_winId[i];
1659 pullid = allPull_pullId[i];
1660 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1665 /* write histos in case of verbose output */
1666 if (opt->bs_verbose)
1668 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1675 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1678 if ( (i%opt->stepUpdateContrib) == 0)
1680 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1682 if (maxchange < opt->Tolerance)
1686 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1688 printf("\t%4d) Maximum change %e\n", i, maxchange);
1690 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1693 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1694 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1698 prof_normalization_and_unit(bsProfile, opt);
1701 /* symmetrize profile around z=0 */
1704 symmetrizeProfile(bsProfile, opt);
1707 /* save stuff to get average and stddev */
1708 for (i = 0; i < opt->bins; i++)
1711 bsProfiles_av[i] += tmp;
1712 bsProfiles_av2[i] += tmp*tmp;
1713 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1719 /* write average and stddev */
1720 fp = xvgropen(fnres, "Average and stddev from bootstrapping", "z", ylabel, opt->oenv);
1721 fprintf(fp, "@TYPE xydy\n");
1722 for (i = 0; i < opt->bins; i++)
1724 bsProfiles_av [i] /= opt->nBootStrap;
1725 bsProfiles_av2[i] /= opt->nBootStrap;
1726 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1727 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1728 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1731 printf("Wrote boot strap result to %s\n", fnres);
1734 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1735 int whaminFileType(char *fn)
1739 if (strcmp(fn+len-3, "tpr") == 0)
1743 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1745 return whamin_pullxf;
1747 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1753 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1755 return whamin_unknown;
1758 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1759 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1760 t_UmbrellaOptions *opt)
1762 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1763 int nread, sizenow, i, block = 1;
1766 fp = ffopen(fn, "r");
1769 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1771 if (strlen(tmp) >= WHAM_MAXFILELEN)
1773 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1775 if (nread >= sizenow)
1778 srenew(filename, sizenow);
1779 for (i = sizenow-block; i < sizenow; i++)
1781 snew(filename[i], WHAM_MAXFILELEN);
1784 /* remove newline if there is one */
1785 if (tmp[strlen(tmp)-1] == '\n')
1787 tmp[strlen(tmp)-1] = '\0';
1789 strcpy(filename[nread], tmp);
1792 printf("Found file %s in %s\n", filename[nread], fn);
1796 *filenamesRet = filename;
1800 //! Open a file or a pipe to a gzipped file
1801 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1803 char Buffer[1024], gunzip[1024], *Path = 0;
1805 static gmx_bool bFirst = 1;
1807 /* gzipped pdo file? */
1808 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1810 /* search gunzip executable */
1811 if (!(Path = getenv("GMX_PATH_GZIP")))
1813 if (gmx_fexist("/bin/gunzip"))
1815 sprintf(gunzip, "%s", "/bin/gunzip");
1817 else if (gmx_fexist("/usr/bin/gunzip"))
1819 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1823 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1824 "You may want to define the path to gunzip "
1825 "with the environment variable GMX_PATH_GZIP.", gunzip);
1830 sprintf(gunzip, "%s/gunzip", Path);
1831 if (!gmx_fexist(gunzip))
1833 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1834 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1839 printf("Using gunzig executable %s\n", gunzip);
1842 if (!gmx_fexist(fn))
1844 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1846 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1849 printf("Executing command '%s'\n", Buffer);
1852 if ((pipe = popen(Buffer, "r")) == NULL)
1854 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1857 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1863 pipe = ffopen(fn, "r");
1870 //! Close file or pipe
1871 void pdo_close_file(FILE *fp)
1880 //! Reading all pdo files
1881 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1882 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1885 real mintmp, maxtmp, done = 0.;
1888 /* char Buffer0[1000]; */
1892 gmx_fatal(FARGS, "No files found. Hick.");
1895 /* if min and max are not given, get min and max from the input files */
1898 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1901 for (i = 0; i < nfiles; ++i)
1903 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1904 /*fgets(Buffer0,999,file);
1905 fprintf(stderr,"First line '%s'\n",Buffer0); */
1906 done = 100.0*(i+1)/nfiles;
1907 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1912 read_pdo_header(file, header, opt);
1913 /* here only determine min and max of this window */
1914 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1915 if (maxtmp > opt->max)
1919 if (mintmp < opt->min)
1925 pdo_close_file(file);
1933 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1934 if (opt->bBoundsOnly)
1936 printf("Found option -boundsonly, now exiting.\n");
1940 /* store stepsize in profile */
1941 opt->dz = (opt->max-opt->min)/opt->bins;
1943 /* Having min and max, we read in all files */
1944 /* Loop over all files */
1945 for (i = 0; i < nfiles; ++i)
1947 done = 100.0*(i+1)/nfiles;
1948 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1953 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1954 read_pdo_header(file, header, opt);
1955 /* load data into window */
1956 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
1957 if ((window+i)->Ntot[0] == 0)
1959 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1963 pdo_close_file(file);
1971 for (i = 0; i < nfiles; ++i)
1978 //! translate 0/1 to N/Y to write pull dimensions
1979 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
1981 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
1982 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
1987 static int first = 1;
1989 /* printf("Reading %s \n",fn); */
1990 read_tpx_state(fn, &ir, &state, NULL, NULL);
1992 if (ir.ePull != epullUMBRELLA)
1994 gmx_fatal(FARGS, "This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
1995 " (ir.ePull = %d)\n", epull_names[ir.ePull], ir.ePull);
1998 /* nr of pull groups */
1999 ngrp = ir.pull->ngrp;
2002 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d pull groups\n", ngrp);
2005 header->npullgrps = ir.pull->ngrp;
2006 header->pull_geometry = ir.pull->eGeom;
2007 copy_ivec(ir.pull->dim, header->pull_dim);
2008 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2009 if (header->pull_geometry == epullgPOS && header->pull_ndim > 1)
2011 gmx_fatal(FARGS, "Found pull geometry 'position' and more than 1 pull dimension (%d).\n"
2012 "Hence, the pull potential does not correspond to a one-dimensional umbrella potential.\n"
2013 "If you have some special umbrella setup you may want to write your own pdo files\n"
2014 "and feed them into g_wham. Check g_wham -h !\n", header->pull_ndim);
2016 snew(header->k, ngrp);
2017 snew(header->init_dist, ngrp);
2018 snew(header->umbInitDist, ngrp);
2020 /* only z-direction with epullgCYL? */
2021 if (header->pull_geometry == epullgCYL)
2023 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2025 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2026 "However, found dimensions [%s %s %s]\n",
2027 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2028 int2YN(header->pull_dim[ZZ]));
2032 for (i = 0; i < ngrp; i++)
2034 header->k[i] = ir.pull->grp[i+1].k;
2035 if (header->k[i] == 0.0)
2037 gmx_fatal(FARGS, "Pull group %d has force constant of of 0.0 in %s.\n"
2038 "That doesn't seem to be an Umbrella tpr.\n",
2041 copy_rvec(ir.pull->grp[i+1].init, header->init_dist[i]);
2043 /* initial distance to reference */
2044 switch (header->pull_geometry)
2047 for (d = 0; d < DIM; d++)
2049 if (header->pull_dim[d])
2051 header->umbInitDist[i] = header->init_dist[i][d];
2056 /* umbrella distance stored in init_dist[i][0] for geometry cylinder (not in ...[i][ZZ]) */
2060 header->umbInitDist[i] = header->init_dist[i][0];
2063 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2067 if (opt->verbose || first)
2069 printf("File %s, %d groups, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n",
2070 fn, header->npullgrps, epullg_names[header->pull_geometry],
2071 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2073 for (i = 0; i < ngrp; i++)
2075 printf("\tgrp %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2078 if (!opt->verbose && first)
2080 printf("\tUse option -v to see this output for all input tpr files\n");
2086 //! 2-norm in a ndim-dimensional space
2087 double dist_ndim(double **dx, int ndim, int line)
2091 for (i = 0; i < ndim; i++)
2093 r2 += sqr(dx[i][line]);
2098 //! Read pullx.xvg or pullf.xvg
2099 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2100 t_UmbrellaWindow * window,
2101 t_UmbrellaOptions *opt,
2102 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2103 t_groupselection *groupsel)
2105 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2106 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerGrp, nColRefOnce, nColRefEachGrp, nColExpect, ntot;
2107 real min, max, minfound = 1e20, maxfound = -1e20;
2108 gmx_bool dt_ok, timeok, bHaveForce;
2109 const char *quantity;
2110 const int blocklen = 4096;
2112 static gmx_bool bFirst = TRUE;
2115 in force output pullf.xvg:
2116 No reference, one column per pull group
2117 in position output pullx.xvg (not cylinder)
2118 ndim reference, ndim columns per pull group
2119 in position output pullx.xvg (in geometry cylinder):
2120 ndim*2 columns per pull group (ndim for ref, ndim for group)
2123 nColPerGrp = opt->bPullx ? header->pull_ndim : 1;
2124 quantity = opt->bPullx ? "position" : "force";
2128 if (header->pull_geometry == epullgCYL)
2130 /* Geometry cylinder -> reference group before each pull group */
2131 nColRefEachGrp = header->pull_ndim;
2136 /* Geometry NOT cylinder -> reference group only once after time column */
2138 nColRefOnce = header->pull_ndim;
2141 else /* read forces, no reference groups */
2147 nColExpect = 1 + nColRefOnce + header->npullgrps*(nColRefEachGrp+nColPerGrp);
2148 bHaveForce = opt->bPullf;
2150 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2151 That avoids the somewhat tedious extraction of the right columns from the pullx files
2152 to compute the distances projection on the vector. Sorry for the laziness. */
2153 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2156 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2157 "reading \n(option -if) is supported at present, "
2158 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2159 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2160 epullg_names[header->pull_geometry]);
2163 nt = read_xvg(fn, &y, &ny);
2165 /* Check consistency */
2168 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2172 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2173 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2175 printf("Expecting these columns in pull file:\n"
2176 "\t%d reference columns for all pull groups together\n"
2177 "\t%d reference columns for each individual pull group\n"
2178 "\t%d data columns for each pull group\n", nColRefOnce, nColRefEachGrp, nColPerGrp);
2179 printf("With %d pull groups, expect %d columns (including the time column)\n", header->npullgrps, nColExpect);
2182 if (ny != nColExpect)
2184 gmx_fatal(FARGS, "Found %d pull groups in %s,\n but %d data columns in %s (expected %d)\n"
2185 "\nMaybe you confused options -ix and -if ?\n",
2186 header->npullgrps, fntpr, ny-1, fn, nColExpect-1);
2191 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerGrp, quantity, fn);
2201 window->dt = y[0][1]-y[0][0];
2203 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2205 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2208 /* Need to alocate memory and set up structure */
2212 /* Use only groups selected with option -is file */
2213 if (header->npullgrps != groupsel->n)
2215 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2216 header->npullgrps, groupsel->n);
2218 window->nPull = groupsel->nUse;
2222 window->nPull = header->npullgrps;
2225 window->nBin = bins;
2226 snew(window->Histo, window->nPull);
2227 snew(window->z, window->nPull);
2228 snew(window->k, window->nPull);
2229 snew(window->pos, window->nPull);
2230 snew(window->N, window->nPull);
2231 snew(window->Ntot, window->nPull);
2232 snew(window->g, window->nPull);
2233 snew(window->bsWeight, window->nPull);
2234 window->bContrib = 0;
2236 if (opt->bCalcTauInt)
2238 snew(window->ztime, window->nPull);
2242 window->ztime = NULL;
2244 snew(lennow, window->nPull);
2246 for (g = 0; g < window->nPull; ++g)
2249 window->bsWeight[g] = 1.;
2250 snew(window->Histo[g], bins);
2252 window->Ntot[g] = 0;
2254 if (opt->bCalcTauInt)
2256 window->ztime[g] = NULL;
2260 /* Copying umbrella center and force const is more involved since not
2261 all pull groups from header (tpr file) may be used in window variable */
2262 for (g = 0, gUsed = 0; g < header->npullgrps; ++g)
2264 if (groupsel && (groupsel->bUse[g] == FALSE))
2268 window->k[gUsed] = header->k[g];
2269 window->pos[gUsed] = header->umbInitDist[g];
2274 { /* only determine min and max */
2277 min = max = bins = 0; /* Get rid of warnings */
2281 for (i = 0; i < nt; i++)
2283 /* Do you want that time frame? */
2284 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2286 /* get time step of pdo file and get dstep from opt->dt */
2296 dstep = static_cast<int>(opt->dt/dt+0.5);
2304 window->dt = dt*dstep;
2308 dt_ok = (i%dstep == 0);
2309 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2311 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2312 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2316 /* Note: if groupsel == NULL:
2317 * all groups in pullf/x file are stored in this window, and gUsed == g
2318 * if groupsel != NULL:
2319 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2322 for (g = 0; g < header->npullgrps; ++g)
2324 /* was this group selected for application in WHAM? */
2325 if (groupsel && (groupsel->bUse[g] == FALSE))
2334 /* y has 1 time column y[0] and one column per force y[1],...,y[nGrps] */
2336 pos = -force/header->k[g] + header->umbInitDist[g];
2340 switch (header->pull_geometry)
2343 /* y has 1 time column y[0] and nColPerGrps columns per pull group;
2344 Distance to reference: */
2345 /* pos=dist_ndim(y+1+nColRef+g*nColPerGrp,header->pull_ndim,i); gmx 4.0 */
2346 pos = dist_ndim(y + 1 + nColRefOnce + g*nColPerGrp, header->pull_ndim, i);
2350 Time ref[ndim] group1[ndim] group2[ndim] ... */
2353 Time ref1[ndim] group1[ndim] ref2[ndim] group2[ndim] ... */
2355 /* * with geometry==position, we have the reference once (nColRefOnce==ndim), but
2356 no extra reference group columns before each group (nColRefEachGrp==0)
2358 * with geometry==cylinder, we have no initial ref group column (nColRefOnce==0),
2359 but ndim ref group colums before every group (nColRefEachGrp==ndim)
2360 Distance to reference: */
2361 pos = y[1 + nColRefOnce + nColRefEachGrp + g*(nColRefEachGrp+nColPerGrp)][i];
2364 gmx_fatal(FARGS, "Bad error, this error should have been catched before. Ups.\n");
2368 /* printf("grp %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2382 if (gUsed >= window->nPull)
2384 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2385 gUsed, window->nPull);
2388 if (opt->bCalcTauInt && !bGetMinMax)
2390 /* save time series for autocorrelation analysis */
2391 ntot = window->Ntot[gUsed];
2392 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2393 if (ntot >= lennow[gUsed])
2395 lennow[gUsed] += blocklen;
2396 srenew(window->ztime[gUsed], lennow[gUsed]);
2398 window->ztime[gUsed][ntot] = pos;
2401 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2406 while ( (ibin += bins) < 0)
2411 else if (ibin >= bins)
2413 while ( (ibin -= bins) >= bins)
2419 if (ibin >= 0 && ibin < bins)
2421 window->Histo[gUsed][ibin] += 1.;
2424 window->Ntot[gUsed]++;
2428 else if (t > opt->tmax)
2432 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2444 for (i = 0; i < ny; i++)
2450 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2451 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2452 t_UmbrellaHeader* header,
2453 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2456 real mintmp, maxtmp;
2458 printf("Reading %d tpr and pullf files\n", nfiles/2);
2460 /* min and max not given? */
2463 printf("Automatic determination of boundaries...\n");
2466 for (i = 0; i < nfiles; i++)
2468 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2470 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2472 read_tpr_header(fnTprs[i], header, opt);
2473 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2475 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2477 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2478 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2479 if (maxtmp > opt->max)
2483 if (mintmp < opt->min)
2488 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2489 if (opt->bBoundsOnly)
2491 printf("Found option -boundsonly, now exiting.\n");
2495 /* store stepsize in profile */
2496 opt->dz = (opt->max-opt->min)/opt->bins;
2498 for (i = 0; i < nfiles; i++)
2500 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2502 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2504 read_tpr_header(fnTprs[i], header, opt);
2505 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2507 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2509 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2510 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2511 if (window[i].Ntot[0] == 0)
2513 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2517 for (i = 0; i < nfiles; i++)
2526 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2528 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2529 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2531 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2533 int nlines, ny, i, ig;
2536 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2537 nlines = read_xvg(fn, &iact, &ny);
2538 if (nlines != nwins)
2540 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2543 for (i = 0; i < nlines; i++)
2545 if (window[i].nPull != ny)
2547 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2548 "number of pull groups is different in different simulations. That is not\n"
2549 "supported yet. Sorry.\n");
2551 for (ig = 0; ig < window[i].nPull; ig++)
2553 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2554 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2556 if (iact[ig][i] <= 0.0)
2558 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2565 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2568 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2569 * that -in case of limited sampling- the ACT may be severely underestimated.
2570 * Note: the g=1+2tau are overwritten.
2571 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2574 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2577 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2579 /* only evaluate within +- 3sigma of the Gausian */
2580 siglim = 3.0*opt->sigSmoothIact;
2581 siglim2 = dsqr(siglim);
2582 /* pre-factor of Gaussian */
2583 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2584 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2586 for (i = 0; i < nwins; i++)
2588 snew(window[i].tausmooth, window[i].nPull);
2589 for (ig = 0; ig < window[i].nPull; ig++)
2593 pos = window[i].pos[ig];
2594 for (j = 0; j < nwins; j++)
2596 for (jg = 0; jg < window[j].nPull; jg++)
2598 dpos2 = dsqr(window[j].pos[jg]-pos);
2599 if (dpos2 < siglim2)
2601 w = gaufact*exp(-dpos2*invtwosig2);
2603 tausm += w*window[j].tau[jg];
2604 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2605 w,dpos2,pos,gaufact,invtwosig2); */
2610 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2612 window[i].tausmooth[ig] = tausm;
2616 window[i].tausmooth[ig] = window[i].tau[ig];
2618 window[i].g[ig] = 1+2*tausm/window[i].dt;
2623 //! Stop integrating autoccorelation function when ACF drops under this value
2624 #define WHAM_AC_ZERO_LIMIT 0.05
2626 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2628 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2629 t_UmbrellaOptions *opt, const char *fn)
2631 int i, ig, ncorr, ntot, j, k, *count, restart;
2632 real *corr, c0, dt, tmp;
2633 real *ztime, av, tausteps;
2634 FILE *fp, *fpcorr = 0;
2638 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2639 "time [ps]", "autocorrelation function", opt->oenv);
2643 for (i = 0; i < nwins; i++)
2645 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2647 ntot = window[i].Ntot[0];
2649 /* using half the maximum time as length of autocorrelation function */
2653 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2654 " points. Provide more pull data!", ntot);
2657 /* snew(corrSq,ncorr); */
2660 snew(window[i].tau, window[i].nPull);
2661 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2667 for (ig = 0; ig < window[i].nPull; ig++)
2669 if (ntot != window[i].Ntot[ig])
2671 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2672 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2674 ztime = window[i].ztime[ig];
2676 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2677 for (j = 0, av = 0; (j < ntot); j++)
2682 for (k = 0; (k < ncorr); k++)
2687 for (j = 0; (j < ntot); j += restart)
2689 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2691 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2693 /* corrSq[k] += tmp*tmp; */
2697 /* divide by nr of frames for each time displacement */
2698 for (k = 0; (k < ncorr); k++)
2700 /* count probably = (ncorr-k+(restart-1))/restart; */
2701 corr[k] = corr[k]/count[k];
2702 /* variance of autocorrelation function */
2703 /* corrSq[k]=corrSq[k]/count[k]; */
2705 /* normalize such that corr[0] == 0 */
2707 for (k = 0; (k < ncorr); k++)
2710 /* corrSq[k]*=c0*c0; */
2713 /* write ACFs in verbose mode */
2716 for (k = 0; (k < ncorr); k++)
2718 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2720 fprintf(fpcorr, "&\n");
2723 /* esimate integrated correlation time, fitting is too unstable */
2724 tausteps = 0.5*corr[0];
2725 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2726 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2728 tausteps += corr[j];
2731 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2732 Kumar et al, eq. 28 ff. */
2733 window[i].tau[ig] = tausteps*dt;
2734 window[i].g[ig] = 1+2*tausteps;
2735 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2746 /* plot IACT along reaction coordinate */
2747 fp = xvgropen(fn, "Integrated autocorrelation times", "z", "IACT [ps]", opt->oenv);
2748 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2749 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2750 for (i = 0; i < nwins; i++)
2752 fprintf(fp, "# %3d ", i);
2753 for (ig = 0; ig < window[i].nPull; ig++)
2755 fprintf(fp, " %11g", window[i].tau[ig]);
2759 for (i = 0; i < nwins; i++)
2761 for (ig = 0; ig < window[i].nPull; ig++)
2763 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2766 if (opt->sigSmoothIact > 0.0)
2768 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2769 opt->sigSmoothIact);
2770 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2771 smoothIact(window, nwins, opt);
2772 fprintf(fp, "&\n@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2773 fprintf(fp, "@ s1 symbol color 2\n");
2774 for (i = 0; i < nwins; i++)
2776 for (ig = 0; ig < window[i].nPull; ig++)
2778 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2783 printf("Wrote %s\n", fn);
2787 * compute average and sigma of each umbrella histogram
2789 void averageSigma(t_UmbrellaWindow *window, int nwins)
2792 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2794 for (i = 0; i < nwins; i++)
2796 snew(window[i].aver, window[i].nPull);
2797 snew(window[i].sigma, window[i].nPull);
2799 ntot = window[i].Ntot[0];
2800 for (ig = 0; ig < window[i].nPull; ig++)
2802 ztime = window[i].ztime[ig];
2803 for (k = 0, av = 0.; k < ntot; k++)
2808 for (k = 0, sum2 = 0.; k < ntot; k++)
2813 sig = sqrt(sum2/ntot);
2814 window[i].aver[ig] = av;
2816 /* Note: This estimate for sigma is biased from the limited sampling.
2817 Correct sigma by n/(n-1) where n = number of independent
2818 samples. Only possible if IACT is known.
2822 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2823 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2827 window[i].sigma[ig] = sig;
2829 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2836 * Use histograms to compute average force on pull group.
2838 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2840 int i, j, bins = opt->bins, k;
2841 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2844 dz = (max-min)/bins;
2845 ztot = opt->max-min;
2848 /* Compute average displacement from histograms */
2849 for (j = 0; j < nWindows; ++j)
2851 snew(window[j].forceAv, window[j].nPull);
2852 for (k = 0; k < window[j].nPull; ++k)
2856 for (i = 0; i < opt->bins; ++i)
2858 temp = (1.0*i+0.5)*dz+min;
2859 distance = temp - window[j].pos[k];
2861 { /* in cyclic wham: */
2862 if (distance > ztot_half) /* |distance| < ztot_half */
2866 else if (distance < -ztot_half)
2871 w = window[j].Histo[k][i]/window[j].g[k];
2872 displAv += w*distance;
2874 /* Are we near min or max? We are getting wrong forces from the histgrams since
2875 the histograms are zero outside [min,max). Therefore, assume that the position
2876 on the other side of the histomgram center is equally likely. */
2879 posmirrored = window[j].pos[k]-distance;
2880 if (posmirrored >= max || posmirrored < min)
2882 displAv += -w*distance;
2889 /* average force from average displacement */
2890 window[j].forceAv[k] = displAv*window[j].k[k];
2891 /* sigma from average square displacement */
2892 /* window[j].sigma [k] = sqrt(displAv2); */
2893 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2899 * Check if the complete reaction coordinate is covered by the histograms
2901 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2902 t_UmbrellaOptions *opt)
2904 int i, ig, j, bins = opt->bins, bBoundary;
2905 real avcount = 0, z, relcount, *count;
2906 snew(count, opt->bins);
2908 for (j = 0; j < opt->bins; ++j)
2910 for (i = 0; i < nwins; i++)
2912 for (ig = 0; ig < window[i].nPull; ig++)
2914 count[j] += window[i].Histo[ig][j];
2917 avcount += 1.0*count[j];
2920 for (j = 0; j < bins; ++j)
2922 relcount = count[j]/avcount;
2923 z = (j+0.5)*opt->dz+opt->min;
2924 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2925 /* check for bins with no data */
2928 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2929 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2931 /* and check for poor sampling */
2932 else if (relcount < 0.005 && !bBoundary)
2934 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2940 /*! \brief Compute initial potential by integrating the average force
2942 * This speeds up the convergence by roughly a factor of 2
2944 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2946 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
2947 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
2950 dz = (opt->max-min)/bins;
2952 printf("Getting initial potential by integration.\n");
2954 /* Compute average displacement from histograms */
2955 computeAverageForce(window, nWindows, opt);
2957 /* Get force for each bin from all histograms in this bin, or, alternatively,
2958 if no histograms are inside this bin, from the closest histogram */
2961 for (j = 0; j < opt->bins; ++j)
2963 pos = (1.0*j+0.5)*dz+min;
2967 groupmin = winmin = 0;
2968 for (i = 0; i < nWindows; i++)
2970 for (ig = 0; ig < window[i].nPull; ig++)
2972 hispos = window[i].pos[ig];
2973 dist = fabs(hispos-pos);
2974 /* average force within bin */
2978 fAv += window[i].forceAv[ig];
2980 /* at the same time, rememer closest histogram */
2989 /* if no histogram found in this bin, use closest histogram */
2996 fAv = window[winmin].forceAv[groupmin];
3000 for (j = 1; j < opt->bins; ++j)
3002 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3005 /* cyclic wham: linearly correct possible offset */
3008 diff = (pot[bins-1]-pot[0])/(bins-1);
3009 for (j = 1; j < opt->bins; ++j)
3016 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", "z", "PMF [kJ/mol]", opt->oenv);
3017 for (j = 0; j < opt->bins; ++j)
3019 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3022 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3025 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3026 energy offsets which are usually determined by wham
3027 First: turn pot into probabilities:
3029 for (j = 0; j < opt->bins; ++j)
3031 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
3033 calc_z(pot, window, nWindows, opt, TRUE);
3039 //! Count number of words in an line
3040 static int wordcount(char *ptr)
3045 if (strlen(ptr) == 0)
3049 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3051 for (i = 0; (ptr[i] != '\0'); i++)
3053 is[cur] = isspace(ptr[i]);
3054 if ((i > 0) && (is[cur] && !is[1-cur]))
3063 /*! \brief Read input file for pull group selection (option -is)
3065 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3067 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3070 int i, iline, n, len = STRLEN, temp;
3071 char *ptr = 0, *tmpbuf = 0;
3072 char fmt[1024], fmtign[1024];
3073 int block = 1, sizenow;
3075 fp = ffopen(opt->fnGroupsel, "r");
3076 opt->groupsel = NULL;
3081 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3086 if (iline >= sizenow)
3089 srenew(opt->groupsel, sizenow);
3091 opt->groupsel[iline].n = n;
3092 opt->groupsel[iline].nUse = 0;
3093 snew(opt->groupsel[iline].bUse, n);
3096 for (i = 0; i < n; i++)
3098 strcpy(fmt, fmtign);
3100 if (sscanf(ptr, fmt, &temp))
3102 opt->groupsel[iline].bUse[i] = (temp > 0);
3103 if (opt->groupsel[iline].bUse[i])
3105 opt->groupsel[iline].nUse++;
3108 strcat(fmtign, "%*s");
3112 opt->nGroupsel = iline;
3113 if (nTpr != opt->nGroupsel)
3115 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3119 printf("\nUse only these pull groups:\n");
3120 for (iline = 0; iline < nTpr; iline++)
3122 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3123 for (i = 0; i < opt->groupsel[iline].n; i++)
3125 if (opt->groupsel[iline].bUse[i])
3138 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3140 /*! Number of elements in fnm (used for command line parsing) */
3141 #define NFILE asize(fnm)
3143 //! The main g_wham routine
3144 int gmx_wham(int argc, char *argv[])
3146 const char *desc[] = {
3147 "This is an analysis program that implements the Weighted",
3148 "Histogram Analysis Method (WHAM). It is intended to analyze",
3149 "output files generated by umbrella sampling simulations to ",
3150 "compute a potential of mean force (PMF). [PAR] ",
3151 "At present, three input modes are supported.[BR]",
3152 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
3153 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
3154 " AND, with option [TT]-ix[tt], a file which contains file names of",
3155 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
3156 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
3157 " first pullx, etc.[BR]",
3158 "[TT]*[tt] Same as the previous input mode, except that the the user",
3159 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3160 " From the pull force the position in the umbrella potential is",
3161 " computed. This does not work with tabulated umbrella potentials.[BR]"
3162 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
3163 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3164 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
3165 " feed them with the [TT]-ip[tt] option into to [TT]g_wham[tt]. The [TT].pdo[tt] file header",
3166 " must be similar to the following:[PAR]",
3167 "[TT]# UMBRELLA 3.0[BR]",
3168 "# Component selection: 0 0 1[BR]",
3170 "# Ref. Group 'TestAtom'[BR]",
3171 "# Nr. of pull groups 2[BR]",
3172 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
3173 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
3175 "The number of pull groups, umbrella positions, force constants, and names ",
3176 "may (of course) differ. Following the header, a time column and ",
3177 "a data column for each pull group follows (i.e. the displacement",
3178 "with respect to the umbrella center). Up to four pull groups are possible ",
3179 "per [TT].pdo[tt] file at present.[PAR]",
3180 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3181 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3182 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3183 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3184 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3185 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3186 "used, groupsel.dat looks like this:[BR]",
3190 "By default, the output files are[BR]",
3191 " [TT]-o[tt] PMF output file[BR]",
3192 " [TT]-hist[tt] Histograms output file[BR]",
3193 "Always check whether the histograms sufficiently overlap.[PAR]",
3194 "The umbrella potential is assumed to be harmonic and the force constants are ",
3195 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
3196 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
3197 "WHAM OPTIONS[BR]------------[BR]",
3198 " [TT]-bins[tt] Number of bins used in analysis[BR]",
3199 " [TT]-temp[tt] Temperature in the simulations[BR]",
3200 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
3201 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
3202 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
3203 "The data points that are used to compute the profile",
3204 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3205 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3206 "umbrella window.[PAR]",
3207 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3208 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3209 "With energy output, the energy in the first bin is defined to be zero. ",
3210 "If you want the free energy at a different ",
3211 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3212 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3213 "without osmotic gradient), the option [TT]-cycl[tt] is useful. [TT]g_wham[tt] will make use of the ",
3214 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3215 "reaction coordinate will assumed be be neighbors.[PAR]",
3216 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3217 "which may be useful for, e.g. membranes.[PAR]",
3218 "AUTOCORRELATIONS[BR]----------------[BR]",
3219 "With [TT]-ac[tt], [TT]g_wham[tt] estimates the integrated autocorrelation ",
3220 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3221 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3222 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3223 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3224 "Because the IACTs can be severely underestimated in case of limited ",
3225 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3226 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3227 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3228 "integration of the ACFs while the ACFs are larger 0.05.",
3229 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3230 "less robust) method such as fitting to a double exponential, you can ",
3231 "compute the IACTs with [TT]g_analyze[tt] and provide them to [TT]g_wham[tt] with the file ",
3232 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3233 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
3234 "ERROR ANALYSIS[BR]--------------[BR]",
3235 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3236 "otherwise the statistical error may be substantially underestimated. ",
3237 "More background and examples for the bootstrap technique can be found in ",
3238 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
3239 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3240 "Four bootstrapping methods are supported and ",
3241 "selected with [TT]-bs-method[tt].[BR]",
3242 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3243 "data points, and the bootstrap is carried out by assigning random weights to the ",
3244 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3245 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3246 "statistical error is underestimated.[BR]",
3247 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
3248 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3249 "(allowing duplication, i.e. sampling with replacement).",
3250 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
3251 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3252 "divided into blocks and only histograms within each block are mixed. Note that ",
3253 "the histograms within each block must be representative for all possible histograms, ",
3254 "otherwise the statistical error is underestimated.[BR]",
3255 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3256 "such that the generated data points are distributed according the given histograms ",
3257 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3258 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3259 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3260 "Note that this method may severely underestimate the error in case of limited sampling, ",
3261 "that is if individual histograms do not represent the complete phase space at ",
3262 "the respective positions.[BR]",
3263 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3264 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3265 "and width of the umbrella histograms. That method yields similar error estimates ",
3266 "like method [TT]traj[tt].[PAR]"
3267 "Bootstrapping output:[BR]",
3268 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
3269 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
3270 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3271 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3275 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3276 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3277 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3279 static t_UmbrellaOptions opt;
3282 { "-min", FALSE, etREAL, {&opt.min},
3283 "Minimum coordinate in profile"},
3284 { "-max", FALSE, etREAL, {&opt.max},
3285 "Maximum coordinate in profile"},
3286 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3287 "Determine min and max automatically"},
3288 { "-bins", FALSE, etINT, {&opt.bins},
3289 "Number of bins in profile"},
3290 { "-temp", FALSE, etREAL, {&opt.Temperature},
3292 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3294 { "-v", FALSE, etBOOL, {&opt.verbose},
3296 { "-b", FALSE, etREAL, {&opt.tmin},
3297 "First time to analyse (ps)"},
3298 { "-e", FALSE, etREAL, {&opt.tmax},
3299 "Last time to analyse (ps)"},
3300 { "-dt", FALSE, etREAL, {&opt.dt},
3301 "Analyse only every dt ps"},
3302 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3303 "Write histograms and exit"},
3304 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3305 "Determine min and max and exit (with [TT]-auto[tt])"},
3306 { "-log", FALSE, etBOOL, {&opt.bLog},
3307 "Calculate the log of the profile before printing"},
3308 { "-unit", FALSE, etENUM, {en_unit},
3309 "Energy unit in case of log output" },
3310 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3311 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3312 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3313 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3314 { "-sym", FALSE, etBOOL, {&opt.bSym},
3315 "Symmetrize profile around z=0"},
3316 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3317 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3318 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3319 "Calculate integrated autocorrelation times and use in wham"},
3320 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3321 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3322 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3323 "When computing autocorrelation functions, restart computing every .. (ps)"},
3324 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3325 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3326 "during smoothing"},
3327 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3328 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3329 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3330 "Bootstrap method" },
3331 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3332 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3333 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3334 "Seed for bootstrapping. (-1 = use time)"},
3335 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3336 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3337 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3338 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3339 { "-stepout", FALSE, etINT, {&opt.stepchange},
3340 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3341 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3342 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3346 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3347 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3348 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3349 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3350 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3351 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3352 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3353 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3354 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3355 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3356 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3357 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3360 int i, j, l, nfiles, nwins, nfiles2;
3361 t_UmbrellaHeader header;
3362 t_UmbrellaWindow * window = NULL;
3363 double *profile, maxchange = 1e20;
3364 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3365 char **fninTpr, **fninPull, **fninPdo;
3367 FILE *histout, *profout;
3368 char ylabel[256], title[256];
3371 opt.verbose = FALSE;
3372 opt.bHistOnly = FALSE;
3382 /* bootstrapping stuff */
3384 opt.bsMethod = bsMethod_hist;
3385 opt.tauBootStrap = 0.0;
3387 opt.histBootStrapBlockLength = 8;
3388 opt.bs_verbose = FALSE;
3393 opt.Temperature = 298;
3394 opt.Tolerance = 1e-6;
3395 opt.bBoundsOnly = FALSE;
3397 opt.bCalcTauInt = FALSE;
3398 opt.sigSmoothIact = 0.0;
3399 opt.bAllowReduceIact = TRUE;
3400 opt.bInitPotByIntegration = TRUE;
3401 opt.acTrestart = 1.0;
3402 opt.stepchange = 100;
3403 opt.stepUpdateContrib = 100;
3405 if (!parse_common_args(&argc, argv, PCA_BE_NICE,
3406 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3411 opt.unit = nenum(en_unit);
3412 opt.bsMethod = nenum(en_bsMethod);
3414 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3416 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3417 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3418 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3419 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3420 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3421 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3422 if (opt.bTab && opt.bPullf)
3424 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3425 "Provide pullx.xvg or pdo files!");
3428 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3430 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3432 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3434 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3435 "\n\n Check g_wham -h !");
3438 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3439 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3440 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3441 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3442 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3444 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3445 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3446 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3447 if ( (bMinSet || bMaxSet) && bAutoSet)
3449 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3452 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3454 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3457 if (bMinSet && opt.bAuto)
3459 printf("Note: min and max given, switching off -auto.\n");
3463 if (opt.bTauIntGiven && opt.bCalcTauInt)
3465 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3466 "the autocorrelation times. Not both.");
3469 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3471 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3472 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3474 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3476 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3477 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3480 /* Reading gmx4 pull output and tpr files */
3481 if (opt.bTpr || opt.bPullf || opt.bPullx)
3483 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3485 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3486 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3487 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3488 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3489 if (nfiles != nfiles2)
3491 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3492 opt.fnTpr, nfiles2, fnPull);
3495 /* Read file that selects the pull group to be used */
3496 if (opt.fnGroupsel != NULL)
3498 readPullGroupSelection(&opt, fninTpr, nfiles);
3501 window = initUmbrellaWindows(nfiles);
3502 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3505 { /* reading pdo files */
3506 if (opt.fnGroupsel != NULL)
3508 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3509 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3511 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3512 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3513 window = initUmbrellaWindows(nfiles);
3514 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3518 /* enforce equal weight for all histograms? */
3521 enforceEqualWeights(window, nwins);
3524 /* write histograms */
3525 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3526 "z", "count", opt.oenv);
3527 for (l = 0; l < opt.bins; ++l)
3529 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3530 for (i = 0; i < nwins; ++i)
3532 for (j = 0; j < window[i].nPull; ++j)
3534 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3537 fprintf(histout, "\n");
3540 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3543 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3547 /* Using tabulated umbrella potential */
3550 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3553 /* Integrated autocorrelation times provided ? */
3554 if (opt.bTauIntGiven)
3556 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3559 /* Compute integrated autocorrelation times */
3560 if (opt.bCalcTauInt)
3562 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3565 /* calc average and sigma for each histogram
3566 (maybe required for bootstrapping. If not, this is fast anyhow) */
3567 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3569 averageSigma(window, nwins);
3572 /* Get initial potential by simple integration */
3573 if (opt.bInitPotByIntegration)
3575 guessPotByIntegration(window, nwins, &opt);
3578 /* Check if complete reaction coordinate is covered */
3579 checkReactionCoordinateCovered(window, nwins, &opt);
3581 /* Calculate profile */
3582 snew(profile, opt.bins);
3590 if ( (i%opt.stepUpdateContrib) == 0)
3592 setup_acc_wham(profile, window, nwins, &opt);
3594 if (maxchange < opt.Tolerance)
3597 /* if (opt.verbose) */
3598 printf("Switched to exact iteration in iteration %d\n", i);
3600 calc_profile(profile, window, nwins, &opt, bExact);
3601 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3603 printf("\t%4d) Maximum change %e\n", i, maxchange);
3607 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3608 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3610 /* calc error from Kumar's formula */
3611 /* Unclear how the error propagates along reaction coordinate, therefore
3613 /* calc_error_kumar(profile,window, nwins,&opt); */
3615 /* Write profile in energy units? */
3618 prof_normalization_and_unit(profile, &opt);
3619 strcpy(ylabel, en_unit_label[opt.unit]);
3620 strcpy(title, "Umbrella potential");
3624 strcpy(ylabel, "Density of states");
3625 strcpy(title, "Density of states");
3628 /* symmetrize profile around z=0? */
3631 symmetrizeProfile(profile, &opt);
3634 /* write profile or density of states */
3635 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, "z", ylabel, opt.oenv);
3636 for (i = 0; i < opt.bins; ++i)
3638 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3641 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3643 /* Bootstrap Method */
3646 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3647 opt2fn("-hist", NFILE, fnm),
3648 ylabel, profile, window, nwins, &opt);
3652 freeUmbrellaWindows(window, nfiles);
3654 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3655 please_cite(stdout, "Hub2010");