2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
53 #include "gromacs/commandline/pargs.h"
55 #include "gromacs/utility/smalloc.h"
56 #include "gromacs/math/vec.h"
58 #include "gromacs/fileio/tpxio.h"
60 #include "gromacs/random/random.h"
63 #include "gromacs/utility/cstringutil.h"
64 #include "gromacs/fileio/xvgr.h"
66 #include "gromacs/utility/fatalerror.h"
68 //! longest file names allowed in input files
69 #define WHAM_MAXFILELEN 2048
72 * enum for energy units
75 enSel, en_kJ, en_kCal, en_kT, enNr
78 * enum for type of input files (pdos, tpr, or pullf)
81 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
84 /*! \brief enum for bootstrapping method
86 * These bootstrap methods are supported:
87 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
88 * (bsMethod_BayesianHist)
89 * - bootstrap complete histograms (bsMethod_hist)
90 * - bootstrap trajectories from given umbrella histograms. This generates new
91 * "synthetic" histograms (bsMethod_traj)
92 * - bootstrap trajectories from Gaussian with mu/sigma computed from
93 * the respective histogram (bsMethod_trajGauss). This gives very similar
94 * results compared to bsMethod_traj.
96 * ********************************************************************
97 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
98 * JS Hub, BL de Groot, D van der Spoel
99 * g_wham - A free weighted histogram analysis implementation including
100 * robust error and autocorrelation estimates,
101 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
102 * ********************************************************************
105 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
106 bsMethod_traj, bsMethod_trajGauss
110 //! Parameters of the umbrella potentials
114 * \name Using umbrella pull code since gromacs 4.x
117 int npullcrds; //!< nr of pull coordinates in tpr file
118 int pull_geometry; //!< such as distance, direction
119 ivec pull_dim; //!< pull dimension with geometry distance
120 int pull_ndim; //!< nr of pull_dim != 0
121 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
122 real *k; //!< force constants in tpr file
123 real *init_dist; //!< reference displacements
124 real *umbInitDist; //!< reference displacement in umbrella direction
127 * \name Using PDO files common until gromacs 3.x
135 char PullName[4][256];
137 double UmbCons[4][3];
141 //! Data in the umbrella histograms
144 int nPull; //!< nr of pull groups in this pdo or pullf/x file
145 double **Histo; //!< nPull histograms
146 double **cum; //!< nPull cumulative distribution functions
147 int nBin; //!< nr of bins. identical to opt->bins
148 double *k; //!< force constants for the nPull groups
149 double *pos; //!< umbrella positions for the nPull groups
150 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
151 int *N; //!< nr of data points in nPull histograms.
152 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
154 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
156 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
157 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
160 double *tau; //!< intetrated autocorrelation time (IACT)
161 double *tausmooth; //!< smoothed IACT
163 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
165 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
167 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
169 /*! \brief average force estimated from average displacement, fAv=dzAv*k
171 * Used for integration to guess the potential.
174 real *aver; //!< average of histograms
175 real *sigma; //!< stddev of histograms
176 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
179 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
182 int n; //!< total nr of pull groups in this tpr file
183 int nUse; //!< nr of pull groups used
184 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
187 //! Parameters of WHAM
194 const char *fnTpr, *fnPullf, *fnGroupsel;
195 const char *fnPdo, *fnPullx; //!< file names of input
196 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
197 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
199 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
200 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
201 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
202 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
205 * \name Basic WHAM options
208 int bins; //!< nr of bins, min, max, and dz of profile
210 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
211 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
214 * \name Output control
217 gmx_bool bLog; //!< energy output (instead of probability) for profile
218 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
219 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
220 /*! \brief after wham, set prof to zero at this z-position.
221 * When bootstrapping, set zProf0 to a "stable" reference position.
224 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
226 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
227 gmx_bool bAuto; //!< determine min and max automatically but do not exit
229 gmx_bool verbose; //!< more noisy wham mode
230 int stepchange; //!< print maximum change in prof after how many interations
231 output_env_t oenv; //!< xvgr options
234 * \name Autocorrelation stuff
237 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
238 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
239 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
240 real acTrestart; //!< when computing ACT, time between restarting points
242 /* \brief Enforce the same weight for each umbella window, that is
243 * calculate with the same number of data points for
244 * each window. That can be reasonable, if the histograms
245 * have different length, but due to autocorrelation,
246 * a longer simulation should not have larger weightin wham.
252 * \name Bootstrapping stuff
255 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
257 /* \brief bootstrap method
259 * if == bsMethod_hist, consider complete histograms as independent
260 * data points and, hence, only mix complete histograms.
261 * if == bsMethod_BayesianHist, consider complete histograms
262 * as independent data points, but assign random weights
263 * to the histograms during the bootstrapping ("Bayesian bootstrap")
264 * In case of long correlations (e.g., inside a channel), these
265 * will yield a more realistic error.
266 * if == bsMethod_traj(Gauss), generate synthetic histograms
268 * histogram by generating an autocorrelated random sequence
269 * that is distributed according to the respective given
270 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
271 * (instead of from the umbrella histogram) to generate a new
276 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
279 /* \brief when mixing histograms, mix only histograms withing blocks
280 long the reaction coordinate xi. Avoids gaps along xi. */
281 int histBootStrapBlockLength;
283 int bsSeed; //!< random seed for bootstrapping
285 /* \brief Write cumulative distribution functions (CDFs) of histograms
286 and write the generated histograms for each bootstrap */
290 * \name tabulated umbrella potential stuff
294 double *tabX, *tabY, tabMin, tabMax, tabDz;
297 gmx_rng_t rng; //!< gromacs random number generator
300 //! Make an umbrella window (may contain several histograms)
301 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
303 t_UmbrellaWindow *win;
306 for (i = 0; i < nwin; i++)
308 win[i].Histo = win[i].cum = 0;
309 win[i].k = win[i].pos = win[i].z = 0;
310 win[i].N = win[i].Ntot = 0;
311 win[i].g = win[i].tau = win[i].tausmooth = 0;
315 win[i].aver = win[i].sigma = 0;
321 //! Delete an umbrella window (may contain several histograms)
322 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
325 for (i = 0; i < nwin; i++)
329 for (j = 0; j < win[i].nPull; j++)
331 sfree(win[i].Histo[j]);
336 for (j = 0; j < win[i].nPull; j++)
338 sfree(win[i].cum[j]);
343 for (j = 0; j < win[i].nPull; j++)
345 sfree(win[i].bContrib[j]);
357 sfree(win[i].tausmooth);
358 sfree(win[i].bContrib);
360 sfree(win[i].forceAv);
363 sfree(win[i].bsWeight);
369 * Read and setup tabulated umbrella potential
371 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
376 printf("Setting up tabulated potential from file %s\n", fn);
377 nl = read_xvg(fn, &y, &ny);
381 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
383 opt->tabMin = y[0][0];
384 opt->tabMax = y[0][nl-1];
385 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
388 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
389 "ascending z-direction", fn);
391 for (i = 0; i < nl-1; i++)
393 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
395 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
400 for (i = 0; i < nl; i++)
402 opt->tabX[i] = y[0][i];
403 opt->tabY[i] = y[1][i];
405 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
406 opt->tabMin, opt->tabMax, opt->tabDz);
409 //! Read the header of an PDO file (position, force const, nr of groups)
410 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
413 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
415 std::istringstream ist;
418 if (fgets(line, 2048, file) == NULL)
420 gmx_fatal(FARGS, "Error reading header from pdo file\n");
423 ist >> Buffer0 >> Buffer1 >> Buffer2;
424 if (strcmp(Buffer1, "UMBRELLA"))
426 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
427 "(Found in first line: `%s')\n",
428 Buffer1, "UMBRELLA", line);
430 if (strcmp(Buffer2, "3.0"))
432 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
436 if (fgets(line, 2048, file) == NULL)
438 gmx_fatal(FARGS, "Error reading header from pdo file\n");
441 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
442 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
444 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
445 if (header->nDim != 1)
447 gmx_fatal(FARGS, "Currently only supports one dimension");
451 if (fgets(line, 2048, file) == NULL)
453 gmx_fatal(FARGS, "Error reading header from pdo file\n");
456 ist >> Buffer0 >> Buffer1 >> header->nSkip;
459 if (fgets(line, 2048, file) == NULL)
461 gmx_fatal(FARGS, "Error reading header from pdo file\n");
464 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
467 if (fgets(line, 2048, file) == NULL)
469 gmx_fatal(FARGS, "Error reading header from pdo file\n");
472 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
476 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
480 for (i = 0; i < header->nPull; ++i)
482 if (fgets(line, 2048, file) == NULL)
484 gmx_fatal(FARGS, "Error reading header from pdo file\n");
487 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
488 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
489 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
493 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
494 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
498 if (fgets(line, 2048, file) == NULL)
500 gmx_fatal(FARGS, "Cannot read from file\n");
504 if (strcmp(Buffer3, "#####") != 0)
506 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
511 static char *fgets3(FILE *fp, char ptr[], int *len)
516 if (fgets(ptr, *len-1, fp) == NULL)
521 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
523 /* This line is longer than len characters, let's increase len! */
527 if (fgets(p-1, STRLEN, fp) == NULL)
533 if (ptr[slen-1] == '\n')
541 /*! \brief Read the data columns of and PDO file.
543 * TO DO: Get rid of the scanf function to avoid the clang warning.
544 * At the moment, this warning is avoided by hiding the format string
545 * the variable fmtlf.
547 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
548 int fileno, t_UmbrellaWindow * win,
549 t_UmbrellaOptions *opt,
550 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
552 int i, inttemp, bins, count, ntot;
553 real min, max, minfound = 1e20, maxfound = -1e20;
554 double temp, time, time0 = 0, dt;
556 t_UmbrellaWindow * window = 0;
557 gmx_bool timeok, dt_ok = 1;
558 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
559 int len = STRLEN, dstep = 1;
560 const int blocklen = 4096;
570 /* Need to alocate memory and set up structure */
571 window->nPull = header->nPull;
574 snew(window->Histo, window->nPull);
575 snew(window->z, window->nPull);
576 snew(window->k, window->nPull);
577 snew(window->pos, window->nPull);
578 snew(window->N, window->nPull);
579 snew(window->Ntot, window->nPull);
580 snew(window->g, window->nPull);
581 snew(window->bsWeight, window->nPull);
583 window->bContrib = 0;
585 if (opt->bCalcTauInt)
587 snew(window->ztime, window->nPull);
593 snew(lennow, window->nPull);
595 for (i = 0; i < window->nPull; ++i)
598 window->bsWeight[i] = 1.;
599 snew(window->Histo[i], bins);
600 window->k[i] = header->UmbCons[i][0];
601 window->pos[i] = header->UmbPos[i][0];
605 if (opt->bCalcTauInt)
607 window->ztime[i] = 0;
611 /* Done with setup */
617 min = max = bins = 0; /* Get rid of warnings */
622 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
626 if (ptr[0] == '#' || strlen(ptr) < 2)
631 /* Initiate format string */
633 strcat(fmtign, "%*s");
635 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
636 /* Round time to fs */
637 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
639 /* get time step of pdo file */
649 dstep = static_cast<int>(opt->dt/dt+0.5);
657 window->dt = dt*dstep;
662 dt_ok = ((count-1)%dstep == 0);
663 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
665 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
666 time,opt->tmin, opt->tmax, dt_ok,timeok); */
670 for (i = 0; i < header->nPull; ++i)
673 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
674 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
675 if (sscanf(ptr, fmt, &temp))
677 temp += header->UmbPos[i][0];
691 if (opt->bCalcTauInt)
693 /* save time series for autocorrelation analysis */
694 ntot = window->Ntot[i];
695 if (ntot >= lennow[i])
697 lennow[i] += blocklen;
698 srenew(window->ztime[i], lennow[i]);
700 window->ztime[i][ntot] = temp;
708 inttemp = static_cast<int> (temp);
715 else if (inttemp >= bins)
721 if (inttemp >= 0 && inttemp < bins)
723 window->Histo[i][inttemp] += 1.;
731 if (time > opt->tmax)
735 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
751 /*! \brief Set identical weights for all histograms
753 * Normally, the weight is given by the number data points in each
754 * histogram, together with the autocorrelation time. This can be overwritten
755 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
756 * an appropriate autocorrelation times instead of using this function.
758 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
760 int i, k, j, NEnforced;
763 NEnforced = window[0].Ntot[0];
764 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
765 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
766 /* enforce all histograms to have the same weight as the very first histogram */
768 for (j = 0; j < nWindows; ++j)
770 for (k = 0; k < window[j].nPull; ++k)
772 ratio = 1.0*NEnforced/window[j].Ntot[k];
773 for (i = 0; i < window[0].nBin; ++i)
775 window[j].Histo[k][i] *= ratio;
777 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
782 /*! \brief Simple linear interpolation between two given tabulated points
784 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
787 double pl, pu, dz, dp;
789 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
791 if (jl < 0 || ju >= opt->tabNbins)
793 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
794 "Provide an extended table.", dist, jl, ju);
798 dz = dist-opt->tabX[jl];
799 dp = (pu-pl)*dz/opt->tabDz;
805 * Check which bins substiantially contribute (accelerates WHAM)
807 * Don't worry, that routine does not mean we compute the PMF in limited precision.
808 * After rapid convergence (using only substiantal contributions), we always switch to
811 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
812 t_UmbrellaOptions *opt)
814 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
815 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
816 gmx_bool bAnyContrib;
817 static int bFirst = 1;
818 static double wham_contrib_lim;
822 for (i = 0; i < nWindows; ++i)
824 nGrptot += window[i].nPull;
826 wham_contrib_lim = opt->Tolerance/nGrptot;
829 ztot = opt->max-opt->min;
832 for (i = 0; i < nWindows; ++i)
834 if (!window[i].bContrib)
836 snew(window[i].bContrib, window[i].nPull);
838 for (j = 0; j < window[i].nPull; ++j)
840 if (!window[i].bContrib[j])
842 snew(window[i].bContrib[j], opt->bins);
845 for (k = 0; k < opt->bins; ++k)
847 temp = (1.0*k+0.5)*dz+min;
848 distance = temp - window[i].pos[j]; /* distance to umbrella center */
850 { /* in cyclic wham: */
851 if (distance > ztot_half) /* |distance| < ztot_half */
855 else if (distance < -ztot_half)
860 /* Note: there are two contributions to bin k in the wham equations:
861 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
862 ii) exp(- U/(8.314e-3*opt->Temperature))
863 where U is the umbrella potential
864 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
869 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
873 U = tabulated_pot(distance, opt); /* Use tabulated potential */
876 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
877 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
878 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
879 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
880 if (window[i].bContrib[j][k])
886 /* If this histo is far outside min and max all bContrib may be FALSE,
887 causing a floating point exception later on. To avoid that, switch
891 for (k = 0; k < opt->bins; ++k)
893 window[i].bContrib[j][k] = TRUE;
900 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
901 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
906 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
912 //! Compute the PMF (one of the two main WHAM routines)
913 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
914 t_UmbrellaOptions *opt, gmx_bool bExact)
917 double num, ztot_half, ztot, distance, min = opt->min, dz = opt->dz;
918 double denom, U = 0, temp = 0, invg;
920 ztot = opt->max-opt->min;
923 for (i = 0; i < opt->bins; ++i)
926 for (j = 0; j < nWindows; ++j)
928 for (k = 0; k < window[j].nPull; ++k)
930 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
931 temp = (1.0*i+0.5)*dz+min;
932 num += invg*window[j].Histo[k][i];
934 if (!(bExact || window[j].bContrib[k][i]))
938 distance = temp - window[j].pos[k]; /* distance to umbrella center */
940 { /* in cyclic wham: */
941 if (distance > ztot_half) /* |distance| < ztot_half */
945 else if (distance < -ztot_half)
953 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
957 U = tabulated_pot(distance, opt); /* Use tabulated potential */
959 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
962 profile[i] = num/denom;
966 //! Compute the free energy offsets z (one of the two main WHAM routines)
967 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
968 t_UmbrellaOptions *opt, gmx_bool bExact)
971 double U = 0, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot;
972 double MAX = -1e20, total = 0;
974 ztot = opt->max-opt->min;
977 for (i = 0; i < nWindows; ++i)
979 for (j = 0; j < window[i].nPull; ++j)
982 for (k = 0; k < window[i].nBin; ++k)
984 if (!(bExact || window[i].bContrib[j][k]))
988 temp = (1.0*k+0.5)*dz+min;
989 distance = temp - window[i].pos[j]; /* distance to umbrella center */
991 { /* in cyclic wham: */
992 if (distance > ztot_half) /* |distance| < ztot_half */
996 else if (distance < -ztot_half)
1004 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1008 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1011 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1013 /* Avoid floating point exception if window is far outside min and max */
1016 total = -log(total);
1022 temp = fabs(total - window[i].z[j]);
1027 window[i].z[j] = total;
1033 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1034 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1036 int i, j, bins = opt->bins;
1037 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1040 if (min > 0. || max < 0.)
1042 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1043 opt->min, opt->max);
1048 for (i = 0; i < bins; i++)
1052 /* bin left of zsym */
1053 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1054 if (j >= 0 && (j+1) < bins)
1056 /* interpolate profile linearly between bins j and j+1 */
1057 z1 = min+(j+0.5)*dz;
1059 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1060 /* average between left and right */
1061 prof2[i] = 0.5*(profsym+profile[i]);
1065 prof2[i] = profile[i];
1069 memcpy(profile, prof2, bins*sizeof(double));
1073 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1074 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1077 double unit_factor = 1., R_MolarGasConst, diff;
1079 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1082 /* Not log? Nothing to do! */
1088 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1089 if (opt->unit == en_kT)
1093 else if (opt->unit == en_kJ)
1095 unit_factor = R_MolarGasConst*opt->Temperature;
1097 else if (opt->unit == en_kCal)
1099 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1103 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1106 for (i = 0; i < bins; i++)
1108 if (profile[i] > 0.0)
1110 profile[i] = -log(profile[i])*unit_factor;
1114 /* shift to zero at z=opt->zProf0 */
1115 if (!opt->bProf0Set)
1121 /* Get bin with shortest distance to opt->zProf0
1122 (-0.5 from bin position and +0.5 from rounding cancel) */
1123 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1128 else if (imin >= bins)
1132 diff = profile[imin];
1136 for (i = 0; i < bins; i++)
1142 //! Make an array of random integers (used for bootstrapping)
1143 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1145 int ipull, blockBase, nr, ipullRandom;
1147 if (blockLength == 0)
1149 blockLength = nPull;
1152 for (ipull = 0; ipull < nPull; ipull++)
1154 blockBase = (ipull/blockLength)*blockLength;
1156 { /* make sure nothing bad happens in the last block */
1157 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1158 ipullRandom = blockBase + nr;
1160 while (ipullRandom >= nPull);
1161 if (ipullRandom < 0 || ipullRandom >= nPull)
1163 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1164 "blockLength = %d, blockBase = %d\n",
1165 ipullRandom, nPull, nr, blockLength, blockBase);
1167 randomArray[ipull] = ipullRandom;
1169 /*for (ipull=0; ipull<nPull; ipull++)
1170 printf("%d ",randomArray[ipull]); printf("\n"); */
1173 /*! \brief Set pull group information of a synthetic histogram
1175 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1176 * but it is not required if we bootstrap complete histograms.
1178 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1179 t_UmbrellaWindow *thisWindow, int pullid)
1181 synthWindow->N [0] = thisWindow->N [pullid];
1182 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1183 synthWindow->pos [0] = thisWindow->pos [pullid];
1184 synthWindow->z [0] = thisWindow->z [pullid];
1185 synthWindow->k [0] = thisWindow->k [pullid];
1186 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1187 synthWindow->g [0] = thisWindow->g [pullid];
1188 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1191 /*! \brief Calculate cumulative distribution function of of all histograms.
1193 * This allow to create random number sequences
1194 * which are distributed according to the histograms. Required to generate
1195 * the "synthetic" histograms for the Bootstrap method
1197 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1198 t_UmbrellaOptions *opt, const char *fnhist)
1202 char *fn = 0, *buf = 0;
1205 if (opt->bs_verbose)
1207 snew(fn, strlen(fnhist)+10);
1208 snew(buf, strlen(fnhist)+10);
1209 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1210 fp = xvgropen(fn, "CDFs of umbrella windows", "z", "CDF", opt->oenv);
1214 for (i = 0; i < nWindows; i++)
1216 snew(window[i].cum, window[i].nPull);
1217 for (j = 0; j < window[i].nPull; j++)
1219 snew(window[i].cum[j], nbin+1);
1220 window[i].cum[j][0] = 0.;
1221 for (k = 1; k <= nbin; k++)
1223 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1226 /* normalize CDFs. Ensure cum[nbin]==1 */
1227 last = window[i].cum[j][nbin];
1228 for (k = 0; k <= nbin; k++)
1230 window[i].cum[j][k] /= last;
1235 printf("Cumulative distriubtion functions of all histograms created.\n");
1236 if (opt->bs_verbose)
1238 for (k = 0; k <= nbin; k++)
1240 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1241 for (i = 0; i < nWindows; i++)
1243 for (j = 0; j < window[i].nPull; j++)
1245 fprintf(fp, "%g\t", window[i].cum[j][k]);
1250 printf("Wrote cumulative distribution functions to %s\n", fn);
1258 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1260 * This is used to generate a random sequence distributed according to a histogram
1262 void searchCumulative(double xx[], int n, double x, int *j)
1284 else if (x == xx[n-1])
1294 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1295 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1296 int pullid, t_UmbrellaOptions *opt)
1298 int N, i, nbins, r_index, ibin;
1299 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1302 N = thisWindow->N[pullid];
1303 dt = thisWindow->dt;
1304 nbins = thisWindow->nBin;
1306 /* tau = autocorrelation time */
1307 if (opt->tauBootStrap > 0.0)
1309 tausteps = opt->tauBootStrap/dt;
1311 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1313 /* calc tausteps from g=1+2tausteps */
1314 g = thisWindow->g[pullid];
1320 "When generating hypothetical trajctories from given umbrella histograms,\n"
1321 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1322 "cannot be predicted. You have 3 options:\n"
1323 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1324 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1326 " with option -iiact for all umbrella windows.\n"
1327 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1328 " Use option (3) only if you are sure what you're doing, you may severely\n"
1329 " underestimate the error if a too small ACT is given.\n");
1330 gmx_fatal(FARGS, errstr);
1333 synthWindow->N [0] = N;
1334 synthWindow->pos [0] = thisWindow->pos[pullid];
1335 synthWindow->z [0] = thisWindow->z[pullid];
1336 synthWindow->k [0] = thisWindow->k[pullid];
1337 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1338 synthWindow->g [0] = thisWindow->g [pullid];
1339 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1341 for (i = 0; i < nbins; i++)
1343 synthWindow->Histo[0][i] = 0.;
1346 if (opt->bsMethod == bsMethod_trajGauss)
1348 sig = thisWindow->sigma [pullid];
1349 mu = thisWindow->aver [pullid];
1352 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1354 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1356 z = a*x + sqrt(1-a^2)*y
1357 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1358 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1360 C(t) = exp(-t/tau) with tau=-1/ln(a)
1362 Then, use error function to turn the Gaussian random variable into a uniformly
1363 distributed one in [0,1]. Eventually, use cumulative distribution function of
1364 histogram to get random variables distributed according to histogram.
1365 Note: The ACT of the flat distribution and of the generated histogram is not
1366 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1368 a = exp(-1.0/tausteps);
1370 invsqrt2 = 1./sqrt(2.0);
1372 /* init random sequence */
1373 x = gmx_rng_gaussian_table(opt->rng);
1375 if (opt->bsMethod == bsMethod_traj)
1377 /* bootstrap points from the umbrella histograms */
1378 for (i = 0; i < N; i++)
1380 y = gmx_rng_gaussian_table(opt->rng);
1382 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1383 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1385 r = 0.5*(1+gmx_erf(x*invsqrt2));
1386 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1387 synthWindow->Histo[0][r_index] += 1.;
1390 else if (opt->bsMethod == bsMethod_trajGauss)
1392 /* bootstrap points from a Gaussian with the same average and sigma
1393 as the respective umbrella histogram. The idea was, that -given
1394 limited sampling- the bootstrapped histograms are otherwise biased
1395 from the limited sampling of the US histos. However, bootstrapping from
1396 the Gaussian seems to yield a similar estimate. */
1400 y = gmx_rng_gaussian_table(opt->rng);
1403 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1408 while ( (ibin += nbins) < 0)
1413 else if (ibin >= nbins)
1415 while ( (ibin -= nbins) >= nbins)
1422 if (ibin >= 0 && ibin < nbins)
1424 synthWindow->Histo[0][ibin] += 1.;
1431 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1435 /*! \brief Write all histograms to a file
1437 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1438 * sets of bootstrapped histograms.
1440 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1441 int bs_index, t_UmbrellaOptions *opt)
1443 char *fn = 0, *buf = 0, title[256];
1449 snew(fn, strlen(fnhist)+10);
1450 snew(buf, strlen(fnhist)+1);
1451 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1452 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1456 fn = gmx_strdup(fnhist);
1457 strcpy(title, "Umbrella histograms");
1460 fp = xvgropen(fn, title, "z", "count", opt->oenv);
1463 /* Write histograms */
1464 for (l = 0; l < bins; ++l)
1466 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1467 for (i = 0; i < nWindows; ++i)
1469 for (j = 0; j < window[i].nPull; ++j)
1471 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1478 printf("Wrote %s\n", fn);
1486 //! Used for qsort to sort random numbers
1487 int func_wham_is_larger(const void *a, const void *b)
1506 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1507 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1514 /* generate ordered random numbers between 0 and nAllPull */
1515 for (i = 0; i < nAllPull-1; i++)
1517 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1519 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1520 r[nAllPull-1] = 1.0*nAllPull;
1522 synthwin[0].bsWeight[0] = r[0];
1523 for (i = 1; i < nAllPull; i++)
1525 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1528 /* avoid to have zero weight by adding a tiny value */
1529 for (i = 0; i < nAllPull; i++)
1531 if (synthwin[i].bsWeight[0] < 1e-5)
1533 synthwin[i].bsWeight[0] = 1e-5;
1540 //! The main bootstrapping routine
1541 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1542 char* ylabel, double *profile,
1543 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1545 t_UmbrellaWindow * synthWindow;
1546 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1547 int i, j, *randomArray = 0, winid, pullid, ib;
1548 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1550 gmx_bool bExact = FALSE;
1552 /* init random generator */
1553 if (opt->bsSeed == -1)
1555 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1559 opt->rng = gmx_rng_init(opt->bsSeed);
1562 snew(bsProfile, opt->bins);
1563 snew(bsProfiles_av, opt->bins);
1564 snew(bsProfiles_av2, opt->bins);
1566 /* Create array of all pull groups. Note that different windows
1567 may have different nr of pull groups
1568 First: Get total nr of pull groups */
1570 for (i = 0; i < nWindows; i++)
1572 nAllPull += window[i].nPull;
1574 snew(allPull_winId, nAllPull);
1575 snew(allPull_pullId, nAllPull);
1577 /* Setup one array of all pull groups */
1578 for (i = 0; i < nWindows; i++)
1580 for (j = 0; j < window[i].nPull; j++)
1582 allPull_winId[iAllPull] = i;
1583 allPull_pullId[iAllPull] = j;
1588 /* setup stuff for synthetic windows */
1589 snew(synthWindow, nAllPull);
1590 for (i = 0; i < nAllPull; i++)
1592 synthWindow[i].nPull = 1;
1593 synthWindow[i].nBin = opt->bins;
1594 snew(synthWindow[i].Histo, 1);
1595 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1597 snew(synthWindow[i].Histo[0], opt->bins);
1599 snew(synthWindow[i].N, 1);
1600 snew(synthWindow[i].pos, 1);
1601 snew(synthWindow[i].z, 1);
1602 snew(synthWindow[i].k, 1);
1603 snew(synthWindow[i].bContrib, 1);
1604 snew(synthWindow[i].g, 1);
1605 snew(synthWindow[i].bsWeight, 1);
1608 switch (opt->bsMethod)
1611 snew(randomArray, nAllPull);
1612 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1613 please_cite(stdout, "Hub2006");
1615 case bsMethod_BayesianHist:
1616 /* just copy all histogams into synthWindow array */
1617 for (i = 0; i < nAllPull; i++)
1619 winid = allPull_winId [i];
1620 pullid = allPull_pullId[i];
1621 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1625 case bsMethod_trajGauss:
1626 calc_cumulatives(window, nWindows, opt, fnhist);
1629 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1632 /* do bootstrapping */
1633 fp = xvgropen(fnprof, "Boot strap profiles", "z", ylabel, opt->oenv);
1634 for (ib = 0; ib < opt->nBootStrap; ib++)
1636 printf(" *******************************************\n"
1637 " ******** Start bootstrap nr %d ************\n"
1638 " *******************************************\n", ib+1);
1640 switch (opt->bsMethod)
1643 /* bootstrap complete histograms from given histograms */
1644 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1645 for (i = 0; i < nAllPull; i++)
1647 winid = allPull_winId [randomArray[i]];
1648 pullid = allPull_pullId[randomArray[i]];
1649 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1652 case bsMethod_BayesianHist:
1653 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1654 setRandomBsWeights(synthWindow, nAllPull, opt);
1657 case bsMethod_trajGauss:
1658 /* create new histos from given histos, that is generate new hypothetical
1660 for (i = 0; i < nAllPull; i++)
1662 winid = allPull_winId[i];
1663 pullid = allPull_pullId[i];
1664 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1669 /* write histos in case of verbose output */
1670 if (opt->bs_verbose)
1672 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1679 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1682 if ( (i%opt->stepUpdateContrib) == 0)
1684 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1686 if (maxchange < opt->Tolerance)
1690 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1692 printf("\t%4d) Maximum change %e\n", i, maxchange);
1694 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1697 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1698 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1702 prof_normalization_and_unit(bsProfile, opt);
1705 /* symmetrize profile around z=0 */
1708 symmetrizeProfile(bsProfile, opt);
1711 /* save stuff to get average and stddev */
1712 for (i = 0; i < opt->bins; i++)
1715 bsProfiles_av[i] += tmp;
1716 bsProfiles_av2[i] += tmp*tmp;
1717 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1719 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1723 /* write average and stddev */
1724 fp = xvgropen(fnres, "Average and stddev from bootstrapping", "z", ylabel, opt->oenv);
1725 if (output_env_get_print_xvgr_codes(opt->oenv))
1727 fprintf(fp, "@TYPE xydy\n");
1729 for (i = 0; i < opt->bins; i++)
1731 bsProfiles_av [i] /= opt->nBootStrap;
1732 bsProfiles_av2[i] /= opt->nBootStrap;
1733 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1734 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1735 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1738 printf("Wrote boot strap result to %s\n", fnres);
1741 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1742 int whaminFileType(char *fn)
1746 if (strcmp(fn+len-3, "tpr") == 0)
1750 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1752 return whamin_pullxf;
1754 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1760 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1762 return whamin_unknown;
1765 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1766 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1767 t_UmbrellaOptions *opt)
1769 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1770 int nread, sizenow, i, block = 1;
1773 fp = gmx_ffopen(fn, "r");
1776 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1778 if (strlen(tmp) >= WHAM_MAXFILELEN)
1780 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1782 if (nread >= sizenow)
1785 srenew(filename, sizenow);
1786 for (i = sizenow-block; i < sizenow; i++)
1788 snew(filename[i], WHAM_MAXFILELEN);
1791 /* remove newline if there is one */
1792 if (tmp[strlen(tmp)-1] == '\n')
1794 tmp[strlen(tmp)-1] = '\0';
1796 strcpy(filename[nread], tmp);
1799 printf("Found file %s in %s\n", filename[nread], fn);
1803 *filenamesRet = filename;
1807 //! Open a file or a pipe to a gzipped file
1808 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1810 char Buffer[1024], gunzip[1024], *Path = 0;
1812 static gmx_bool bFirst = 1;
1814 /* gzipped pdo file? */
1815 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1817 /* search gunzip executable */
1818 if (!(Path = getenv("GMX_PATH_GZIP")))
1820 if (gmx_fexist("/bin/gunzip"))
1822 sprintf(gunzip, "%s", "/bin/gunzip");
1824 else if (gmx_fexist("/usr/bin/gunzip"))
1826 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1830 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1831 "You may want to define the path to gunzip "
1832 "with the environment variable GMX_PATH_GZIP.", gunzip);
1837 sprintf(gunzip, "%s/gunzip", Path);
1838 if (!gmx_fexist(gunzip))
1840 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1841 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1846 printf("Using gunzig executable %s\n", gunzip);
1849 if (!gmx_fexist(fn))
1851 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1853 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1856 printf("Executing command '%s'\n", Buffer);
1859 if ((pipe = popen(Buffer, "r")) == NULL)
1861 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1864 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1870 pipe = gmx_ffopen(fn, "r");
1877 //! Close file or pipe
1878 void pdo_close_file(FILE *fp)
1887 //! Reading all pdo files
1888 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1889 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1892 real mintmp, maxtmp, done = 0.;
1895 /* char Buffer0[1000]; */
1899 gmx_fatal(FARGS, "No files found. Hick.");
1902 /* if min and max are not given, get min and max from the input files */
1905 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1908 for (i = 0; i < nfiles; ++i)
1910 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1911 /*fgets(Buffer0,999,file);
1912 fprintf(stderr,"First line '%s'\n",Buffer0); */
1913 done = 100.0*(i+1)/nfiles;
1914 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1919 read_pdo_header(file, header, opt);
1920 /* here only determine min and max of this window */
1921 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1922 if (maxtmp > opt->max)
1926 if (mintmp < opt->min)
1932 pdo_close_file(file);
1940 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1941 if (opt->bBoundsOnly)
1943 printf("Found option -boundsonly, now exiting.\n");
1947 /* store stepsize in profile */
1948 opt->dz = (opt->max-opt->min)/opt->bins;
1950 /* Having min and max, we read in all files */
1951 /* Loop over all files */
1952 for (i = 0; i < nfiles; ++i)
1954 done = 100.0*(i+1)/nfiles;
1955 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1960 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1961 read_pdo_header(file, header, opt);
1962 /* load data into window */
1963 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
1964 if ((window+i)->Ntot[0] == 0)
1966 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1970 pdo_close_file(file);
1978 for (i = 0; i < nfiles; ++i)
1985 //! translate 0/1 to N/Y to write pull dimensions
1986 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
1988 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
1989 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
1994 static int first = 1;
1996 /* printf("Reading %s \n",fn); */
1997 read_tpx_state(fn, &ir, &state, NULL, NULL);
1999 if (ir.ePull != epullUMBRELLA)
2001 gmx_fatal(FARGS, "This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
2002 " (ir.ePull = %d)\n", epull_names[ir.ePull], ir.ePull);
2005 /* nr of pull groups */
2006 ncrd = ir.pull->ncoord;
2009 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d pull coordinates\n", ncrd);
2012 header->npullcrds = ir.pull->ncoord;
2013 header->pull_geometry = ir.pull->eGeom;
2014 header->bPrintRef = ir.pull->bPrintRef;
2015 copy_ivec(ir.pull->dim, header->pull_dim);
2016 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2017 snew(header->k, ncrd);
2018 snew(header->init_dist, ncrd);
2019 snew(header->umbInitDist, ncrd);
2021 /* only z-direction with epullgCYL? */
2022 if (header->pull_geometry == epullgCYL)
2024 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2026 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2027 "However, found dimensions [%s %s %s]\n",
2028 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2029 int2YN(header->pull_dim[ZZ]));
2033 for (i = 0; i < ncrd; i++)
2035 header->k[i] = ir.pull->coord[i].k;
2036 if (header->k[i] == 0.0)
2038 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2039 "That doesn't seem to be an Umbrella tpr.\n",
2042 header->init_dist[i] = ir.pull->coord[i].init;
2044 /* initial distance to reference */
2045 switch (header->pull_geometry)
2048 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2052 header->umbInitDist[i] = header->init_dist[i];
2055 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2059 if (opt->verbose || first)
2061 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2062 "\tPull group coordinates%s expected in pullx files.\n",
2063 fn, header->npullcrds, epullg_names[header->pull_geometry],
2064 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2065 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2066 for (i = 0; i < ncrd; i++)
2068 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2071 if (!opt->verbose && first)
2073 printf("\tUse option -v to see this output for all input tpr files\n\n");
2079 //! 2-norm in a ndim-dimensional space
2080 double dist_ndim(double **dx, int ndim, int line)
2084 for (i = 0; i < ndim; i++)
2086 r2 += sqr(dx[i][line]);
2091 //! Read pullx.xvg or pullf.xvg
2092 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2093 t_UmbrellaWindow * window,
2094 t_UmbrellaOptions *opt,
2095 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2096 t_groupselection *groupsel)
2098 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2099 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2100 real min, max, minfound = 1e20, maxfound = -1e20;
2101 gmx_bool dt_ok, timeok, bHaveForce;
2102 const char *quantity;
2103 const int blocklen = 4096;
2105 static gmx_bool bFirst = TRUE;
2108 * Data columns in pull output:
2109 * - in force output pullf.xvg:
2110 * No reference columns, one column per pull coordinate
2112 * - in position output pullx.xvg
2113 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2114 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2117 nColPerCrd = opt->bPullx ? header->pull_ndim : 1;
2118 quantity = opt->bPullx ? "position" : "force";
2120 if (opt->bPullx && header->bPrintRef)
2122 nColRefEachCrd = header->pull_ndim;
2129 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2130 bHaveForce = opt->bPullf;
2132 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2133 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2134 Sorry for the laziness, this is a To-do. */
2135 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2138 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2139 "reading \n(option -if) is supported at present, "
2140 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2141 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2142 epullg_names[header->pull_geometry]);
2145 nt = read_xvg(fn, &y, &ny);
2147 /* Check consistency */
2150 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2154 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2155 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2157 printf("Expecting these columns in pull file:\n"
2158 "\t%d reference columns for each individual pull coordinate\n"
2159 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2160 printf("With %d pull groups, expect %d columns (including the time column)\n", header->npullcrds, nColExpect);
2163 if (ny != nColExpect)
2165 gmx_fatal(FARGS, "Found %d pull coodinates in %s,\n but %d data columns in %s (expected %d)\n"
2166 "\nMaybe you confused options -ix and -if ?\n",
2167 header->npullcrds, fntpr, ny-1, fn, nColExpect-1);
2172 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2182 window->dt = y[0][1]-y[0][0];
2184 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2186 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2189 /* Need to alocate memory and set up structure */
2193 /* Use only groups selected with option -is file */
2194 if (header->npullcrds != groupsel->n)
2196 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2197 header->npullcrds, groupsel->n);
2199 window->nPull = groupsel->nUse;
2203 window->nPull = header->npullcrds;
2206 window->nBin = bins;
2207 snew(window->Histo, window->nPull);
2208 snew(window->z, window->nPull);
2209 snew(window->k, window->nPull);
2210 snew(window->pos, window->nPull);
2211 snew(window->N, window->nPull);
2212 snew(window->Ntot, window->nPull);
2213 snew(window->g, window->nPull);
2214 snew(window->bsWeight, window->nPull);
2215 window->bContrib = 0;
2217 if (opt->bCalcTauInt)
2219 snew(window->ztime, window->nPull);
2223 window->ztime = NULL;
2225 snew(lennow, window->nPull);
2227 for (g = 0; g < window->nPull; ++g)
2230 window->bsWeight[g] = 1.;
2231 snew(window->Histo[g], bins);
2233 window->Ntot[g] = 0;
2235 if (opt->bCalcTauInt)
2237 window->ztime[g] = NULL;
2241 /* Copying umbrella center and force const is more involved since not
2242 all pull groups from header (tpr file) may be used in window variable */
2243 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2245 if (groupsel && (groupsel->bUse[g] == FALSE))
2249 window->k[gUsed] = header->k[g];
2250 window->pos[gUsed] = header->umbInitDist[g];
2255 { /* only determine min and max */
2258 min = max = bins = 0; /* Get rid of warnings */
2262 for (i = 0; i < nt; i++)
2264 /* Do you want that time frame? */
2265 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2267 /* get time step of pdo file and get dstep from opt->dt */
2277 dstep = static_cast<int>(opt->dt/dt+0.5);
2285 window->dt = dt*dstep;
2289 dt_ok = (i%dstep == 0);
2290 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2292 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2293 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2296 /* Note: if groupsel == NULL:
2297 * all groups in pullf/x file are stored in this window, and gUsed == g
2298 * if groupsel != NULL:
2299 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2302 for (g = 0; g < header->npullcrds; ++g)
2304 /* was this group selected for application in WHAM? */
2305 if (groupsel && (groupsel->bUse[g] == FALSE))
2314 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2316 pos = -force/header->k[g] + header->umbInitDist[g];
2320 /* pick the right column from:
2321 * time ref1[ndim] coord1[ndim] ref2[ndim] coord2[ndim] ...
2323 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2324 switch (header->pull_geometry)
2327 pos = dist_ndim(y + column, header->pull_ndim, i);
2333 gmx_fatal(FARGS, "Bad error, this error should have been catched before. Ups.\n");
2337 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2351 if (gUsed >= window->nPull)
2353 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2354 gUsed, window->nPull);
2357 if (opt->bCalcTauInt && !bGetMinMax)
2359 /* save time series for autocorrelation analysis */
2360 ntot = window->Ntot[gUsed];
2361 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2362 if (ntot >= lennow[gUsed])
2364 lennow[gUsed] += blocklen;
2365 srenew(window->ztime[gUsed], lennow[gUsed]);
2367 window->ztime[gUsed][ntot] = pos;
2370 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2375 while ( (ibin += bins) < 0)
2380 else if (ibin >= bins)
2382 while ( (ibin -= bins) >= bins)
2388 if (ibin >= 0 && ibin < bins)
2390 window->Histo[gUsed][ibin] += 1.;
2393 window->Ntot[gUsed]++;
2397 else if (t > opt->tmax)
2401 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2413 for (i = 0; i < ny; i++)
2419 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2420 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2421 t_UmbrellaHeader* header,
2422 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2425 real mintmp, maxtmp;
2427 printf("Reading %d tpr and pullf files\n", nfiles/2);
2429 /* min and max not given? */
2432 printf("Automatic determination of boundaries...\n");
2435 for (i = 0; i < nfiles; i++)
2437 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2439 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2441 read_tpr_header(fnTprs[i], header, opt);
2442 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2444 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2446 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2447 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2448 if (maxtmp > opt->max)
2452 if (mintmp < opt->min)
2457 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2458 if (opt->bBoundsOnly)
2460 printf("Found option -boundsonly, now exiting.\n");
2464 /* store stepsize in profile */
2465 opt->dz = (opt->max-opt->min)/opt->bins;
2467 for (i = 0; i < nfiles; i++)
2469 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2471 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2473 read_tpr_header(fnTprs[i], header, opt);
2474 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2476 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2478 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2479 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2480 if (window[i].Ntot[0] == 0)
2482 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2486 for (i = 0; i < nfiles; i++)
2495 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2497 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2498 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2500 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2502 int nlines, ny, i, ig;
2505 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2506 nlines = read_xvg(fn, &iact, &ny);
2507 if (nlines != nwins)
2509 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2512 for (i = 0; i < nlines; i++)
2514 if (window[i].nPull != ny)
2516 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2517 "number of pull groups is different in different simulations. That is not\n"
2518 "supported yet. Sorry.\n");
2520 for (ig = 0; ig < window[i].nPull; ig++)
2522 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2523 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2525 if (iact[ig][i] <= 0.0)
2527 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2534 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2537 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2538 * that -in case of limited sampling- the ACT may be severely underestimated.
2539 * Note: the g=1+2tau are overwritten.
2540 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2543 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2546 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2548 /* only evaluate within +- 3sigma of the Gausian */
2549 siglim = 3.0*opt->sigSmoothIact;
2550 siglim2 = dsqr(siglim);
2551 /* pre-factor of Gaussian */
2552 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2553 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2555 for (i = 0; i < nwins; i++)
2557 snew(window[i].tausmooth, window[i].nPull);
2558 for (ig = 0; ig < window[i].nPull; ig++)
2562 pos = window[i].pos[ig];
2563 for (j = 0; j < nwins; j++)
2565 for (jg = 0; jg < window[j].nPull; jg++)
2567 dpos2 = dsqr(window[j].pos[jg]-pos);
2568 if (dpos2 < siglim2)
2570 w = gaufact*exp(-dpos2*invtwosig2);
2572 tausm += w*window[j].tau[jg];
2573 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2574 w,dpos2,pos,gaufact,invtwosig2); */
2579 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2581 window[i].tausmooth[ig] = tausm;
2585 window[i].tausmooth[ig] = window[i].tau[ig];
2587 window[i].g[ig] = 1+2*tausm/window[i].dt;
2592 //! Stop integrating autoccorelation function when ACF drops under this value
2593 #define WHAM_AC_ZERO_LIMIT 0.05
2595 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2597 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2598 t_UmbrellaOptions *opt, const char *fn)
2600 int i, ig, ncorr, ntot, j, k, *count, restart;
2601 real *corr, c0, dt, tmp;
2602 real *ztime, av, tausteps;
2603 FILE *fp, *fpcorr = 0;
2607 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2608 "time [ps]", "autocorrelation function", opt->oenv);
2612 for (i = 0; i < nwins; i++)
2614 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2616 ntot = window[i].Ntot[0];
2618 /* using half the maximum time as length of autocorrelation function */
2622 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2623 " points. Provide more pull data!", ntot);
2626 /* snew(corrSq,ncorr); */
2629 snew(window[i].tau, window[i].nPull);
2630 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2636 for (ig = 0; ig < window[i].nPull; ig++)
2638 if (ntot != window[i].Ntot[ig])
2640 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2641 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2643 ztime = window[i].ztime[ig];
2645 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2646 for (j = 0, av = 0; (j < ntot); j++)
2651 for (k = 0; (k < ncorr); k++)
2656 for (j = 0; (j < ntot); j += restart)
2658 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2660 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2662 /* corrSq[k] += tmp*tmp; */
2666 /* divide by nr of frames for each time displacement */
2667 for (k = 0; (k < ncorr); k++)
2669 /* count probably = (ncorr-k+(restart-1))/restart; */
2670 corr[k] = corr[k]/count[k];
2671 /* variance of autocorrelation function */
2672 /* corrSq[k]=corrSq[k]/count[k]; */
2674 /* normalize such that corr[0] == 0 */
2676 for (k = 0; (k < ncorr); k++)
2679 /* corrSq[k]*=c0*c0; */
2682 /* write ACFs in verbose mode */
2685 for (k = 0; (k < ncorr); k++)
2687 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2689 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2692 /* esimate integrated correlation time, fitting is too unstable */
2693 tausteps = 0.5*corr[0];
2694 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2695 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2697 tausteps += corr[j];
2700 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2701 Kumar et al, eq. 28 ff. */
2702 window[i].tau[ig] = tausteps*dt;
2703 window[i].g[ig] = 1+2*tausteps;
2704 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2712 gmx_ffclose(fpcorr);
2715 /* plot IACT along reaction coordinate */
2716 fp = xvgropen(fn, "Integrated autocorrelation times", "z", "IACT [ps]", opt->oenv);
2717 if (output_env_get_print_xvgr_codes(opt->oenv))
2719 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2720 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2721 for (i = 0; i < nwins; i++)
2723 fprintf(fp, "# %3d ", i);
2724 for (ig = 0; ig < window[i].nPull; ig++)
2726 fprintf(fp, " %11g", window[i].tau[ig]);
2731 for (i = 0; i < nwins; i++)
2733 for (ig = 0; ig < window[i].nPull; ig++)
2735 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2738 if (opt->sigSmoothIact > 0.0)
2740 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2741 opt->sigSmoothIact);
2742 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2743 smoothIact(window, nwins, opt);
2744 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2745 if (output_env_get_print_xvgr_codes(opt->oenv))
2747 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2748 fprintf(fp, "@ s1 symbol color 2\n");
2750 for (i = 0; i < nwins; i++)
2752 for (ig = 0; ig < window[i].nPull; ig++)
2754 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2759 printf("Wrote %s\n", fn);
2763 * compute average and sigma of each umbrella histogram
2765 void averageSigma(t_UmbrellaWindow *window, int nwins)
2768 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2770 for (i = 0; i < nwins; i++)
2772 snew(window[i].aver, window[i].nPull);
2773 snew(window[i].sigma, window[i].nPull);
2775 ntot = window[i].Ntot[0];
2776 for (ig = 0; ig < window[i].nPull; ig++)
2778 ztime = window[i].ztime[ig];
2779 for (k = 0, av = 0.; k < ntot; k++)
2784 for (k = 0, sum2 = 0.; k < ntot; k++)
2789 sig = sqrt(sum2/ntot);
2790 window[i].aver[ig] = av;
2792 /* Note: This estimate for sigma is biased from the limited sampling.
2793 Correct sigma by n/(n-1) where n = number of independent
2794 samples. Only possible if IACT is known.
2798 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2799 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2803 window[i].sigma[ig] = sig;
2805 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2812 * Use histograms to compute average force on pull group.
2814 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2816 int i, j, bins = opt->bins, k;
2817 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2820 dz = (max-min)/bins;
2821 ztot = opt->max-min;
2824 /* Compute average displacement from histograms */
2825 for (j = 0; j < nWindows; ++j)
2827 snew(window[j].forceAv, window[j].nPull);
2828 for (k = 0; k < window[j].nPull; ++k)
2832 for (i = 0; i < opt->bins; ++i)
2834 temp = (1.0*i+0.5)*dz+min;
2835 distance = temp - window[j].pos[k];
2837 { /* in cyclic wham: */
2838 if (distance > ztot_half) /* |distance| < ztot_half */
2842 else if (distance < -ztot_half)
2847 w = window[j].Histo[k][i]/window[j].g[k];
2848 displAv += w*distance;
2850 /* Are we near min or max? We are getting wrong forces from the histgrams since
2851 the histograms are zero outside [min,max). Therefore, assume that the position
2852 on the other side of the histomgram center is equally likely. */
2855 posmirrored = window[j].pos[k]-distance;
2856 if (posmirrored >= max || posmirrored < min)
2858 displAv += -w*distance;
2865 /* average force from average displacement */
2866 window[j].forceAv[k] = displAv*window[j].k[k];
2867 /* sigma from average square displacement */
2868 /* window[j].sigma [k] = sqrt(displAv2); */
2869 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2875 * Check if the complete reaction coordinate is covered by the histograms
2877 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2878 t_UmbrellaOptions *opt)
2880 int i, ig, j, bins = opt->bins, bBoundary;
2881 real avcount = 0, z, relcount, *count;
2882 snew(count, opt->bins);
2884 for (j = 0; j < opt->bins; ++j)
2886 for (i = 0; i < nwins; i++)
2888 for (ig = 0; ig < window[i].nPull; ig++)
2890 count[j] += window[i].Histo[ig][j];
2893 avcount += 1.0*count[j];
2896 for (j = 0; j < bins; ++j)
2898 relcount = count[j]/avcount;
2899 z = (j+0.5)*opt->dz+opt->min;
2900 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2901 /* check for bins with no data */
2904 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2905 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2907 /* and check for poor sampling */
2908 else if (relcount < 0.005 && !bBoundary)
2910 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2916 /*! \brief Compute initial potential by integrating the average force
2918 * This speeds up the convergence by roughly a factor of 2
2920 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2922 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
2923 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
2926 dz = (opt->max-min)/bins;
2928 printf("Getting initial potential by integration.\n");
2930 /* Compute average displacement from histograms */
2931 computeAverageForce(window, nWindows, opt);
2933 /* Get force for each bin from all histograms in this bin, or, alternatively,
2934 if no histograms are inside this bin, from the closest histogram */
2937 for (j = 0; j < opt->bins; ++j)
2939 pos = (1.0*j+0.5)*dz+min;
2943 groupmin = winmin = 0;
2944 for (i = 0; i < nWindows; i++)
2946 for (ig = 0; ig < window[i].nPull; ig++)
2948 hispos = window[i].pos[ig];
2949 dist = fabs(hispos-pos);
2950 /* average force within bin */
2954 fAv += window[i].forceAv[ig];
2956 /* at the same time, rememer closest histogram */
2965 /* if no histogram found in this bin, use closest histogram */
2972 fAv = window[winmin].forceAv[groupmin];
2976 for (j = 1; j < opt->bins; ++j)
2978 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
2981 /* cyclic wham: linearly correct possible offset */
2984 diff = (pot[bins-1]-pot[0])/(bins-1);
2985 for (j = 1; j < opt->bins; ++j)
2992 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", "z", "PMF [kJ/mol]", opt->oenv);
2993 for (j = 0; j < opt->bins; ++j)
2995 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
2998 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3001 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3002 energy offsets which are usually determined by wham
3003 First: turn pot into probabilities:
3005 for (j = 0; j < opt->bins; ++j)
3007 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
3009 calc_z(pot, window, nWindows, opt, TRUE);
3015 //! Count number of words in an line
3016 static int wordcount(char *ptr)
3021 if (strlen(ptr) == 0)
3025 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3027 for (i = 0; (ptr[i] != '\0'); i++)
3029 is[cur] = isspace(ptr[i]);
3030 if ((i > 0) && (is[cur] && !is[1-cur]))
3039 /*! \brief Read input file for pull group selection (option -is)
3041 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3043 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3046 int i, iline, n, len = STRLEN, temp;
3047 char *ptr = 0, *tmpbuf = 0;
3048 char fmt[1024], fmtign[1024];
3049 int block = 1, sizenow;
3051 fp = gmx_ffopen(opt->fnGroupsel, "r");
3052 opt->groupsel = NULL;
3057 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3062 if (iline >= sizenow)
3065 srenew(opt->groupsel, sizenow);
3067 opt->groupsel[iline].n = n;
3068 opt->groupsel[iline].nUse = 0;
3069 snew(opt->groupsel[iline].bUse, n);
3072 for (i = 0; i < n; i++)
3074 strcpy(fmt, fmtign);
3076 if (sscanf(ptr, fmt, &temp))
3078 opt->groupsel[iline].bUse[i] = (temp > 0);
3079 if (opt->groupsel[iline].bUse[i])
3081 opt->groupsel[iline].nUse++;
3084 strcat(fmtign, "%*s");
3088 opt->nGroupsel = iline;
3089 if (nTpr != opt->nGroupsel)
3091 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3095 printf("\nUse only these pull groups:\n");
3096 for (iline = 0; iline < nTpr; iline++)
3098 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3099 for (i = 0; i < opt->groupsel[iline].n; i++)
3101 if (opt->groupsel[iline].bUse[i])
3114 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3116 //! Number of elements in fnm (used for command line parsing)
3117 #define NFILE asize(fnm)
3119 //! The main g_wham routine
3120 int gmx_wham(int argc, char *argv[])
3122 const char *desc[] = {
3123 "[THISMODULE] is an analysis program that implements the Weighted",
3124 "Histogram Analysis Method (WHAM). It is intended to analyze",
3125 "output files generated by umbrella sampling simulations to ",
3126 "compute a potential of mean force (PMF). [PAR] ",
3127 "At present, three input modes are supported.[BR]",
3128 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
3129 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
3130 " AND, with option [TT]-ix[tt], a file which contains file names of",
3131 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
3132 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
3133 " first pullx, etc.[BR]",
3134 "[TT]*[tt] Same as the previous input mode, except that the the user",
3135 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3136 " From the pull force the position in the umbrella potential is",
3137 " computed. This does not work with tabulated umbrella potentials.[BR]"
3138 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
3139 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3140 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
3141 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [TT].pdo[tt] file header",
3142 " must be similar to the following:[PAR]",
3143 "[TT]# UMBRELLA 3.0[BR]",
3144 "# Component selection: 0 0 1[BR]",
3146 "# Ref. Group 'TestAtom'[BR]",
3147 "# Nr. of pull groups 2[BR]",
3148 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
3149 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
3151 "The number of pull groups, umbrella positions, force constants, and names ",
3152 "may (of course) differ. Following the header, a time column and ",
3153 "a data column for each pull group follows (i.e. the displacement",
3154 "with respect to the umbrella center). Up to four pull groups are possible ",
3155 "per [TT].pdo[tt] file at present.[PAR]",
3156 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3157 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3158 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3159 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3160 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3161 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3162 "used, groupsel.dat looks like this:[BR]",
3166 "By default, the output files are[BR]",
3167 " [TT]-o[tt] PMF output file[BR]",
3168 " [TT]-hist[tt] Histograms output file[BR]",
3169 "Always check whether the histograms sufficiently overlap.[PAR]",
3170 "The umbrella potential is assumed to be harmonic and the force constants are ",
3171 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
3172 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
3173 "WHAM OPTIONS[BR]------------[BR]",
3174 " [TT]-bins[tt] Number of bins used in analysis[BR]",
3175 " [TT]-temp[tt] Temperature in the simulations[BR]",
3176 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
3177 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
3178 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
3179 "The data points that are used to compute the profile",
3180 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3181 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3182 "umbrella window.[PAR]",
3183 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3184 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3185 "With energy output, the energy in the first bin is defined to be zero. ",
3186 "If you want the free energy at a different ",
3187 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3188 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3189 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3190 "[THISMODULE] will make use of the",
3191 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3192 "reaction coordinate will assumed be be neighbors.[PAR]",
3193 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3194 "which may be useful for, e.g. membranes.[PAR]",
3195 "AUTOCORRELATIONS[BR]----------------[BR]",
3196 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3197 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3198 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3199 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3200 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3201 "Because the IACTs can be severely underestimated in case of limited ",
3202 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3203 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3204 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3205 "integration of the ACFs while the ACFs are larger 0.05.",
3206 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3207 "less robust) method such as fitting to a double exponential, you can ",
3208 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3209 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3210 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
3211 "ERROR ANALYSIS[BR]--------------[BR]",
3212 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3213 "otherwise the statistical error may be substantially underestimated. ",
3214 "More background and examples for the bootstrap technique can be found in ",
3215 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
3216 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3217 "Four bootstrapping methods are supported and ",
3218 "selected with [TT]-bs-method[tt].[BR]",
3219 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3220 "data points, and the bootstrap is carried out by assigning random weights to the ",
3221 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3222 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3223 "statistical error is underestimated.[BR]",
3224 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
3225 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3226 "(allowing duplication, i.e. sampling with replacement).",
3227 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
3228 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3229 "divided into blocks and only histograms within each block are mixed. Note that ",
3230 "the histograms within each block must be representative for all possible histograms, ",
3231 "otherwise the statistical error is underestimated.[BR]",
3232 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3233 "such that the generated data points are distributed according the given histograms ",
3234 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3235 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3236 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3237 "Note that this method may severely underestimate the error in case of limited sampling, ",
3238 "that is if individual histograms do not represent the complete phase space at ",
3239 "the respective positions.[BR]",
3240 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3241 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3242 "and width of the umbrella histograms. That method yields similar error estimates ",
3243 "like method [TT]traj[tt].[PAR]"
3244 "Bootstrapping output:[BR]",
3245 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
3246 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
3247 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3248 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3252 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3253 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3254 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3256 static t_UmbrellaOptions opt;
3259 { "-min", FALSE, etREAL, {&opt.min},
3260 "Minimum coordinate in profile"},
3261 { "-max", FALSE, etREAL, {&opt.max},
3262 "Maximum coordinate in profile"},
3263 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3264 "Determine min and max automatically"},
3265 { "-bins", FALSE, etINT, {&opt.bins},
3266 "Number of bins in profile"},
3267 { "-temp", FALSE, etREAL, {&opt.Temperature},
3269 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3271 { "-v", FALSE, etBOOL, {&opt.verbose},
3273 { "-b", FALSE, etREAL, {&opt.tmin},
3274 "First time to analyse (ps)"},
3275 { "-e", FALSE, etREAL, {&opt.tmax},
3276 "Last time to analyse (ps)"},
3277 { "-dt", FALSE, etREAL, {&opt.dt},
3278 "Analyse only every dt ps"},
3279 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3280 "Write histograms and exit"},
3281 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3282 "Determine min and max and exit (with [TT]-auto[tt])"},
3283 { "-log", FALSE, etBOOL, {&opt.bLog},
3284 "Calculate the log of the profile before printing"},
3285 { "-unit", FALSE, etENUM, {en_unit},
3286 "Energy unit in case of log output" },
3287 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3288 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3289 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3290 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3291 { "-sym", FALSE, etBOOL, {&opt.bSym},
3292 "Symmetrize profile around z=0"},
3293 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3294 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3295 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3296 "Calculate integrated autocorrelation times and use in wham"},
3297 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3298 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3299 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3300 "When computing autocorrelation functions, restart computing every .. (ps)"},
3301 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3302 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3303 "during smoothing"},
3304 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3305 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3306 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3307 "Bootstrap method" },
3308 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3309 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3310 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3311 "Seed for bootstrapping. (-1 = use time)"},
3312 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3313 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3314 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3315 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3316 { "-stepout", FALSE, etINT, {&opt.stepchange},
3317 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3318 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3319 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3323 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3324 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3325 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3326 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3327 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3328 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3329 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3330 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3331 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3332 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3333 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3334 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3337 int i, j, l, nfiles, nwins, nfiles2;
3338 t_UmbrellaHeader header;
3339 t_UmbrellaWindow * window = NULL;
3340 double *profile, maxchange = 1e20;
3341 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3342 char **fninTpr, **fninPull, **fninPdo;
3344 FILE *histout, *profout;
3345 char ylabel[256], title[256];
3348 opt.verbose = FALSE;
3349 opt.bHistOnly = FALSE;
3359 /* bootstrapping stuff */
3361 opt.bsMethod = bsMethod_hist;
3362 opt.tauBootStrap = 0.0;
3364 opt.histBootStrapBlockLength = 8;
3365 opt.bs_verbose = FALSE;
3370 opt.Temperature = 298;
3371 opt.Tolerance = 1e-6;
3372 opt.bBoundsOnly = FALSE;
3374 opt.bCalcTauInt = FALSE;
3375 opt.sigSmoothIact = 0.0;
3376 opt.bAllowReduceIact = TRUE;
3377 opt.bInitPotByIntegration = TRUE;
3378 opt.acTrestart = 1.0;
3379 opt.stepchange = 100;
3380 opt.stepUpdateContrib = 100;
3382 if (!parse_common_args(&argc, argv, PCA_BE_NICE,
3383 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3388 opt.unit = nenum(en_unit);
3389 opt.bsMethod = nenum(en_bsMethod);
3391 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3393 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3394 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3395 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3396 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3397 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3398 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3399 if (opt.bTab && opt.bPullf)
3401 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3402 "Provide pullx.xvg or pdo files!");
3405 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3407 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3409 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3411 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3412 "\n\n Check g_wham -h !");
3415 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3416 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3417 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3418 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3419 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3421 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3422 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3423 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3424 if ( (bMinSet || bMaxSet) && bAutoSet)
3426 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3429 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3431 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3434 if (bMinSet && opt.bAuto)
3436 printf("Note: min and max given, switching off -auto.\n");
3440 if (opt.bTauIntGiven && opt.bCalcTauInt)
3442 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3443 "the autocorrelation times. Not both.");
3446 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3448 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3449 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3451 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3453 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3454 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3457 /* Reading gmx4 pull output and tpr files */
3458 if (opt.bTpr || opt.bPullf || opt.bPullx)
3460 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3462 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3463 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3464 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3465 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3466 if (nfiles != nfiles2)
3468 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3469 opt.fnTpr, nfiles2, fnPull);
3472 /* Read file that selects the pull group to be used */
3473 if (opt.fnGroupsel != NULL)
3475 readPullGroupSelection(&opt, fninTpr, nfiles);
3478 window = initUmbrellaWindows(nfiles);
3479 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3482 { /* reading pdo files */
3483 if (opt.fnGroupsel != NULL)
3485 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3486 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3488 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3489 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3490 window = initUmbrellaWindows(nfiles);
3491 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3495 /* enforce equal weight for all histograms? */
3498 enforceEqualWeights(window, nwins);
3501 /* write histograms */
3502 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3503 "z", "count", opt.oenv);
3504 for (l = 0; l < opt.bins; ++l)
3506 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3507 for (i = 0; i < nwins; ++i)
3509 for (j = 0; j < window[i].nPull; ++j)
3511 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3514 fprintf(histout, "\n");
3516 gmx_ffclose(histout);
3517 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3520 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3524 /* Using tabulated umbrella potential */
3527 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3530 /* Integrated autocorrelation times provided ? */
3531 if (opt.bTauIntGiven)
3533 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3536 /* Compute integrated autocorrelation times */
3537 if (opt.bCalcTauInt)
3539 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3542 /* calc average and sigma for each histogram
3543 (maybe required for bootstrapping. If not, this is fast anyhow) */
3544 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3546 averageSigma(window, nwins);
3549 /* Get initial potential by simple integration */
3550 if (opt.bInitPotByIntegration)
3552 guessPotByIntegration(window, nwins, &opt);
3555 /* Check if complete reaction coordinate is covered */
3556 checkReactionCoordinateCovered(window, nwins, &opt);
3558 /* Calculate profile */
3559 snew(profile, opt.bins);
3567 if ( (i%opt.stepUpdateContrib) == 0)
3569 setup_acc_wham(profile, window, nwins, &opt);
3571 if (maxchange < opt.Tolerance)
3574 /* if (opt.verbose) */
3575 printf("Switched to exact iteration in iteration %d\n", i);
3577 calc_profile(profile, window, nwins, &opt, bExact);
3578 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3580 printf("\t%4d) Maximum change %e\n", i, maxchange);
3584 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3585 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3587 /* calc error from Kumar's formula */
3588 /* Unclear how the error propagates along reaction coordinate, therefore
3590 /* calc_error_kumar(profile,window, nwins,&opt); */
3592 /* Write profile in energy units? */
3595 prof_normalization_and_unit(profile, &opt);
3596 strcpy(ylabel, en_unit_label[opt.unit]);
3597 strcpy(title, "Umbrella potential");
3601 strcpy(ylabel, "Density of states");
3602 strcpy(title, "Density of states");
3605 /* symmetrize profile around z=0? */
3608 symmetrizeProfile(profile, &opt);
3611 /* write profile or density of states */
3612 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, "z", ylabel, opt.oenv);
3613 for (i = 0; i < opt.bins; ++i)
3615 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3617 gmx_ffclose(profout);
3618 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3620 /* Bootstrap Method */
3623 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3624 opt2fn("-hist", NFILE, fnm),
3625 ylabel, profile, window, nwins, &opt);
3629 freeUmbrellaWindows(window, nfiles);
3631 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3632 please_cite(stdout, "Hub2010");