2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
51 #include "gromacs/commandline/pargs.h"
56 #include "gromacs/fileio/tpxio.h"
58 #include "gromacs/random/random.h"
65 //! longest file names allowed in input files
66 #define WHAM_MAXFILELEN 2048
69 * enum for energy units
72 enSel, en_kJ, en_kCal, en_kT, enNr
75 * enum for type of input files (pdos, tpr, or pullf)
78 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
81 /*! \brief enum for bootstrapping method
83 * These bootstrap methods are supported:
84 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
85 * (bsMethod_BayesianHist)
86 * - bootstrap complete histograms (bsMethod_hist)
87 * - bootstrap trajectories from given umbrella histograms. This generates new
88 * "synthetic" histograms (bsMethod_traj)
89 * - bootstrap trajectories from Gaussian with mu/sigma computed from
90 * the respective histogram (bsMethod_trajGauss). This gives very similar
91 * results compared to bsMethod_traj.
93 * ********************************************************************
94 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
95 * JS Hub, BL de Groot, D van der Spoel
96 * g_wham - A free weighted histogram analysis implementation including
97 * robust error and autocorrelation estimates,
98 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
99 * ********************************************************************
102 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
103 bsMethod_traj, bsMethod_trajGauss
107 //! Parameters of the umbrella potentials
111 * \name Using umbrella pull code since gromacs 4.x
114 int npullcrds; //!< nr of pull coordinates in tpr file
115 int pull_geometry; //!< such as distance, direction
116 ivec pull_dim; //!< pull dimension with geometry distance
117 int pull_ndim; //!< nr of pull_dim != 0
118 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
119 real *k; //!< force constants in tpr file
120 real *init_dist; //!< reference displacements
121 real *umbInitDist; //!< reference displacement in umbrella direction
124 * \name Using PDO files common until gromacs 3.x
132 char PullName[4][256];
134 double UmbCons[4][3];
138 //! Data in the umbrella histograms
141 int nPull; //!< nr of pull groups in this pdo or pullf/x file
142 double **Histo; //!< nPull histograms
143 double **cum; //!< nPull cumulative distribution functions
144 int nBin; //!< nr of bins. identical to opt->bins
145 double *k; //!< force constants for the nPull groups
146 double *pos; //!< umbrella positions for the nPull groups
147 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
148 int *N; //!< nr of data points in nPull histograms.
149 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
151 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
153 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
154 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
157 double *tau; //!< intetrated autocorrelation time (IACT)
158 double *tausmooth; //!< smoothed IACT
160 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
162 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
164 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
166 /*! \brief average force estimated from average displacement, fAv=dzAv*k
168 * Used for integration to guess the potential.
171 real *aver; //!< average of histograms
172 real *sigma; //!< stddev of histograms
173 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
176 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
179 int n; //!< total nr of pull groups in this tpr file
180 int nUse; //!< nr of pull groups used
181 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
184 //! Parameters of WHAM
191 const char *fnTpr, *fnPullf, *fnGroupsel;
192 const char *fnPdo, *fnPullx; //!< file names of input
193 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
194 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
196 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
197 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
198 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
199 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
202 * \name Basic WHAM options
205 int bins; //!< nr of bins, min, max, and dz of profile
207 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
208 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
211 * \name Output control
214 gmx_bool bLog; //!< energy output (instead of probability) for profile
215 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
216 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
217 /*! \brief after wham, set prof to zero at this z-position.
218 * When bootstrapping, set zProf0 to a "stable" reference position.
221 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
223 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
224 gmx_bool bAuto; //!< determine min and max automatically but do not exit
226 gmx_bool verbose; //!< more noisy wham mode
227 int stepchange; //!< print maximum change in prof after how many interations
228 output_env_t oenv; //!< xvgr options
231 * \name Autocorrelation stuff
234 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
235 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
236 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
237 real acTrestart; //!< when computing ACT, time between restarting points
239 /* \brief Enforce the same weight for each umbella window, that is
240 * calculate with the same number of data points for
241 * each window. That can be reasonable, if the histograms
242 * have different length, but due to autocorrelation,
243 * a longer simulation should not have larger weightin wham.
249 * \name Bootstrapping stuff
252 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
254 /* \brief bootstrap method
256 * if == bsMethod_hist, consider complete histograms as independent
257 * data points and, hence, only mix complete histograms.
258 * if == bsMethod_BayesianHist, consider complete histograms
259 * as independent data points, but assign random weights
260 * to the histograms during the bootstrapping ("Bayesian bootstrap")
261 * In case of long correlations (e.g., inside a channel), these
262 * will yield a more realistic error.
263 * if == bsMethod_traj(Gauss), generate synthetic histograms
265 * histogram by generating an autocorrelated random sequence
266 * that is distributed according to the respective given
267 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
268 * (instead of from the umbrella histogram) to generate a new
273 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
276 /* \brief when mixing histograms, mix only histograms withing blocks
277 long the reaction coordinate xi. Avoids gaps along xi. */
278 int histBootStrapBlockLength;
280 int bsSeed; //!< random seed for bootstrapping
282 /* \brief Write cumulative distribution functions (CDFs) of histograms
283 and write the generated histograms for each bootstrap */
287 * \name tabulated umbrella potential stuff
291 double *tabX, *tabY, tabMin, tabMax, tabDz;
294 gmx_rng_t rng; //!< gromacs random number generator
297 //! Make an umbrella window (may contain several histograms)
298 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
300 t_UmbrellaWindow *win;
303 for (i = 0; i < nwin; i++)
305 win[i].Histo = win[i].cum = 0;
306 win[i].k = win[i].pos = win[i].z = 0;
307 win[i].N = win[i].Ntot = 0;
308 win[i].g = win[i].tau = win[i].tausmooth = 0;
312 win[i].aver = win[i].sigma = 0;
318 //! Delete an umbrella window (may contain several histograms)
319 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
322 for (i = 0; i < nwin; i++)
326 for (j = 0; j < win[i].nPull; j++)
328 sfree(win[i].Histo[j]);
333 for (j = 0; j < win[i].nPull; j++)
335 sfree(win[i].cum[j]);
340 for (j = 0; j < win[i].nPull; j++)
342 sfree(win[i].bContrib[j]);
354 sfree(win[i].tausmooth);
355 sfree(win[i].bContrib);
357 sfree(win[i].forceAv);
360 sfree(win[i].bsWeight);
366 * Read and setup tabulated umbrella potential
368 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
373 printf("Setting up tabulated potential from file %s\n", fn);
374 nl = read_xvg(fn, &y, &ny);
378 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
380 opt->tabMin = y[0][0];
381 opt->tabMax = y[0][nl-1];
382 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
385 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
386 "ascending z-direction", fn);
388 for (i = 0; i < nl-1; i++)
390 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
392 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
397 for (i = 0; i < nl; i++)
399 opt->tabX[i] = y[0][i];
400 opt->tabY[i] = y[1][i];
402 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
403 opt->tabMin, opt->tabMax, opt->tabDz);
406 //! Read the header of an PDO file (position, force const, nr of groups)
407 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
410 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
412 std::istringstream ist;
415 if (fgets(line, 2048, file) == NULL)
417 gmx_fatal(FARGS, "Error reading header from pdo file\n");
420 ist >> Buffer0 >> Buffer1 >> Buffer2;
421 if (strcmp(Buffer1, "UMBRELLA"))
423 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
424 "(Found in first line: `%s')\n",
425 Buffer1, "UMBRELLA", line);
427 if (strcmp(Buffer2, "3.0"))
429 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
433 if (fgets(line, 2048, file) == NULL)
435 gmx_fatal(FARGS, "Error reading header from pdo file\n");
438 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
439 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
441 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
442 if (header->nDim != 1)
444 gmx_fatal(FARGS, "Currently only supports one dimension");
448 if (fgets(line, 2048, file) == NULL)
450 gmx_fatal(FARGS, "Error reading header from pdo file\n");
453 ist >> Buffer0 >> Buffer1 >> header->nSkip;
456 if (fgets(line, 2048, file) == NULL)
458 gmx_fatal(FARGS, "Error reading header from pdo file\n");
461 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
464 if (fgets(line, 2048, file) == NULL)
466 gmx_fatal(FARGS, "Error reading header from pdo file\n");
469 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
473 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
477 for (i = 0; i < header->nPull; ++i)
479 if (fgets(line, 2048, file) == NULL)
481 gmx_fatal(FARGS, "Error reading header from pdo file\n");
484 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
485 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
486 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
490 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
491 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
495 if (fgets(line, 2048, file) == NULL)
497 gmx_fatal(FARGS, "Cannot read from file\n");
501 if (strcmp(Buffer3, "#####") != 0)
503 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
508 static char *fgets3(FILE *fp, char ptr[], int *len)
513 if (fgets(ptr, *len-1, fp) == NULL)
518 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
520 /* This line is longer than len characters, let's increase len! */
524 if (fgets(p-1, STRLEN, fp) == NULL)
530 if (ptr[slen-1] == '\n')
538 /*! \brief Read the data columns of and PDO file.
540 * TO DO: Get rid of the scanf function to avoid the clang warning.
541 * At the moment, this warning is avoided by hiding the format string
542 * the variable fmtlf.
544 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
545 int fileno, t_UmbrellaWindow * win,
546 t_UmbrellaOptions *opt,
547 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
549 int i, inttemp, bins, count, ntot;
550 real min, max, minfound = 1e20, maxfound = -1e20;
551 double temp, time, time0 = 0, dt;
553 t_UmbrellaWindow * window = 0;
554 gmx_bool timeok, dt_ok = 1;
555 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
556 int len = STRLEN, dstep = 1;
557 const int blocklen = 4096;
567 /* Need to alocate memory and set up structure */
568 window->nPull = header->nPull;
571 snew(window->Histo, window->nPull);
572 snew(window->z, window->nPull);
573 snew(window->k, window->nPull);
574 snew(window->pos, window->nPull);
575 snew(window->N, window->nPull);
576 snew(window->Ntot, window->nPull);
577 snew(window->g, window->nPull);
578 snew(window->bsWeight, window->nPull);
580 window->bContrib = 0;
582 if (opt->bCalcTauInt)
584 snew(window->ztime, window->nPull);
590 snew(lennow, window->nPull);
592 for (i = 0; i < window->nPull; ++i)
595 window->bsWeight[i] = 1.;
596 snew(window->Histo[i], bins);
597 window->k[i] = header->UmbCons[i][0];
598 window->pos[i] = header->UmbPos[i][0];
602 if (opt->bCalcTauInt)
604 window->ztime[i] = 0;
608 /* Done with setup */
614 min = max = bins = 0; /* Get rid of warnings */
619 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
623 if (ptr[0] == '#' || strlen(ptr) < 2)
628 /* Initiate format string */
630 strcat(fmtign, "%*s");
632 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
633 /* Round time to fs */
634 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
636 /* get time step of pdo file */
646 dstep = static_cast<int>(opt->dt/dt+0.5);
654 window->dt = dt*dstep;
659 dt_ok = ((count-1)%dstep == 0);
660 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
662 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
663 time,opt->tmin, opt->tmax, dt_ok,timeok); */
667 for (i = 0; i < header->nPull; ++i)
670 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
671 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
672 if (sscanf(ptr, fmt, &temp))
674 temp += header->UmbPos[i][0];
688 if (opt->bCalcTauInt)
690 /* save time series for autocorrelation analysis */
691 ntot = window->Ntot[i];
692 if (ntot >= lennow[i])
694 lennow[i] += blocklen;
695 srenew(window->ztime[i], lennow[i]);
697 window->ztime[i][ntot] = temp;
705 inttemp = static_cast<int> (temp);
712 else if (inttemp >= bins)
718 if (inttemp >= 0 && inttemp < bins)
720 window->Histo[i][inttemp] += 1.;
728 if (time > opt->tmax)
732 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
748 /*! \brief Set identical weights for all histograms
750 * Normally, the weight is given by the number data points in each
751 * histogram, together with the autocorrelation time. This can be overwritten
752 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
753 * an appropriate autocorrelation times instead of using this function.
755 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
757 int i, k, j, NEnforced;
760 NEnforced = window[0].Ntot[0];
761 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
762 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
763 /* enforce all histograms to have the same weight as the very first histogram */
765 for (j = 0; j < nWindows; ++j)
767 for (k = 0; k < window[j].nPull; ++k)
769 ratio = 1.0*NEnforced/window[j].Ntot[k];
770 for (i = 0; i < window[0].nBin; ++i)
772 window[j].Histo[k][i] *= ratio;
774 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
779 /*! \brief Simple linear interpolation between two given tabulated points
781 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
784 double pl, pu, dz, dp;
786 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
788 if (jl < 0 || ju >= opt->tabNbins)
790 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
791 "Provide an extended table.", dist, jl, ju);
795 dz = dist-opt->tabX[jl];
796 dp = (pu-pl)*dz/opt->tabDz;
802 * Check which bins substiantially contribute (accelerates WHAM)
804 * Don't worry, that routine does not mean we compute the PMF in limited precision.
805 * After rapid convergence (using only substiantal contributions), we always switch to
808 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
809 t_UmbrellaOptions *opt)
811 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
812 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
813 gmx_bool bAnyContrib;
814 static int bFirst = 1;
815 static double wham_contrib_lim;
819 for (i = 0; i < nWindows; ++i)
821 nGrptot += window[i].nPull;
823 wham_contrib_lim = opt->Tolerance/nGrptot;
826 ztot = opt->max-opt->min;
829 for (i = 0; i < nWindows; ++i)
831 if (!window[i].bContrib)
833 snew(window[i].bContrib, window[i].nPull);
835 for (j = 0; j < window[i].nPull; ++j)
837 if (!window[i].bContrib[j])
839 snew(window[i].bContrib[j], opt->bins);
842 for (k = 0; k < opt->bins; ++k)
844 temp = (1.0*k+0.5)*dz+min;
845 distance = temp - window[i].pos[j]; /* distance to umbrella center */
847 { /* in cyclic wham: */
848 if (distance > ztot_half) /* |distance| < ztot_half */
852 else if (distance < -ztot_half)
857 /* Note: there are two contributions to bin k in the wham equations:
858 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
859 ii) exp(- U/(8.314e-3*opt->Temperature))
860 where U is the umbrella potential
861 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
866 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
870 U = tabulated_pot(distance, opt); /* Use tabulated potential */
873 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
874 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
875 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
876 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
877 if (window[i].bContrib[j][k])
883 /* If this histo is far outside min and max all bContrib may be FALSE,
884 causing a floating point exception later on. To avoid that, switch
888 for (k = 0; k < opt->bins; ++k)
890 window[i].bContrib[j][k] = TRUE;
897 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
898 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
903 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
909 //! Compute the PMF (one of the two main WHAM routines)
910 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
911 t_UmbrellaOptions *opt, gmx_bool bExact)
914 double num, ztot_half, ztot, distance, min = opt->min, dz = opt->dz;
915 double denom, U = 0, temp = 0, invg;
917 ztot = opt->max-opt->min;
920 for (i = 0; i < opt->bins; ++i)
923 for (j = 0; j < nWindows; ++j)
925 for (k = 0; k < window[j].nPull; ++k)
927 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
928 temp = (1.0*i+0.5)*dz+min;
929 num += invg*window[j].Histo[k][i];
931 if (!(bExact || window[j].bContrib[k][i]))
935 distance = temp - window[j].pos[k]; /* distance to umbrella center */
937 { /* in cyclic wham: */
938 if (distance > ztot_half) /* |distance| < ztot_half */
942 else if (distance < -ztot_half)
950 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
954 U = tabulated_pot(distance, opt); /* Use tabulated potential */
956 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
959 profile[i] = num/denom;
963 //! Compute the free energy offsets z (one of the two main WHAM routines)
964 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
965 t_UmbrellaOptions *opt, gmx_bool bExact)
968 double U = 0, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot;
969 double MAX = -1e20, total = 0;
971 ztot = opt->max-opt->min;
974 for (i = 0; i < nWindows; ++i)
976 for (j = 0; j < window[i].nPull; ++j)
979 for (k = 0; k < window[i].nBin; ++k)
981 if (!(bExact || window[i].bContrib[j][k]))
985 temp = (1.0*k+0.5)*dz+min;
986 distance = temp - window[i].pos[j]; /* distance to umbrella center */
988 { /* in cyclic wham: */
989 if (distance > ztot_half) /* |distance| < ztot_half */
993 else if (distance < -ztot_half)
1001 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1005 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1008 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1010 /* Avoid floating point exception if window is far outside min and max */
1013 total = -log(total);
1019 temp = fabs(total - window[i].z[j]);
1024 window[i].z[j] = total;
1030 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1031 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1033 int i, j, bins = opt->bins;
1034 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1037 if (min > 0. || max < 0.)
1039 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1040 opt->min, opt->max);
1045 for (i = 0; i < bins; i++)
1049 /* bin left of zsym */
1050 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1051 if (j >= 0 && (j+1) < bins)
1053 /* interpolate profile linearly between bins j and j+1 */
1054 z1 = min+(j+0.5)*dz;
1056 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1057 /* average between left and right */
1058 prof2[i] = 0.5*(profsym+profile[i]);
1062 prof2[i] = profile[i];
1066 memcpy(profile, prof2, bins*sizeof(double));
1070 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1071 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1074 double unit_factor = 1., R_MolarGasConst, diff;
1076 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1079 /* Not log? Nothing to do! */
1085 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1086 if (opt->unit == en_kT)
1090 else if (opt->unit == en_kJ)
1092 unit_factor = R_MolarGasConst*opt->Temperature;
1094 else if (opt->unit == en_kCal)
1096 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1100 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1103 for (i = 0; i < bins; i++)
1105 if (profile[i] > 0.0)
1107 profile[i] = -log(profile[i])*unit_factor;
1111 /* shift to zero at z=opt->zProf0 */
1112 if (!opt->bProf0Set)
1118 /* Get bin with shortest distance to opt->zProf0
1119 (-0.5 from bin position and +0.5 from rounding cancel) */
1120 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1125 else if (imin >= bins)
1129 diff = profile[imin];
1133 for (i = 0; i < bins; i++)
1139 //! Make an array of random integers (used for bootstrapping)
1140 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1142 int ipull, blockBase, nr, ipullRandom;
1144 if (blockLength == 0)
1146 blockLength = nPull;
1149 for (ipull = 0; ipull < nPull; ipull++)
1151 blockBase = (ipull/blockLength)*blockLength;
1153 { /* make sure nothing bad happens in the last block */
1154 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1155 ipullRandom = blockBase + nr;
1157 while (ipullRandom >= nPull);
1158 if (ipullRandom < 0 || ipullRandom >= nPull)
1160 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1161 "blockLength = %d, blockBase = %d\n",
1162 ipullRandom, nPull, nr, blockLength, blockBase);
1164 randomArray[ipull] = ipullRandom;
1166 /*for (ipull=0; ipull<nPull; ipull++)
1167 printf("%d ",randomArray[ipull]); printf("\n"); */
1170 /*! \brief Set pull group information of a synthetic histogram
1172 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1173 * but it is not required if we bootstrap complete histograms.
1175 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1176 t_UmbrellaWindow *thisWindow, int pullid)
1178 synthWindow->N [0] = thisWindow->N [pullid];
1179 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1180 synthWindow->pos [0] = thisWindow->pos [pullid];
1181 synthWindow->z [0] = thisWindow->z [pullid];
1182 synthWindow->k [0] = thisWindow->k [pullid];
1183 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1184 synthWindow->g [0] = thisWindow->g [pullid];
1185 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1188 /*! \brief Calculate cumulative distribution function of of all histograms.
1190 * This allow to create random number sequences
1191 * which are distributed according to the histograms. Required to generate
1192 * the "synthetic" histograms for the Bootstrap method
1194 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1195 t_UmbrellaOptions *opt, const char *fnhist)
1199 char *fn = 0, *buf = 0;
1202 if (opt->bs_verbose)
1204 snew(fn, strlen(fnhist)+10);
1205 snew(buf, strlen(fnhist)+10);
1206 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1207 fp = xvgropen(fn, "CDFs of umbrella windows", "z", "CDF", opt->oenv);
1211 for (i = 0; i < nWindows; i++)
1213 snew(window[i].cum, window[i].nPull);
1214 for (j = 0; j < window[i].nPull; j++)
1216 snew(window[i].cum[j], nbin+1);
1217 window[i].cum[j][0] = 0.;
1218 for (k = 1; k <= nbin; k++)
1220 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1223 /* normalize CDFs. Ensure cum[nbin]==1 */
1224 last = window[i].cum[j][nbin];
1225 for (k = 0; k <= nbin; k++)
1227 window[i].cum[j][k] /= last;
1232 printf("Cumulative distriubtion functions of all histograms created.\n");
1233 if (opt->bs_verbose)
1235 for (k = 0; k <= nbin; k++)
1237 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1238 for (i = 0; i < nWindows; i++)
1240 for (j = 0; j < window[i].nPull; j++)
1242 fprintf(fp, "%g\t", window[i].cum[j][k]);
1247 printf("Wrote cumulative distribution functions to %s\n", fn);
1255 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1257 * This is used to generate a random sequence distributed according to a histogram
1259 void searchCumulative(double xx[], int n, double x, int *j)
1281 else if (x == xx[n-1])
1291 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1292 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1293 int pullid, t_UmbrellaOptions *opt)
1295 int N, i, nbins, r_index, ibin;
1296 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1299 N = thisWindow->N[pullid];
1300 dt = thisWindow->dt;
1301 nbins = thisWindow->nBin;
1303 /* tau = autocorrelation time */
1304 if (opt->tauBootStrap > 0.0)
1306 tausteps = opt->tauBootStrap/dt;
1308 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1310 /* calc tausteps from g=1+2tausteps */
1311 g = thisWindow->g[pullid];
1317 "When generating hypothetical trajctories from given umbrella histograms,\n"
1318 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1319 "cannot be predicted. You have 3 options:\n"
1320 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1321 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1323 " with option -iiact for all umbrella windows.\n"
1324 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1325 " Use option (3) only if you are sure what you're doing, you may severely\n"
1326 " underestimate the error if a too small ACT is given.\n");
1327 gmx_fatal(FARGS, errstr);
1330 synthWindow->N [0] = N;
1331 synthWindow->pos [0] = thisWindow->pos[pullid];
1332 synthWindow->z [0] = thisWindow->z[pullid];
1333 synthWindow->k [0] = thisWindow->k[pullid];
1334 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1335 synthWindow->g [0] = thisWindow->g [pullid];
1336 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1338 for (i = 0; i < nbins; i++)
1340 synthWindow->Histo[0][i] = 0.;
1343 if (opt->bsMethod == bsMethod_trajGauss)
1345 sig = thisWindow->sigma [pullid];
1346 mu = thisWindow->aver [pullid];
1349 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1351 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1353 z = a*x + sqrt(1-a^2)*y
1354 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1355 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1357 C(t) = exp(-t/tau) with tau=-1/ln(a)
1359 Then, use error function to turn the Gaussian random variable into a uniformly
1360 distributed one in [0,1]. Eventually, use cumulative distribution function of
1361 histogram to get random variables distributed according to histogram.
1362 Note: The ACT of the flat distribution and of the generated histogram is not
1363 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1365 a = exp(-1.0/tausteps);
1367 invsqrt2 = 1./sqrt(2.0);
1369 /* init random sequence */
1370 x = gmx_rng_gaussian_table(opt->rng);
1372 if (opt->bsMethod == bsMethod_traj)
1374 /* bootstrap points from the umbrella histograms */
1375 for (i = 0; i < N; i++)
1377 y = gmx_rng_gaussian_table(opt->rng);
1379 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1380 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1382 r = 0.5*(1+gmx_erf(x*invsqrt2));
1383 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1384 synthWindow->Histo[0][r_index] += 1.;
1387 else if (opt->bsMethod == bsMethod_trajGauss)
1389 /* bootstrap points from a Gaussian with the same average and sigma
1390 as the respective umbrella histogram. The idea was, that -given
1391 limited sampling- the bootstrapped histograms are otherwise biased
1392 from the limited sampling of the US histos. However, bootstrapping from
1393 the Gaussian seems to yield a similar estimate. */
1397 y = gmx_rng_gaussian_table(opt->rng);
1400 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1405 while ( (ibin += nbins) < 0)
1410 else if (ibin >= nbins)
1412 while ( (ibin -= nbins) >= nbins)
1419 if (ibin >= 0 && ibin < nbins)
1421 synthWindow->Histo[0][ibin] += 1.;
1428 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1432 /*! \brief Write all histograms to a file
1434 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1435 * sets of bootstrapped histograms.
1437 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1438 int bs_index, t_UmbrellaOptions *opt)
1440 char *fn = 0, *buf = 0, title[256];
1446 snew(fn, strlen(fnhist)+10);
1447 snew(buf, strlen(fnhist)+1);
1448 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1449 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1453 fn = strdup(fnhist);
1454 strcpy(title, "Umbrella histograms");
1457 fp = xvgropen(fn, title, "z", "count", opt->oenv);
1460 /* Write histograms */
1461 for (l = 0; l < bins; ++l)
1463 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1464 for (i = 0; i < nWindows; ++i)
1466 for (j = 0; j < window[i].nPull; ++j)
1468 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1475 printf("Wrote %s\n", fn);
1483 //! Used for qsort to sort random numbers
1484 int func_wham_is_larger(const void *a, const void *b)
1503 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1504 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1511 /* generate ordered random numbers between 0 and nAllPull */
1512 for (i = 0; i < nAllPull-1; i++)
1514 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1516 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1517 r[nAllPull-1] = 1.0*nAllPull;
1519 synthwin[0].bsWeight[0] = r[0];
1520 for (i = 1; i < nAllPull; i++)
1522 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1525 /* avoid to have zero weight by adding a tiny value */
1526 for (i = 0; i < nAllPull; i++)
1528 if (synthwin[i].bsWeight[0] < 1e-5)
1530 synthwin[i].bsWeight[0] = 1e-5;
1537 //! The main bootstrapping routine
1538 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1539 char* ylabel, double *profile,
1540 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1542 t_UmbrellaWindow * synthWindow;
1543 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1544 int i, j, *randomArray = 0, winid, pullid, ib;
1545 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1547 gmx_bool bExact = FALSE;
1549 /* init random generator */
1550 if (opt->bsSeed == -1)
1552 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1556 opt->rng = gmx_rng_init(opt->bsSeed);
1559 snew(bsProfile, opt->bins);
1560 snew(bsProfiles_av, opt->bins);
1561 snew(bsProfiles_av2, opt->bins);
1563 /* Create array of all pull groups. Note that different windows
1564 may have different nr of pull groups
1565 First: Get total nr of pull groups */
1567 for (i = 0; i < nWindows; i++)
1569 nAllPull += window[i].nPull;
1571 snew(allPull_winId, nAllPull);
1572 snew(allPull_pullId, nAllPull);
1574 /* Setup one array of all pull groups */
1575 for (i = 0; i < nWindows; i++)
1577 for (j = 0; j < window[i].nPull; j++)
1579 allPull_winId[iAllPull] = i;
1580 allPull_pullId[iAllPull] = j;
1585 /* setup stuff for synthetic windows */
1586 snew(synthWindow, nAllPull);
1587 for (i = 0; i < nAllPull; i++)
1589 synthWindow[i].nPull = 1;
1590 synthWindow[i].nBin = opt->bins;
1591 snew(synthWindow[i].Histo, 1);
1592 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1594 snew(synthWindow[i].Histo[0], opt->bins);
1596 snew(synthWindow[i].N, 1);
1597 snew(synthWindow[i].pos, 1);
1598 snew(synthWindow[i].z, 1);
1599 snew(synthWindow[i].k, 1);
1600 snew(synthWindow[i].bContrib, 1);
1601 snew(synthWindow[i].g, 1);
1602 snew(synthWindow[i].bsWeight, 1);
1605 switch (opt->bsMethod)
1608 snew(randomArray, nAllPull);
1609 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1610 please_cite(stdout, "Hub2006");
1612 case bsMethod_BayesianHist:
1613 /* just copy all histogams into synthWindow array */
1614 for (i = 0; i < nAllPull; i++)
1616 winid = allPull_winId [i];
1617 pullid = allPull_pullId[i];
1618 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1622 case bsMethod_trajGauss:
1623 calc_cumulatives(window, nWindows, opt, fnhist);
1626 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1629 /* do bootstrapping */
1630 fp = xvgropen(fnprof, "Boot strap profiles", "z", ylabel, opt->oenv);
1631 for (ib = 0; ib < opt->nBootStrap; ib++)
1633 printf(" *******************************************\n"
1634 " ******** Start bootstrap nr %d ************\n"
1635 " *******************************************\n", ib+1);
1637 switch (opt->bsMethod)
1640 /* bootstrap complete histograms from given histograms */
1641 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1642 for (i = 0; i < nAllPull; i++)
1644 winid = allPull_winId [randomArray[i]];
1645 pullid = allPull_pullId[randomArray[i]];
1646 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1649 case bsMethod_BayesianHist:
1650 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1651 setRandomBsWeights(synthWindow, nAllPull, opt);
1654 case bsMethod_trajGauss:
1655 /* create new histos from given histos, that is generate new hypothetical
1657 for (i = 0; i < nAllPull; i++)
1659 winid = allPull_winId[i];
1660 pullid = allPull_pullId[i];
1661 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1666 /* write histos in case of verbose output */
1667 if (opt->bs_verbose)
1669 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1676 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1679 if ( (i%opt->stepUpdateContrib) == 0)
1681 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1683 if (maxchange < opt->Tolerance)
1687 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1689 printf("\t%4d) Maximum change %e\n", i, maxchange);
1691 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1694 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1695 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1699 prof_normalization_and_unit(bsProfile, opt);
1702 /* symmetrize profile around z=0 */
1705 symmetrizeProfile(bsProfile, opt);
1708 /* save stuff to get average and stddev */
1709 for (i = 0; i < opt->bins; i++)
1712 bsProfiles_av[i] += tmp;
1713 bsProfiles_av2[i] += tmp*tmp;
1714 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1720 /* write average and stddev */
1721 fp = xvgropen(fnres, "Average and stddev from bootstrapping", "z", ylabel, opt->oenv);
1722 fprintf(fp, "@TYPE xydy\n");
1723 for (i = 0; i < opt->bins; i++)
1725 bsProfiles_av [i] /= opt->nBootStrap;
1726 bsProfiles_av2[i] /= opt->nBootStrap;
1727 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1728 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1729 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1732 printf("Wrote boot strap result to %s\n", fnres);
1735 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1736 int whaminFileType(char *fn)
1740 if (strcmp(fn+len-3, "tpr") == 0)
1744 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1746 return whamin_pullxf;
1748 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1754 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1756 return whamin_unknown;
1759 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1760 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1761 t_UmbrellaOptions *opt)
1763 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1764 int nread, sizenow, i, block = 1;
1767 fp = gmx_ffopen(fn, "r");
1770 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1772 if (strlen(tmp) >= WHAM_MAXFILELEN)
1774 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1776 if (nread >= sizenow)
1779 srenew(filename, sizenow);
1780 for (i = sizenow-block; i < sizenow; i++)
1782 snew(filename[i], WHAM_MAXFILELEN);
1785 /* remove newline if there is one */
1786 if (tmp[strlen(tmp)-1] == '\n')
1788 tmp[strlen(tmp)-1] = '\0';
1790 strcpy(filename[nread], tmp);
1793 printf("Found file %s in %s\n", filename[nread], fn);
1797 *filenamesRet = filename;
1801 //! Open a file or a pipe to a gzipped file
1802 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1804 char Buffer[1024], gunzip[1024], *Path = 0;
1806 static gmx_bool bFirst = 1;
1808 /* gzipped pdo file? */
1809 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1811 /* search gunzip executable */
1812 if (!(Path = getenv("GMX_PATH_GZIP")))
1814 if (gmx_fexist("/bin/gunzip"))
1816 sprintf(gunzip, "%s", "/bin/gunzip");
1818 else if (gmx_fexist("/usr/bin/gunzip"))
1820 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1824 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1825 "You may want to define the path to gunzip "
1826 "with the environment variable GMX_PATH_GZIP.", gunzip);
1831 sprintf(gunzip, "%s/gunzip", Path);
1832 if (!gmx_fexist(gunzip))
1834 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1835 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1840 printf("Using gunzig executable %s\n", gunzip);
1843 if (!gmx_fexist(fn))
1845 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1847 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1850 printf("Executing command '%s'\n", Buffer);
1853 if ((pipe = popen(Buffer, "r")) == NULL)
1855 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1858 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1864 pipe = gmx_ffopen(fn, "r");
1871 //! Close file or pipe
1872 void pdo_close_file(FILE *fp)
1881 //! Reading all pdo files
1882 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1883 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1886 real mintmp, maxtmp, done = 0.;
1889 /* char Buffer0[1000]; */
1893 gmx_fatal(FARGS, "No files found. Hick.");
1896 /* if min and max are not given, get min and max from the input files */
1899 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1902 for (i = 0; i < nfiles; ++i)
1904 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1905 /*fgets(Buffer0,999,file);
1906 fprintf(stderr,"First line '%s'\n",Buffer0); */
1907 done = 100.0*(i+1)/nfiles;
1908 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1913 read_pdo_header(file, header, opt);
1914 /* here only determine min and max of this window */
1915 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1916 if (maxtmp > opt->max)
1920 if (mintmp < opt->min)
1926 pdo_close_file(file);
1934 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1935 if (opt->bBoundsOnly)
1937 printf("Found option -boundsonly, now exiting.\n");
1941 /* store stepsize in profile */
1942 opt->dz = (opt->max-opt->min)/opt->bins;
1944 /* Having min and max, we read in all files */
1945 /* Loop over all files */
1946 for (i = 0; i < nfiles; ++i)
1948 done = 100.0*(i+1)/nfiles;
1949 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1954 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1955 read_pdo_header(file, header, opt);
1956 /* load data into window */
1957 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
1958 if ((window+i)->Ntot[0] == 0)
1960 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1964 pdo_close_file(file);
1972 for (i = 0; i < nfiles; ++i)
1979 //! translate 0/1 to N/Y to write pull dimensions
1980 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
1982 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
1983 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
1988 static int first = 1;
1990 /* printf("Reading %s \n",fn); */
1991 read_tpx_state(fn, &ir, &state, NULL, NULL);
1993 if (ir.ePull != epullUMBRELLA)
1995 gmx_fatal(FARGS, "This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
1996 " (ir.ePull = %d)\n", epull_names[ir.ePull], ir.ePull);
1999 /* nr of pull groups */
2000 ncrd = ir.pull->ncoord;
2003 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d pull coordinates\n", ncrd);
2006 header->npullcrds = ir.pull->ncoord;
2007 header->pull_geometry = ir.pull->eGeom;
2008 header->bPrintRef = ir.pull->bPrintRef;
2009 copy_ivec(ir.pull->dim, header->pull_dim);
2010 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2011 snew(header->k, ncrd);
2012 snew(header->init_dist, ncrd);
2013 snew(header->umbInitDist, ncrd);
2015 /* only z-direction with epullgCYL? */
2016 if (header->pull_geometry == epullgCYL)
2018 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2020 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2021 "However, found dimensions [%s %s %s]\n",
2022 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2023 int2YN(header->pull_dim[ZZ]));
2027 for (i = 0; i < ncrd; i++)
2029 header->k[i] = ir.pull->coord[i].k;
2030 if (header->k[i] == 0.0)
2032 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2033 "That doesn't seem to be an Umbrella tpr.\n",
2036 header->init_dist[i] = ir.pull->coord[i].init;
2038 /* initial distance to reference */
2039 switch (header->pull_geometry)
2042 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2046 header->umbInitDist[i] = header->init_dist[i];
2049 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2053 if (opt->verbose || first)
2055 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2056 "\tPull group coordinates%s expected in pullx files.\n",
2057 fn, header->npullcrds, epullg_names[header->pull_geometry],
2058 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2059 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2060 for (i = 0; i < ncrd; i++)
2062 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2065 if (!opt->verbose && first)
2067 printf("\tUse option -v to see this output for all input tpr files\n\n");
2073 //! 2-norm in a ndim-dimensional space
2074 double dist_ndim(double **dx, int ndim, int line)
2078 for (i = 0; i < ndim; i++)
2080 r2 += sqr(dx[i][line]);
2085 //! Read pullx.xvg or pullf.xvg
2086 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2087 t_UmbrellaWindow * window,
2088 t_UmbrellaOptions *opt,
2089 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2090 t_groupselection *groupsel)
2092 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2093 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2094 real min, max, minfound = 1e20, maxfound = -1e20;
2095 gmx_bool dt_ok, timeok, bHaveForce;
2096 const char *quantity;
2097 const int blocklen = 4096;
2099 static gmx_bool bFirst = TRUE;
2102 * Data columns in pull output:
2103 * - in force output pullf.xvg:
2104 * No reference columns, one column per pull coordinate
2106 * - in position output pullx.xvg
2107 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2108 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2111 nColPerCrd = opt->bPullx ? header->pull_ndim : 1;
2112 quantity = opt->bPullx ? "position" : "force";
2114 if (opt->bPullx && header->bPrintRef)
2116 nColRefEachCrd = header->pull_ndim;
2123 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2124 bHaveForce = opt->bPullf;
2126 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2127 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2128 Sorry for the laziness, this is a To-do. */
2129 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2132 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2133 "reading \n(option -if) is supported at present, "
2134 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2135 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2136 epullg_names[header->pull_geometry]);
2139 nt = read_xvg(fn, &y, &ny);
2141 /* Check consistency */
2144 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2148 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2149 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2151 printf("Expecting these columns in pull file:\n"
2152 "\t%d reference columns for each individual pull coordinate\n"
2153 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2154 printf("With %d pull groups, expect %d columns (including the time column)\n", header->npullcrds, nColExpect);
2157 if (ny != nColExpect)
2159 gmx_fatal(FARGS, "Found %d pull coodinates in %s,\n but %d data columns in %s (expected %d)\n"
2160 "\nMaybe you confused options -ix and -if ?\n",
2161 header->npullcrds, fntpr, ny-1, fn, nColExpect-1);
2166 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2176 window->dt = y[0][1]-y[0][0];
2178 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2180 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2183 /* Need to alocate memory and set up structure */
2187 /* Use only groups selected with option -is file */
2188 if (header->npullcrds != groupsel->n)
2190 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2191 header->npullcrds, groupsel->n);
2193 window->nPull = groupsel->nUse;
2197 window->nPull = header->npullcrds;
2200 window->nBin = bins;
2201 snew(window->Histo, window->nPull);
2202 snew(window->z, window->nPull);
2203 snew(window->k, window->nPull);
2204 snew(window->pos, window->nPull);
2205 snew(window->N, window->nPull);
2206 snew(window->Ntot, window->nPull);
2207 snew(window->g, window->nPull);
2208 snew(window->bsWeight, window->nPull);
2209 window->bContrib = 0;
2211 if (opt->bCalcTauInt)
2213 snew(window->ztime, window->nPull);
2217 window->ztime = NULL;
2219 snew(lennow, window->nPull);
2221 for (g = 0; g < window->nPull; ++g)
2224 window->bsWeight[g] = 1.;
2225 snew(window->Histo[g], bins);
2227 window->Ntot[g] = 0;
2229 if (opt->bCalcTauInt)
2231 window->ztime[g] = NULL;
2235 /* Copying umbrella center and force const is more involved since not
2236 all pull groups from header (tpr file) may be used in window variable */
2237 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2239 if (groupsel && (groupsel->bUse[g] == FALSE))
2243 window->k[gUsed] = header->k[g];
2244 window->pos[gUsed] = header->umbInitDist[g];
2249 { /* only determine min and max */
2252 min = max = bins = 0; /* Get rid of warnings */
2256 for (i = 0; i < nt; i++)
2258 /* Do you want that time frame? */
2259 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2261 /* get time step of pdo file and get dstep from opt->dt */
2271 dstep = static_cast<int>(opt->dt/dt+0.5);
2279 window->dt = dt*dstep;
2283 dt_ok = (i%dstep == 0);
2284 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2286 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2287 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2290 /* Note: if groupsel == NULL:
2291 * all groups in pullf/x file are stored in this window, and gUsed == g
2292 * if groupsel != NULL:
2293 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2296 for (g = 0; g < header->npullcrds; ++g)
2298 /* was this group selected for application in WHAM? */
2299 if (groupsel && (groupsel->bUse[g] == FALSE))
2308 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2310 pos = -force/header->k[g] + header->umbInitDist[g];
2314 /* pick the right column from:
2315 * time ref1[ndim] coord1[ndim] ref2[ndim] coord2[ndim] ...
2317 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2318 switch (header->pull_geometry)
2321 pos = dist_ndim(y + column, header->pull_ndim, i);
2327 gmx_fatal(FARGS, "Bad error, this error should have been catched before. Ups.\n");
2331 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2345 if (gUsed >= window->nPull)
2347 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2348 gUsed, window->nPull);
2351 if (opt->bCalcTauInt && !bGetMinMax)
2353 /* save time series for autocorrelation analysis */
2354 ntot = window->Ntot[gUsed];
2355 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2356 if (ntot >= lennow[gUsed])
2358 lennow[gUsed] += blocklen;
2359 srenew(window->ztime[gUsed], lennow[gUsed]);
2361 window->ztime[gUsed][ntot] = pos;
2364 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2369 while ( (ibin += bins) < 0)
2374 else if (ibin >= bins)
2376 while ( (ibin -= bins) >= bins)
2382 if (ibin >= 0 && ibin < bins)
2384 window->Histo[gUsed][ibin] += 1.;
2387 window->Ntot[gUsed]++;
2391 else if (t > opt->tmax)
2395 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2407 for (i = 0; i < ny; i++)
2413 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2414 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2415 t_UmbrellaHeader* header,
2416 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2419 real mintmp, maxtmp;
2421 printf("Reading %d tpr and pullf files\n", nfiles/2);
2423 /* min and max not given? */
2426 printf("Automatic determination of boundaries...\n");
2429 for (i = 0; i < nfiles; i++)
2431 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2433 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2435 read_tpr_header(fnTprs[i], header, opt);
2436 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2438 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2440 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2441 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2442 if (maxtmp > opt->max)
2446 if (mintmp < opt->min)
2451 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2452 if (opt->bBoundsOnly)
2454 printf("Found option -boundsonly, now exiting.\n");
2458 /* store stepsize in profile */
2459 opt->dz = (opt->max-opt->min)/opt->bins;
2461 for (i = 0; i < nfiles; i++)
2463 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2465 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2467 read_tpr_header(fnTprs[i], header, opt);
2468 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2470 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2472 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2473 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2474 if (window[i].Ntot[0] == 0)
2476 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2480 for (i = 0; i < nfiles; i++)
2489 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2491 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2492 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2494 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2496 int nlines, ny, i, ig;
2499 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2500 nlines = read_xvg(fn, &iact, &ny);
2501 if (nlines != nwins)
2503 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2506 for (i = 0; i < nlines; i++)
2508 if (window[i].nPull != ny)
2510 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2511 "number of pull groups is different in different simulations. That is not\n"
2512 "supported yet. Sorry.\n");
2514 for (ig = 0; ig < window[i].nPull; ig++)
2516 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2517 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2519 if (iact[ig][i] <= 0.0)
2521 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2528 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2531 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2532 * that -in case of limited sampling- the ACT may be severely underestimated.
2533 * Note: the g=1+2tau are overwritten.
2534 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2537 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2540 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2542 /* only evaluate within +- 3sigma of the Gausian */
2543 siglim = 3.0*opt->sigSmoothIact;
2544 siglim2 = dsqr(siglim);
2545 /* pre-factor of Gaussian */
2546 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2547 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2549 for (i = 0; i < nwins; i++)
2551 snew(window[i].tausmooth, window[i].nPull);
2552 for (ig = 0; ig < window[i].nPull; ig++)
2556 pos = window[i].pos[ig];
2557 for (j = 0; j < nwins; j++)
2559 for (jg = 0; jg < window[j].nPull; jg++)
2561 dpos2 = dsqr(window[j].pos[jg]-pos);
2562 if (dpos2 < siglim2)
2564 w = gaufact*exp(-dpos2*invtwosig2);
2566 tausm += w*window[j].tau[jg];
2567 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2568 w,dpos2,pos,gaufact,invtwosig2); */
2573 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2575 window[i].tausmooth[ig] = tausm;
2579 window[i].tausmooth[ig] = window[i].tau[ig];
2581 window[i].g[ig] = 1+2*tausm/window[i].dt;
2586 //! Stop integrating autoccorelation function when ACF drops under this value
2587 #define WHAM_AC_ZERO_LIMIT 0.05
2589 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2591 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2592 t_UmbrellaOptions *opt, const char *fn)
2594 int i, ig, ncorr, ntot, j, k, *count, restart;
2595 real *corr, c0, dt, tmp;
2596 real *ztime, av, tausteps;
2597 FILE *fp, *fpcorr = 0;
2601 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2602 "time [ps]", "autocorrelation function", opt->oenv);
2606 for (i = 0; i < nwins; i++)
2608 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2610 ntot = window[i].Ntot[0];
2612 /* using half the maximum time as length of autocorrelation function */
2616 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2617 " points. Provide more pull data!", ntot);
2620 /* snew(corrSq,ncorr); */
2623 snew(window[i].tau, window[i].nPull);
2624 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2630 for (ig = 0; ig < window[i].nPull; ig++)
2632 if (ntot != window[i].Ntot[ig])
2634 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2635 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2637 ztime = window[i].ztime[ig];
2639 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2640 for (j = 0, av = 0; (j < ntot); j++)
2645 for (k = 0; (k < ncorr); k++)
2650 for (j = 0; (j < ntot); j += restart)
2652 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2654 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2656 /* corrSq[k] += tmp*tmp; */
2660 /* divide by nr of frames for each time displacement */
2661 for (k = 0; (k < ncorr); k++)
2663 /* count probably = (ncorr-k+(restart-1))/restart; */
2664 corr[k] = corr[k]/count[k];
2665 /* variance of autocorrelation function */
2666 /* corrSq[k]=corrSq[k]/count[k]; */
2668 /* normalize such that corr[0] == 0 */
2670 for (k = 0; (k < ncorr); k++)
2673 /* corrSq[k]*=c0*c0; */
2676 /* write ACFs in verbose mode */
2679 for (k = 0; (k < ncorr); k++)
2681 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2683 fprintf(fpcorr, "&\n");
2686 /* esimate integrated correlation time, fitting is too unstable */
2687 tausteps = 0.5*corr[0];
2688 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2689 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2691 tausteps += corr[j];
2694 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2695 Kumar et al, eq. 28 ff. */
2696 window[i].tau[ig] = tausteps*dt;
2697 window[i].g[ig] = 1+2*tausteps;
2698 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2706 gmx_ffclose(fpcorr);
2709 /* plot IACT along reaction coordinate */
2710 fp = xvgropen(fn, "Integrated autocorrelation times", "z", "IACT [ps]", opt->oenv);
2711 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2712 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2713 for (i = 0; i < nwins; i++)
2715 fprintf(fp, "# %3d ", i);
2716 for (ig = 0; ig < window[i].nPull; ig++)
2718 fprintf(fp, " %11g", window[i].tau[ig]);
2722 for (i = 0; i < nwins; i++)
2724 for (ig = 0; ig < window[i].nPull; ig++)
2726 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2729 if (opt->sigSmoothIact > 0.0)
2731 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2732 opt->sigSmoothIact);
2733 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2734 smoothIact(window, nwins, opt);
2735 fprintf(fp, "&\n@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2736 fprintf(fp, "@ s1 symbol color 2\n");
2737 for (i = 0; i < nwins; i++)
2739 for (ig = 0; ig < window[i].nPull; ig++)
2741 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2746 printf("Wrote %s\n", fn);
2750 * compute average and sigma of each umbrella histogram
2752 void averageSigma(t_UmbrellaWindow *window, int nwins)
2755 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2757 for (i = 0; i < nwins; i++)
2759 snew(window[i].aver, window[i].nPull);
2760 snew(window[i].sigma, window[i].nPull);
2762 ntot = window[i].Ntot[0];
2763 for (ig = 0; ig < window[i].nPull; ig++)
2765 ztime = window[i].ztime[ig];
2766 for (k = 0, av = 0.; k < ntot; k++)
2771 for (k = 0, sum2 = 0.; k < ntot; k++)
2776 sig = sqrt(sum2/ntot);
2777 window[i].aver[ig] = av;
2779 /* Note: This estimate for sigma is biased from the limited sampling.
2780 Correct sigma by n/(n-1) where n = number of independent
2781 samples. Only possible if IACT is known.
2785 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2786 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2790 window[i].sigma[ig] = sig;
2792 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2799 * Use histograms to compute average force on pull group.
2801 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2803 int i, j, bins = opt->bins, k;
2804 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2807 dz = (max-min)/bins;
2808 ztot = opt->max-min;
2811 /* Compute average displacement from histograms */
2812 for (j = 0; j < nWindows; ++j)
2814 snew(window[j].forceAv, window[j].nPull);
2815 for (k = 0; k < window[j].nPull; ++k)
2819 for (i = 0; i < opt->bins; ++i)
2821 temp = (1.0*i+0.5)*dz+min;
2822 distance = temp - window[j].pos[k];
2824 { /* in cyclic wham: */
2825 if (distance > ztot_half) /* |distance| < ztot_half */
2829 else if (distance < -ztot_half)
2834 w = window[j].Histo[k][i]/window[j].g[k];
2835 displAv += w*distance;
2837 /* Are we near min or max? We are getting wrong forces from the histgrams since
2838 the histograms are zero outside [min,max). Therefore, assume that the position
2839 on the other side of the histomgram center is equally likely. */
2842 posmirrored = window[j].pos[k]-distance;
2843 if (posmirrored >= max || posmirrored < min)
2845 displAv += -w*distance;
2852 /* average force from average displacement */
2853 window[j].forceAv[k] = displAv*window[j].k[k];
2854 /* sigma from average square displacement */
2855 /* window[j].sigma [k] = sqrt(displAv2); */
2856 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2862 * Check if the complete reaction coordinate is covered by the histograms
2864 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2865 t_UmbrellaOptions *opt)
2867 int i, ig, j, bins = opt->bins, bBoundary;
2868 real avcount = 0, z, relcount, *count;
2869 snew(count, opt->bins);
2871 for (j = 0; j < opt->bins; ++j)
2873 for (i = 0; i < nwins; i++)
2875 for (ig = 0; ig < window[i].nPull; ig++)
2877 count[j] += window[i].Histo[ig][j];
2880 avcount += 1.0*count[j];
2883 for (j = 0; j < bins; ++j)
2885 relcount = count[j]/avcount;
2886 z = (j+0.5)*opt->dz+opt->min;
2887 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2888 /* check for bins with no data */
2891 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2892 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2894 /* and check for poor sampling */
2895 else if (relcount < 0.005 && !bBoundary)
2897 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2903 /*! \brief Compute initial potential by integrating the average force
2905 * This speeds up the convergence by roughly a factor of 2
2907 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2909 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
2910 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
2913 dz = (opt->max-min)/bins;
2915 printf("Getting initial potential by integration.\n");
2917 /* Compute average displacement from histograms */
2918 computeAverageForce(window, nWindows, opt);
2920 /* Get force for each bin from all histograms in this bin, or, alternatively,
2921 if no histograms are inside this bin, from the closest histogram */
2924 for (j = 0; j < opt->bins; ++j)
2926 pos = (1.0*j+0.5)*dz+min;
2930 groupmin = winmin = 0;
2931 for (i = 0; i < nWindows; i++)
2933 for (ig = 0; ig < window[i].nPull; ig++)
2935 hispos = window[i].pos[ig];
2936 dist = fabs(hispos-pos);
2937 /* average force within bin */
2941 fAv += window[i].forceAv[ig];
2943 /* at the same time, rememer closest histogram */
2952 /* if no histogram found in this bin, use closest histogram */
2959 fAv = window[winmin].forceAv[groupmin];
2963 for (j = 1; j < opt->bins; ++j)
2965 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
2968 /* cyclic wham: linearly correct possible offset */
2971 diff = (pot[bins-1]-pot[0])/(bins-1);
2972 for (j = 1; j < opt->bins; ++j)
2979 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", "z", "PMF [kJ/mol]", opt->oenv);
2980 for (j = 0; j < opt->bins; ++j)
2982 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
2985 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
2988 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
2989 energy offsets which are usually determined by wham
2990 First: turn pot into probabilities:
2992 for (j = 0; j < opt->bins; ++j)
2994 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
2996 calc_z(pot, window, nWindows, opt, TRUE);
3002 //! Count number of words in an line
3003 static int wordcount(char *ptr)
3008 if (strlen(ptr) == 0)
3012 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3014 for (i = 0; (ptr[i] != '\0'); i++)
3016 is[cur] = isspace(ptr[i]);
3017 if ((i > 0) && (is[cur] && !is[1-cur]))
3026 /*! \brief Read input file for pull group selection (option -is)
3028 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3030 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3033 int i, iline, n, len = STRLEN, temp;
3034 char *ptr = 0, *tmpbuf = 0;
3035 char fmt[1024], fmtign[1024];
3036 int block = 1, sizenow;
3038 fp = gmx_ffopen(opt->fnGroupsel, "r");
3039 opt->groupsel = NULL;
3044 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3049 if (iline >= sizenow)
3052 srenew(opt->groupsel, sizenow);
3054 opt->groupsel[iline].n = n;
3055 opt->groupsel[iline].nUse = 0;
3056 snew(opt->groupsel[iline].bUse, n);
3059 for (i = 0; i < n; i++)
3061 strcpy(fmt, fmtign);
3063 if (sscanf(ptr, fmt, &temp))
3065 opt->groupsel[iline].bUse[i] = (temp > 0);
3066 if (opt->groupsel[iline].bUse[i])
3068 opt->groupsel[iline].nUse++;
3071 strcat(fmtign, "%*s");
3075 opt->nGroupsel = iline;
3076 if (nTpr != opt->nGroupsel)
3078 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3082 printf("\nUse only these pull groups:\n");
3083 for (iline = 0; iline < nTpr; iline++)
3085 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3086 for (i = 0; i < opt->groupsel[iline].n; i++)
3088 if (opt->groupsel[iline].bUse[i])
3101 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3103 /*! Number of elements in fnm (used for command line parsing) */
3104 #define NFILE asize(fnm)
3106 //! The main g_wham routine
3107 int gmx_wham(int argc, char *argv[])
3109 const char *desc[] = {
3110 "[THISMODULE] is an analysis program that implements the Weighted",
3111 "Histogram Analysis Method (WHAM). It is intended to analyze",
3112 "output files generated by umbrella sampling simulations to ",
3113 "compute a potential of mean force (PMF). [PAR] ",
3114 "At present, three input modes are supported.[BR]",
3115 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
3116 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
3117 " AND, with option [TT]-ix[tt], a file which contains file names of",
3118 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
3119 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
3120 " first pullx, etc.[BR]",
3121 "[TT]*[tt] Same as the previous input mode, except that the the user",
3122 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3123 " From the pull force the position in the umbrella potential is",
3124 " computed. This does not work with tabulated umbrella potentials.[BR]"
3125 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
3126 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3127 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
3128 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [TT].pdo[tt] file header",
3129 " must be similar to the following:[PAR]",
3130 "[TT]# UMBRELLA 3.0[BR]",
3131 "# Component selection: 0 0 1[BR]",
3133 "# Ref. Group 'TestAtom'[BR]",
3134 "# Nr. of pull groups 2[BR]",
3135 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
3136 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
3138 "The number of pull groups, umbrella positions, force constants, and names ",
3139 "may (of course) differ. Following the header, a time column and ",
3140 "a data column for each pull group follows (i.e. the displacement",
3141 "with respect to the umbrella center). Up to four pull groups are possible ",
3142 "per [TT].pdo[tt] file at present.[PAR]",
3143 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3144 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3145 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3146 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3147 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3148 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3149 "used, groupsel.dat looks like this:[BR]",
3153 "By default, the output files are[BR]",
3154 " [TT]-o[tt] PMF output file[BR]",
3155 " [TT]-hist[tt] Histograms output file[BR]",
3156 "Always check whether the histograms sufficiently overlap.[PAR]",
3157 "The umbrella potential is assumed to be harmonic and the force constants are ",
3158 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
3159 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
3160 "WHAM OPTIONS[BR]------------[BR]",
3161 " [TT]-bins[tt] Number of bins used in analysis[BR]",
3162 " [TT]-temp[tt] Temperature in the simulations[BR]",
3163 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
3164 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
3165 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
3166 "The data points that are used to compute the profile",
3167 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3168 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3169 "umbrella window.[PAR]",
3170 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3171 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3172 "With energy output, the energy in the first bin is defined to be zero. ",
3173 "If you want the free energy at a different ",
3174 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3175 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3176 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3177 "[THISMODULE] will make use of the",
3178 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3179 "reaction coordinate will assumed be be neighbors.[PAR]",
3180 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3181 "which may be useful for, e.g. membranes.[PAR]",
3182 "AUTOCORRELATIONS[BR]----------------[BR]",
3183 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3184 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3185 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3186 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3187 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3188 "Because the IACTs can be severely underestimated in case of limited ",
3189 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3190 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3191 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3192 "integration of the ACFs while the ACFs are larger 0.05.",
3193 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3194 "less robust) method such as fitting to a double exponential, you can ",
3195 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3196 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3197 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
3198 "ERROR ANALYSIS[BR]--------------[BR]",
3199 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3200 "otherwise the statistical error may be substantially underestimated. ",
3201 "More background and examples for the bootstrap technique can be found in ",
3202 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
3203 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3204 "Four bootstrapping methods are supported and ",
3205 "selected with [TT]-bs-method[tt].[BR]",
3206 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3207 "data points, and the bootstrap is carried out by assigning random weights to the ",
3208 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3209 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3210 "statistical error is underestimated.[BR]",
3211 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
3212 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3213 "(allowing duplication, i.e. sampling with replacement).",
3214 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
3215 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3216 "divided into blocks and only histograms within each block are mixed. Note that ",
3217 "the histograms within each block must be representative for all possible histograms, ",
3218 "otherwise the statistical error is underestimated.[BR]",
3219 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3220 "such that the generated data points are distributed according the given histograms ",
3221 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3222 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3223 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3224 "Note that this method may severely underestimate the error in case of limited sampling, ",
3225 "that is if individual histograms do not represent the complete phase space at ",
3226 "the respective positions.[BR]",
3227 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3228 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3229 "and width of the umbrella histograms. That method yields similar error estimates ",
3230 "like method [TT]traj[tt].[PAR]"
3231 "Bootstrapping output:[BR]",
3232 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
3233 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
3234 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3235 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3239 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3240 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3241 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3243 static t_UmbrellaOptions opt;
3246 { "-min", FALSE, etREAL, {&opt.min},
3247 "Minimum coordinate in profile"},
3248 { "-max", FALSE, etREAL, {&opt.max},
3249 "Maximum coordinate in profile"},
3250 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3251 "Determine min and max automatically"},
3252 { "-bins", FALSE, etINT, {&opt.bins},
3253 "Number of bins in profile"},
3254 { "-temp", FALSE, etREAL, {&opt.Temperature},
3256 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3258 { "-v", FALSE, etBOOL, {&opt.verbose},
3260 { "-b", FALSE, etREAL, {&opt.tmin},
3261 "First time to analyse (ps)"},
3262 { "-e", FALSE, etREAL, {&opt.tmax},
3263 "Last time to analyse (ps)"},
3264 { "-dt", FALSE, etREAL, {&opt.dt},
3265 "Analyse only every dt ps"},
3266 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3267 "Write histograms and exit"},
3268 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3269 "Determine min and max and exit (with [TT]-auto[tt])"},
3270 { "-log", FALSE, etBOOL, {&opt.bLog},
3271 "Calculate the log of the profile before printing"},
3272 { "-unit", FALSE, etENUM, {en_unit},
3273 "Energy unit in case of log output" },
3274 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3275 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3276 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3277 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3278 { "-sym", FALSE, etBOOL, {&opt.bSym},
3279 "Symmetrize profile around z=0"},
3280 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3281 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3282 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3283 "Calculate integrated autocorrelation times and use in wham"},
3284 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3285 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3286 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3287 "When computing autocorrelation functions, restart computing every .. (ps)"},
3288 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3289 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3290 "during smoothing"},
3291 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3292 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3293 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3294 "Bootstrap method" },
3295 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3296 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3297 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3298 "Seed for bootstrapping. (-1 = use time)"},
3299 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3300 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3301 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3302 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3303 { "-stepout", FALSE, etINT, {&opt.stepchange},
3304 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3305 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3306 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3310 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3311 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3312 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3313 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3314 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3315 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3316 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3317 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3318 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3319 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3320 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3321 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3324 int i, j, l, nfiles, nwins, nfiles2;
3325 t_UmbrellaHeader header;
3326 t_UmbrellaWindow * window = NULL;
3327 double *profile, maxchange = 1e20;
3328 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3329 char **fninTpr, **fninPull, **fninPdo;
3331 FILE *histout, *profout;
3332 char ylabel[256], title[256];
3335 opt.verbose = FALSE;
3336 opt.bHistOnly = FALSE;
3346 /* bootstrapping stuff */
3348 opt.bsMethod = bsMethod_hist;
3349 opt.tauBootStrap = 0.0;
3351 opt.histBootStrapBlockLength = 8;
3352 opt.bs_verbose = FALSE;
3357 opt.Temperature = 298;
3358 opt.Tolerance = 1e-6;
3359 opt.bBoundsOnly = FALSE;
3361 opt.bCalcTauInt = FALSE;
3362 opt.sigSmoothIact = 0.0;
3363 opt.bAllowReduceIact = TRUE;
3364 opt.bInitPotByIntegration = TRUE;
3365 opt.acTrestart = 1.0;
3366 opt.stepchange = 100;
3367 opt.stepUpdateContrib = 100;
3369 if (!parse_common_args(&argc, argv, PCA_BE_NICE,
3370 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3375 opt.unit = nenum(en_unit);
3376 opt.bsMethod = nenum(en_bsMethod);
3378 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3380 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3381 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3382 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3383 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3384 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3385 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3386 if (opt.bTab && opt.bPullf)
3388 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3389 "Provide pullx.xvg or pdo files!");
3392 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3394 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3396 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3398 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3399 "\n\n Check g_wham -h !");
3402 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3403 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3404 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3405 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3406 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3408 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3409 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3410 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3411 if ( (bMinSet || bMaxSet) && bAutoSet)
3413 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3416 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3418 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3421 if (bMinSet && opt.bAuto)
3423 printf("Note: min and max given, switching off -auto.\n");
3427 if (opt.bTauIntGiven && opt.bCalcTauInt)
3429 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3430 "the autocorrelation times. Not both.");
3433 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3435 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3436 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3438 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3440 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3441 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3444 /* Reading gmx4 pull output and tpr files */
3445 if (opt.bTpr || opt.bPullf || opt.bPullx)
3447 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3449 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3450 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3451 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3452 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3453 if (nfiles != nfiles2)
3455 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3456 opt.fnTpr, nfiles2, fnPull);
3459 /* Read file that selects the pull group to be used */
3460 if (opt.fnGroupsel != NULL)
3462 readPullGroupSelection(&opt, fninTpr, nfiles);
3465 window = initUmbrellaWindows(nfiles);
3466 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3469 { /* reading pdo files */
3470 if (opt.fnGroupsel != NULL)
3472 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3473 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3475 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3476 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3477 window = initUmbrellaWindows(nfiles);
3478 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3482 /* enforce equal weight for all histograms? */
3485 enforceEqualWeights(window, nwins);
3488 /* write histograms */
3489 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3490 "z", "count", opt.oenv);
3491 for (l = 0; l < opt.bins; ++l)
3493 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3494 for (i = 0; i < nwins; ++i)
3496 for (j = 0; j < window[i].nPull; ++j)
3498 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3501 fprintf(histout, "\n");
3503 gmx_ffclose(histout);
3504 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3507 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3511 /* Using tabulated umbrella potential */
3514 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3517 /* Integrated autocorrelation times provided ? */
3518 if (opt.bTauIntGiven)
3520 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3523 /* Compute integrated autocorrelation times */
3524 if (opt.bCalcTauInt)
3526 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3529 /* calc average and sigma for each histogram
3530 (maybe required for bootstrapping. If not, this is fast anyhow) */
3531 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3533 averageSigma(window, nwins);
3536 /* Get initial potential by simple integration */
3537 if (opt.bInitPotByIntegration)
3539 guessPotByIntegration(window, nwins, &opt);
3542 /* Check if complete reaction coordinate is covered */
3543 checkReactionCoordinateCovered(window, nwins, &opt);
3545 /* Calculate profile */
3546 snew(profile, opt.bins);
3554 if ( (i%opt.stepUpdateContrib) == 0)
3556 setup_acc_wham(profile, window, nwins, &opt);
3558 if (maxchange < opt.Tolerance)
3561 /* if (opt.verbose) */
3562 printf("Switched to exact iteration in iteration %d\n", i);
3564 calc_profile(profile, window, nwins, &opt, bExact);
3565 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3567 printf("\t%4d) Maximum change %e\n", i, maxchange);
3571 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3572 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3574 /* calc error from Kumar's formula */
3575 /* Unclear how the error propagates along reaction coordinate, therefore
3577 /* calc_error_kumar(profile,window, nwins,&opt); */
3579 /* Write profile in energy units? */
3582 prof_normalization_and_unit(profile, &opt);
3583 strcpy(ylabel, en_unit_label[opt.unit]);
3584 strcpy(title, "Umbrella potential");
3588 strcpy(ylabel, "Density of states");
3589 strcpy(title, "Density of states");
3592 /* symmetrize profile around z=0? */
3595 symmetrizeProfile(profile, &opt);
3598 /* write profile or density of states */
3599 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, "z", ylabel, opt.oenv);
3600 for (i = 0; i < opt.bins; ++i)
3602 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3604 gmx_ffclose(profout);
3605 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3607 /* Bootstrap Method */
3610 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3611 opt2fn("-hist", NFILE, fnm),
3612 ylabel, profile, window, nwins, &opt);
3616 freeUmbrellaWindows(window, nfiles);
3618 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3619 please_cite(stdout, "Hub2010");