2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
55 #include "gromacs/commandline/pargs.h"
56 #include "gromacs/fileio/tpxio.h"
57 #include "gromacs/fileio/xvgr.h"
58 #include "gromacs/gmxana/gmx_ana.h"
59 #include "gromacs/legacyheaders/copyrite.h"
60 #include "gromacs/legacyheaders/macros.h"
61 #include "gromacs/legacyheaders/names.h"
62 #include "gromacs/legacyheaders/typedefs.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/random/random.h"
65 #include "gromacs/utility/cstringutil.h"
66 #include "gromacs/utility/fatalerror.h"
67 #include "gromacs/utility/smalloc.h"
69 //! longest file names allowed in input files
70 #define WHAM_MAXFILELEN 2048
73 * x-axis legend for output files
75 static const char *xlabel = "\\xx\\f{} (nm)";
78 * enum for energy units
81 enSel, en_kJ, en_kCal, en_kT, enNr
84 * enum for type of input files (pdos, tpr, or pullf)
87 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
90 /*! \brief enum for bootstrapping method
92 * These bootstrap methods are supported:
93 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
94 * (bsMethod_BayesianHist)
95 * - bootstrap complete histograms (bsMethod_hist)
96 * - bootstrap trajectories from given umbrella histograms. This generates new
97 * "synthetic" histograms (bsMethod_traj)
98 * - bootstrap trajectories from Gaussian with mu/sigma computed from
99 * the respective histogram (bsMethod_trajGauss). This gives very similar
100 * results compared to bsMethod_traj.
102 * ********************************************************************
103 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
104 * JS Hub, BL de Groot, D van der Spoel
105 * g_wham - A free weighted histogram analysis implementation including
106 * robust error and autocorrelation estimates,
107 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
108 * ********************************************************************
111 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
112 bsMethod_traj, bsMethod_trajGauss
116 //! Parameters of the umbrella potentials
120 * \name Using umbrella pull code since gromacs 4.x
123 int npullcrds; //!< nr of pull coordinates in tpr file
124 int pull_geometry; //!< such as distance, direction
125 ivec pull_dim; //!< pull dimension with geometry distance
126 int pull_ndim; //!< nr of pull_dim != 0
127 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
128 real *k; //!< force constants in tpr file
129 real *init_dist; //!< reference displacements
130 real *umbInitDist; //!< reference displacement in umbrella direction
133 * \name Using PDO files common until gromacs 3.x
141 char PullName[4][256];
143 double UmbCons[4][3];
147 //! Data in the umbrella histograms
150 int nPull; //!< nr of pull groups in this pdo or pullf/x file
151 double **Histo; //!< nPull histograms
152 double **cum; //!< nPull cumulative distribution functions
153 int nBin; //!< nr of bins. identical to opt->bins
154 double *k; //!< force constants for the nPull groups
155 double *pos; //!< umbrella positions for the nPull groups
156 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
157 int *N; //!< nr of data points in nPull histograms.
158 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
160 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
162 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
163 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
166 double *tau; //!< intetrated autocorrelation time (IACT)
167 double *tausmooth; //!< smoothed IACT
169 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
171 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
173 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
175 /*! \brief average force estimated from average displacement, fAv=dzAv*k
177 * Used for integration to guess the potential.
180 real *aver; //!< average of histograms
181 real *sigma; //!< stddev of histograms
182 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
185 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
188 int n; //!< total nr of pull groups in this tpr file
189 int nUse; //!< nr of pull groups used
190 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
193 //! Parameters of WHAM
200 const char *fnTpr, *fnPullf, *fnGroupsel;
201 const char *fnPdo, *fnPullx; //!< file names of input
202 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
203 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
205 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
206 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
207 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
208 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
211 * \name Basic WHAM options
214 int bins; //!< nr of bins, min, max, and dz of profile
216 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
217 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
220 * \name Output control
223 gmx_bool bLog; //!< energy output (instead of probability) for profile
224 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
225 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
226 /*! \brief after wham, set prof to zero at this z-position.
227 * When bootstrapping, set zProf0 to a "stable" reference position.
230 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
232 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
233 gmx_bool bAuto; //!< determine min and max automatically but do not exit
235 gmx_bool verbose; //!< more noisy wham mode
236 int stepchange; //!< print maximum change in prof after how many interations
237 output_env_t oenv; //!< xvgr options
240 * \name Autocorrelation stuff
243 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
244 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
245 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
246 real acTrestart; //!< when computing ACT, time between restarting points
248 /* \brief Enforce the same weight for each umbella window, that is
249 * calculate with the same number of data points for
250 * each window. That can be reasonable, if the histograms
251 * have different length, but due to autocorrelation,
252 * a longer simulation should not have larger weightin wham.
258 * \name Bootstrapping stuff
261 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
263 /* \brief bootstrap method
265 * if == bsMethod_hist, consider complete histograms as independent
266 * data points and, hence, only mix complete histograms.
267 * if == bsMethod_BayesianHist, consider complete histograms
268 * as independent data points, but assign random weights
269 * to the histograms during the bootstrapping ("Bayesian bootstrap")
270 * In case of long correlations (e.g., inside a channel), these
271 * will yield a more realistic error.
272 * if == bsMethod_traj(Gauss), generate synthetic histograms
274 * histogram by generating an autocorrelated random sequence
275 * that is distributed according to the respective given
276 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
277 * (instead of from the umbrella histogram) to generate a new
282 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
285 /* \brief when mixing histograms, mix only histograms withing blocks
286 long the reaction coordinate xi. Avoids gaps along xi. */
287 int histBootStrapBlockLength;
289 int bsSeed; //!< random seed for bootstrapping
291 /* \brief Write cumulative distribution functions (CDFs) of histograms
292 and write the generated histograms for each bootstrap */
296 * \name tabulated umbrella potential stuff
300 double *tabX, *tabY, tabMin, tabMax, tabDz;
303 gmx_rng_t rng; //!< gromacs random number generator
306 //! Make an umbrella window (may contain several histograms)
307 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
309 t_UmbrellaWindow *win;
312 for (i = 0; i < nwin; i++)
314 win[i].Histo = win[i].cum = 0;
315 win[i].k = win[i].pos = win[i].z = 0;
316 win[i].N = win[i].Ntot = 0;
317 win[i].g = win[i].tau = win[i].tausmooth = 0;
321 win[i].aver = win[i].sigma = 0;
327 //! Delete an umbrella window (may contain several histograms)
328 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
331 for (i = 0; i < nwin; i++)
335 for (j = 0; j < win[i].nPull; j++)
337 sfree(win[i].Histo[j]);
342 for (j = 0; j < win[i].nPull; j++)
344 sfree(win[i].cum[j]);
349 for (j = 0; j < win[i].nPull; j++)
351 sfree(win[i].bContrib[j]);
363 sfree(win[i].tausmooth);
364 sfree(win[i].bContrib);
366 sfree(win[i].forceAv);
369 sfree(win[i].bsWeight);
375 * Read and setup tabulated umbrella potential
377 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
382 printf("Setting up tabulated potential from file %s\n", fn);
383 nl = read_xvg(fn, &y, &ny);
387 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
389 opt->tabMin = y[0][0];
390 opt->tabMax = y[0][nl-1];
391 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
394 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
395 "ascending z-direction", fn);
397 for (i = 0; i < nl-1; i++)
399 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
401 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
406 for (i = 0; i < nl; i++)
408 opt->tabX[i] = y[0][i];
409 opt->tabY[i] = y[1][i];
411 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
412 opt->tabMin, opt->tabMax, opt->tabDz);
415 //! Read the header of an PDO file (position, force const, nr of groups)
416 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
419 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
421 std::istringstream ist;
424 if (fgets(line, 2048, file) == NULL)
426 gmx_fatal(FARGS, "Error reading header from pdo file\n");
429 ist >> Buffer0 >> Buffer1 >> Buffer2;
430 if (strcmp(Buffer1, "UMBRELLA"))
432 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
433 "(Found in first line: `%s')\n",
434 Buffer1, "UMBRELLA", line);
436 if (strcmp(Buffer2, "3.0"))
438 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
442 if (fgets(line, 2048, file) == NULL)
444 gmx_fatal(FARGS, "Error reading header from pdo file\n");
447 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
448 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
450 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
451 if (header->nDim != 1)
453 gmx_fatal(FARGS, "Currently only supports one dimension");
457 if (fgets(line, 2048, file) == NULL)
459 gmx_fatal(FARGS, "Error reading header from pdo file\n");
462 ist >> Buffer0 >> Buffer1 >> header->nSkip;
465 if (fgets(line, 2048, file) == NULL)
467 gmx_fatal(FARGS, "Error reading header from pdo file\n");
470 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
473 if (fgets(line, 2048, file) == NULL)
475 gmx_fatal(FARGS, "Error reading header from pdo file\n");
478 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
482 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
486 for (i = 0; i < header->nPull; ++i)
488 if (fgets(line, 2048, file) == NULL)
490 gmx_fatal(FARGS, "Error reading header from pdo file\n");
493 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
494 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
495 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
499 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
500 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
504 if (fgets(line, 2048, file) == NULL)
506 gmx_fatal(FARGS, "Cannot read from file\n");
510 if (strcmp(Buffer3, "#####") != 0)
512 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
517 static char *fgets3(FILE *fp, char ptr[], int *len)
522 if (fgets(ptr, *len-1, fp) == NULL)
527 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
529 /* This line is longer than len characters, let's increase len! */
533 if (fgets(p-1, STRLEN, fp) == NULL)
539 if (ptr[slen-1] == '\n')
547 /*! \brief Read the data columns of and PDO file.
549 * TO DO: Get rid of the scanf function to avoid the clang warning.
550 * At the moment, this warning is avoided by hiding the format string
551 * the variable fmtlf.
553 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
554 int fileno, t_UmbrellaWindow * win,
555 t_UmbrellaOptions *opt,
556 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
558 int i, inttemp, bins, count, ntot;
559 real min, max, minfound = 1e20, maxfound = -1e20;
560 double temp, time, time0 = 0, dt;
562 t_UmbrellaWindow * window = 0;
563 gmx_bool timeok, dt_ok = 1;
564 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
565 int len = STRLEN, dstep = 1;
566 const int blocklen = 4096;
576 /* Need to alocate memory and set up structure */
577 window->nPull = header->nPull;
580 snew(window->Histo, window->nPull);
581 snew(window->z, window->nPull);
582 snew(window->k, window->nPull);
583 snew(window->pos, window->nPull);
584 snew(window->N, window->nPull);
585 snew(window->Ntot, window->nPull);
586 snew(window->g, window->nPull);
587 snew(window->bsWeight, window->nPull);
589 window->bContrib = 0;
591 if (opt->bCalcTauInt)
593 snew(window->ztime, window->nPull);
599 snew(lennow, window->nPull);
601 for (i = 0; i < window->nPull; ++i)
604 window->bsWeight[i] = 1.;
605 snew(window->Histo[i], bins);
606 window->k[i] = header->UmbCons[i][0];
607 window->pos[i] = header->UmbPos[i][0];
611 if (opt->bCalcTauInt)
613 window->ztime[i] = 0;
617 /* Done with setup */
623 min = max = bins = 0; /* Get rid of warnings */
628 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
632 if (ptr[0] == '#' || strlen(ptr) < 2)
637 /* Initiate format string */
639 strcat(fmtign, "%*s");
641 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
642 /* Round time to fs */
643 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
645 /* get time step of pdo file */
655 dstep = static_cast<int>(opt->dt/dt+0.5);
663 window->dt = dt*dstep;
668 dt_ok = ((count-1)%dstep == 0);
669 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
671 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
672 time,opt->tmin, opt->tmax, dt_ok,timeok); */
676 for (i = 0; i < header->nPull; ++i)
679 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
680 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
681 if (sscanf(ptr, fmt, &temp))
683 temp += header->UmbPos[i][0];
697 if (opt->bCalcTauInt)
699 /* save time series for autocorrelation analysis */
700 ntot = window->Ntot[i];
701 if (ntot >= lennow[i])
703 lennow[i] += blocklen;
704 srenew(window->ztime[i], lennow[i]);
706 window->ztime[i][ntot] = temp;
714 inttemp = static_cast<int> (temp);
721 else if (inttemp >= bins)
727 if (inttemp >= 0 && inttemp < bins)
729 window->Histo[i][inttemp] += 1.;
737 if (time > opt->tmax)
741 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
757 /*! \brief Set identical weights for all histograms
759 * Normally, the weight is given by the number data points in each
760 * histogram, together with the autocorrelation time. This can be overwritten
761 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
762 * an appropriate autocorrelation times instead of using this function.
764 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
766 int i, k, j, NEnforced;
769 NEnforced = window[0].Ntot[0];
770 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
771 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
772 /* enforce all histograms to have the same weight as the very first histogram */
774 for (j = 0; j < nWindows; ++j)
776 for (k = 0; k < window[j].nPull; ++k)
778 ratio = 1.0*NEnforced/window[j].Ntot[k];
779 for (i = 0; i < window[0].nBin; ++i)
781 window[j].Histo[k][i] *= ratio;
783 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
788 /*! \brief Simple linear interpolation between two given tabulated points
790 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
793 double pl, pu, dz, dp;
795 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
797 if (jl < 0 || ju >= opt->tabNbins)
799 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
800 "Provide an extended table.", dist, jl, ju);
804 dz = dist-opt->tabX[jl];
805 dp = (pu-pl)*dz/opt->tabDz;
811 * Check which bins substiantially contribute (accelerates WHAM)
813 * Don't worry, that routine does not mean we compute the PMF in limited precision.
814 * After rapid convergence (using only substiantal contributions), we always switch to
817 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
818 t_UmbrellaOptions *opt)
820 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
821 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
822 gmx_bool bAnyContrib;
823 static int bFirst = 1;
824 static double wham_contrib_lim;
828 for (i = 0; i < nWindows; ++i)
830 nGrptot += window[i].nPull;
832 wham_contrib_lim = opt->Tolerance/nGrptot;
835 ztot = opt->max-opt->min;
838 for (i = 0; i < nWindows; ++i)
840 if (!window[i].bContrib)
842 snew(window[i].bContrib, window[i].nPull);
844 for (j = 0; j < window[i].nPull; ++j)
846 if (!window[i].bContrib[j])
848 snew(window[i].bContrib[j], opt->bins);
851 for (k = 0; k < opt->bins; ++k)
853 temp = (1.0*k+0.5)*dz+min;
854 distance = temp - window[i].pos[j]; /* distance to umbrella center */
856 { /* in cyclic wham: */
857 if (distance > ztot_half) /* |distance| < ztot_half */
861 else if (distance < -ztot_half)
866 /* Note: there are two contributions to bin k in the wham equations:
867 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
868 ii) exp(- U/(8.314e-3*opt->Temperature))
869 where U is the umbrella potential
870 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
875 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
879 U = tabulated_pot(distance, opt); /* Use tabulated potential */
882 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
883 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
884 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
885 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
886 if (window[i].bContrib[j][k])
892 /* If this histo is far outside min and max all bContrib may be FALSE,
893 causing a floating point exception later on. To avoid that, switch
897 for (k = 0; k < opt->bins; ++k)
899 window[i].bContrib[j][k] = TRUE;
906 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
907 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
912 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
918 //! Compute the PMF (one of the two main WHAM routines)
919 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
920 t_UmbrellaOptions *opt, gmx_bool bExact)
923 double num, ztot_half, ztot, distance, min = opt->min, dz = opt->dz;
924 double denom, U = 0, temp = 0, invg;
926 ztot = opt->max-opt->min;
929 for (i = 0; i < opt->bins; ++i)
932 for (j = 0; j < nWindows; ++j)
934 for (k = 0; k < window[j].nPull; ++k)
936 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
937 temp = (1.0*i+0.5)*dz+min;
938 num += invg*window[j].Histo[k][i];
940 if (!(bExact || window[j].bContrib[k][i]))
944 distance = temp - window[j].pos[k]; /* distance to umbrella center */
946 { /* in cyclic wham: */
947 if (distance > ztot_half) /* |distance| < ztot_half */
951 else if (distance < -ztot_half)
959 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
963 U = tabulated_pot(distance, opt); /* Use tabulated potential */
965 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
968 profile[i] = num/denom;
972 //! Compute the free energy offsets z (one of the two main WHAM routines)
973 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
974 t_UmbrellaOptions *opt, gmx_bool bExact)
977 double U = 0, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot;
978 double MAX = -1e20, total = 0;
980 ztot = opt->max-opt->min;
983 for (i = 0; i < nWindows; ++i)
985 for (j = 0; j < window[i].nPull; ++j)
988 for (k = 0; k < window[i].nBin; ++k)
990 if (!(bExact || window[i].bContrib[j][k]))
994 temp = (1.0*k+0.5)*dz+min;
995 distance = temp - window[i].pos[j]; /* distance to umbrella center */
997 { /* in cyclic wham: */
998 if (distance > ztot_half) /* |distance| < ztot_half */
1002 else if (distance < -ztot_half)
1010 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1014 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1017 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1019 /* Avoid floating point exception if window is far outside min and max */
1022 total = -log(total);
1028 temp = fabs(total - window[i].z[j]);
1033 window[i].z[j] = total;
1039 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1040 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1042 int i, j, bins = opt->bins;
1043 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1046 if (min > 0. || max < 0.)
1048 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1049 opt->min, opt->max);
1054 for (i = 0; i < bins; i++)
1058 /* bin left of zsym */
1059 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1060 if (j >= 0 && (j+1) < bins)
1062 /* interpolate profile linearly between bins j and j+1 */
1063 z1 = min+(j+0.5)*dz;
1065 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1066 /* average between left and right */
1067 prof2[i] = 0.5*(profsym+profile[i]);
1071 prof2[i] = profile[i];
1075 memcpy(profile, prof2, bins*sizeof(double));
1079 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1080 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1083 double unit_factor = 1., R_MolarGasConst, diff;
1085 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1088 /* Not log? Nothing to do! */
1094 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1095 if (opt->unit == en_kT)
1099 else if (opt->unit == en_kJ)
1101 unit_factor = R_MolarGasConst*opt->Temperature;
1103 else if (opt->unit == en_kCal)
1105 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1109 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1112 for (i = 0; i < bins; i++)
1114 if (profile[i] > 0.0)
1116 profile[i] = -log(profile[i])*unit_factor;
1120 /* shift to zero at z=opt->zProf0 */
1121 if (!opt->bProf0Set)
1127 /* Get bin with shortest distance to opt->zProf0
1128 (-0.5 from bin position and +0.5 from rounding cancel) */
1129 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1134 else if (imin >= bins)
1138 diff = profile[imin];
1142 for (i = 0; i < bins; i++)
1148 //! Make an array of random integers (used for bootstrapping)
1149 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1151 int ipull, blockBase, nr, ipullRandom;
1153 if (blockLength == 0)
1155 blockLength = nPull;
1158 for (ipull = 0; ipull < nPull; ipull++)
1160 blockBase = (ipull/blockLength)*blockLength;
1162 { /* make sure nothing bad happens in the last block */
1163 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1164 ipullRandom = blockBase + nr;
1166 while (ipullRandom >= nPull);
1167 if (ipullRandom < 0 || ipullRandom >= nPull)
1169 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1170 "blockLength = %d, blockBase = %d\n",
1171 ipullRandom, nPull, nr, blockLength, blockBase);
1173 randomArray[ipull] = ipullRandom;
1175 /*for (ipull=0; ipull<nPull; ipull++)
1176 printf("%d ",randomArray[ipull]); printf("\n"); */
1179 /*! \brief Set pull group information of a synthetic histogram
1181 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1182 * but it is not required if we bootstrap complete histograms.
1184 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1185 t_UmbrellaWindow *thisWindow, int pullid)
1187 synthWindow->N [0] = thisWindow->N [pullid];
1188 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1189 synthWindow->pos [0] = thisWindow->pos [pullid];
1190 synthWindow->z [0] = thisWindow->z [pullid];
1191 synthWindow->k [0] = thisWindow->k [pullid];
1192 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1193 synthWindow->g [0] = thisWindow->g [pullid];
1194 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1197 /*! \brief Calculate cumulative distribution function of of all histograms.
1199 * This allow to create random number sequences
1200 * which are distributed according to the histograms. Required to generate
1201 * the "synthetic" histograms for the Bootstrap method
1203 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1204 t_UmbrellaOptions *opt, const char *fnhist)
1208 char *fn = 0, *buf = 0;
1211 if (opt->bs_verbose)
1213 snew(fn, strlen(fnhist)+10);
1214 snew(buf, strlen(fnhist)+10);
1215 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1216 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1220 for (i = 0; i < nWindows; i++)
1222 snew(window[i].cum, window[i].nPull);
1223 for (j = 0; j < window[i].nPull; j++)
1225 snew(window[i].cum[j], nbin+1);
1226 window[i].cum[j][0] = 0.;
1227 for (k = 1; k <= nbin; k++)
1229 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1232 /* normalize CDFs. Ensure cum[nbin]==1 */
1233 last = window[i].cum[j][nbin];
1234 for (k = 0; k <= nbin; k++)
1236 window[i].cum[j][k] /= last;
1241 printf("Cumulative distriubtion functions of all histograms created.\n");
1242 if (opt->bs_verbose)
1244 for (k = 0; k <= nbin; k++)
1246 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1247 for (i = 0; i < nWindows; i++)
1249 for (j = 0; j < window[i].nPull; j++)
1251 fprintf(fp, "%g\t", window[i].cum[j][k]);
1256 printf("Wrote cumulative distribution functions to %s\n", fn);
1264 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1266 * This is used to generate a random sequence distributed according to a histogram
1268 void searchCumulative(double xx[], int n, double x, int *j)
1290 else if (x == xx[n-1])
1300 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1301 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1302 int pullid, t_UmbrellaOptions *opt)
1304 int N, i, nbins, r_index, ibin;
1305 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1308 N = thisWindow->N[pullid];
1309 dt = thisWindow->dt;
1310 nbins = thisWindow->nBin;
1312 /* tau = autocorrelation time */
1313 if (opt->tauBootStrap > 0.0)
1315 tausteps = opt->tauBootStrap/dt;
1317 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1319 /* calc tausteps from g=1+2tausteps */
1320 g = thisWindow->g[pullid];
1326 "When generating hypothetical trajctories from given umbrella histograms,\n"
1327 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1328 "cannot be predicted. You have 3 options:\n"
1329 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1330 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1332 " with option -iiact for all umbrella windows.\n"
1333 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1334 " Use option (3) only if you are sure what you're doing, you may severely\n"
1335 " underestimate the error if a too small ACT is given.\n");
1336 gmx_fatal(FARGS, errstr);
1339 synthWindow->N [0] = N;
1340 synthWindow->pos [0] = thisWindow->pos[pullid];
1341 synthWindow->z [0] = thisWindow->z[pullid];
1342 synthWindow->k [0] = thisWindow->k[pullid];
1343 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1344 synthWindow->g [0] = thisWindow->g [pullid];
1345 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1347 for (i = 0; i < nbins; i++)
1349 synthWindow->Histo[0][i] = 0.;
1352 if (opt->bsMethod == bsMethod_trajGauss)
1354 sig = thisWindow->sigma [pullid];
1355 mu = thisWindow->aver [pullid];
1358 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1360 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1362 z = a*x + sqrt(1-a^2)*y
1363 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1364 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1366 C(t) = exp(-t/tau) with tau=-1/ln(a)
1368 Then, use error function to turn the Gaussian random variable into a uniformly
1369 distributed one in [0,1]. Eventually, use cumulative distribution function of
1370 histogram to get random variables distributed according to histogram.
1371 Note: The ACT of the flat distribution and of the generated histogram is not
1372 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1374 a = exp(-1.0/tausteps);
1376 invsqrt2 = 1./sqrt(2.0);
1378 /* init random sequence */
1379 x = gmx_rng_gaussian_table(opt->rng);
1381 if (opt->bsMethod == bsMethod_traj)
1383 /* bootstrap points from the umbrella histograms */
1384 for (i = 0; i < N; i++)
1386 y = gmx_rng_gaussian_table(opt->rng);
1388 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1389 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1391 r = 0.5*(1+gmx_erf(x*invsqrt2));
1392 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1393 synthWindow->Histo[0][r_index] += 1.;
1396 else if (opt->bsMethod == bsMethod_trajGauss)
1398 /* bootstrap points from a Gaussian with the same average and sigma
1399 as the respective umbrella histogram. The idea was, that -given
1400 limited sampling- the bootstrapped histograms are otherwise biased
1401 from the limited sampling of the US histos. However, bootstrapping from
1402 the Gaussian seems to yield a similar estimate. */
1406 y = gmx_rng_gaussian_table(opt->rng);
1409 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1414 while ( (ibin += nbins) < 0)
1419 else if (ibin >= nbins)
1421 while ( (ibin -= nbins) >= nbins)
1428 if (ibin >= 0 && ibin < nbins)
1430 synthWindow->Histo[0][ibin] += 1.;
1437 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1441 /*! \brief Write all histograms to a file
1443 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1444 * sets of bootstrapped histograms.
1446 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1447 int bs_index, t_UmbrellaOptions *opt)
1449 char *fn = 0, *buf = 0, title[256];
1455 snew(fn, strlen(fnhist)+10);
1456 snew(buf, strlen(fnhist)+1);
1457 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1458 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1462 fn = gmx_strdup(fnhist);
1463 strcpy(title, "Umbrella histograms");
1466 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1469 /* Write histograms */
1470 for (l = 0; l < bins; ++l)
1472 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1473 for (i = 0; i < nWindows; ++i)
1475 for (j = 0; j < window[i].nPull; ++j)
1477 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1484 printf("Wrote %s\n", fn);
1492 //! Used for qsort to sort random numbers
1493 int func_wham_is_larger(const void *a, const void *b)
1512 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1513 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1520 /* generate ordered random numbers between 0 and nAllPull */
1521 for (i = 0; i < nAllPull-1; i++)
1523 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1525 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1526 r[nAllPull-1] = 1.0*nAllPull;
1528 synthwin[0].bsWeight[0] = r[0];
1529 for (i = 1; i < nAllPull; i++)
1531 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1534 /* avoid to have zero weight by adding a tiny value */
1535 for (i = 0; i < nAllPull; i++)
1537 if (synthwin[i].bsWeight[0] < 1e-5)
1539 synthwin[i].bsWeight[0] = 1e-5;
1546 //! The main bootstrapping routine
1547 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1548 char* ylabel, double *profile,
1549 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1551 t_UmbrellaWindow * synthWindow;
1552 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1553 int i, j, *randomArray = 0, winid, pullid, ib;
1554 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1556 gmx_bool bExact = FALSE;
1558 /* init random generator */
1559 if (opt->bsSeed == -1)
1561 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1565 opt->rng = gmx_rng_init(opt->bsSeed);
1568 snew(bsProfile, opt->bins);
1569 snew(bsProfiles_av, opt->bins);
1570 snew(bsProfiles_av2, opt->bins);
1572 /* Create array of all pull groups. Note that different windows
1573 may have different nr of pull groups
1574 First: Get total nr of pull groups */
1576 for (i = 0; i < nWindows; i++)
1578 nAllPull += window[i].nPull;
1580 snew(allPull_winId, nAllPull);
1581 snew(allPull_pullId, nAllPull);
1583 /* Setup one array of all pull groups */
1584 for (i = 0; i < nWindows; i++)
1586 for (j = 0; j < window[i].nPull; j++)
1588 allPull_winId[iAllPull] = i;
1589 allPull_pullId[iAllPull] = j;
1594 /* setup stuff for synthetic windows */
1595 snew(synthWindow, nAllPull);
1596 for (i = 0; i < nAllPull; i++)
1598 synthWindow[i].nPull = 1;
1599 synthWindow[i].nBin = opt->bins;
1600 snew(synthWindow[i].Histo, 1);
1601 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1603 snew(synthWindow[i].Histo[0], opt->bins);
1605 snew(synthWindow[i].N, 1);
1606 snew(synthWindow[i].pos, 1);
1607 snew(synthWindow[i].z, 1);
1608 snew(synthWindow[i].k, 1);
1609 snew(synthWindow[i].bContrib, 1);
1610 snew(synthWindow[i].g, 1);
1611 snew(synthWindow[i].bsWeight, 1);
1614 switch (opt->bsMethod)
1617 snew(randomArray, nAllPull);
1618 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1619 please_cite(stdout, "Hub2006");
1621 case bsMethod_BayesianHist:
1622 /* just copy all histogams into synthWindow array */
1623 for (i = 0; i < nAllPull; i++)
1625 winid = allPull_winId [i];
1626 pullid = allPull_pullId[i];
1627 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1631 case bsMethod_trajGauss:
1632 calc_cumulatives(window, nWindows, opt, fnhist);
1635 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1638 /* do bootstrapping */
1639 fp = xvgropen(fnprof, "Boot strap profiles", xlabel, ylabel, opt->oenv);
1640 for (ib = 0; ib < opt->nBootStrap; ib++)
1642 printf(" *******************************************\n"
1643 " ******** Start bootstrap nr %d ************\n"
1644 " *******************************************\n", ib+1);
1646 switch (opt->bsMethod)
1649 /* bootstrap complete histograms from given histograms */
1650 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1651 for (i = 0; i < nAllPull; i++)
1653 winid = allPull_winId [randomArray[i]];
1654 pullid = allPull_pullId[randomArray[i]];
1655 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1658 case bsMethod_BayesianHist:
1659 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1660 setRandomBsWeights(synthWindow, nAllPull, opt);
1663 case bsMethod_trajGauss:
1664 /* create new histos from given histos, that is generate new hypothetical
1666 for (i = 0; i < nAllPull; i++)
1668 winid = allPull_winId[i];
1669 pullid = allPull_pullId[i];
1670 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1675 /* write histos in case of verbose output */
1676 if (opt->bs_verbose)
1678 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1685 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1688 if ( (i%opt->stepUpdateContrib) == 0)
1690 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1692 if (maxchange < opt->Tolerance)
1696 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1698 printf("\t%4d) Maximum change %e\n", i, maxchange);
1700 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1703 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1704 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1708 prof_normalization_and_unit(bsProfile, opt);
1711 /* symmetrize profile around z=0 */
1714 symmetrizeProfile(bsProfile, opt);
1717 /* save stuff to get average and stddev */
1718 for (i = 0; i < opt->bins; i++)
1721 bsProfiles_av[i] += tmp;
1722 bsProfiles_av2[i] += tmp*tmp;
1723 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1725 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1729 /* write average and stddev */
1730 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1731 if (output_env_get_print_xvgr_codes(opt->oenv))
1733 fprintf(fp, "@TYPE xydy\n");
1735 for (i = 0; i < opt->bins; i++)
1737 bsProfiles_av [i] /= opt->nBootStrap;
1738 bsProfiles_av2[i] /= opt->nBootStrap;
1739 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1740 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1741 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1744 printf("Wrote boot strap result to %s\n", fnres);
1747 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1748 int whaminFileType(char *fn)
1752 if (strcmp(fn+len-3, "tpr") == 0)
1756 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1758 return whamin_pullxf;
1760 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1766 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1768 return whamin_unknown;
1771 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1772 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1773 t_UmbrellaOptions *opt)
1775 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1776 int nread, sizenow, i, block = 1;
1779 fp = gmx_ffopen(fn, "r");
1782 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1784 if (strlen(tmp) >= WHAM_MAXFILELEN)
1786 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1788 if (nread >= sizenow)
1791 srenew(filename, sizenow);
1792 for (i = sizenow-block; i < sizenow; i++)
1794 snew(filename[i], WHAM_MAXFILELEN);
1797 /* remove newline if there is one */
1798 if (tmp[strlen(tmp)-1] == '\n')
1800 tmp[strlen(tmp)-1] = '\0';
1802 strcpy(filename[nread], tmp);
1805 printf("Found file %s in %s\n", filename[nread], fn);
1809 *filenamesRet = filename;
1813 //! Open a file or a pipe to a gzipped file
1814 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1816 char Buffer[1024], gunzip[1024], *Path = 0;
1818 static gmx_bool bFirst = 1;
1820 /* gzipped pdo file? */
1821 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1823 /* search gunzip executable */
1824 if (!(Path = getenv("GMX_PATH_GZIP")))
1826 if (gmx_fexist("/bin/gunzip"))
1828 sprintf(gunzip, "%s", "/bin/gunzip");
1830 else if (gmx_fexist("/usr/bin/gunzip"))
1832 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1836 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1837 "You may want to define the path to gunzip "
1838 "with the environment variable GMX_PATH_GZIP.", gunzip);
1843 sprintf(gunzip, "%s/gunzip", Path);
1844 if (!gmx_fexist(gunzip))
1846 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1847 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1852 printf("Using gunzig executable %s\n", gunzip);
1855 if (!gmx_fexist(fn))
1857 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1859 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1862 printf("Executing command '%s'\n", Buffer);
1865 if ((pipe = popen(Buffer, "r")) == NULL)
1867 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1870 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1876 pipe = gmx_ffopen(fn, "r");
1883 //! Close file or pipe
1884 void pdo_close_file(FILE *fp)
1893 //! Reading all pdo files
1894 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1895 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1898 real mintmp, maxtmp, done = 0.;
1901 /* char Buffer0[1000]; */
1905 gmx_fatal(FARGS, "No files found. Hick.");
1908 /* if min and max are not given, get min and max from the input files */
1911 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1914 for (i = 0; i < nfiles; ++i)
1916 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1917 /*fgets(Buffer0,999,file);
1918 fprintf(stderr,"First line '%s'\n",Buffer0); */
1919 done = 100.0*(i+1)/nfiles;
1920 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1925 read_pdo_header(file, header, opt);
1926 /* here only determine min and max of this window */
1927 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1928 if (maxtmp > opt->max)
1932 if (mintmp < opt->min)
1938 pdo_close_file(file);
1946 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1947 if (opt->bBoundsOnly)
1949 printf("Found option -boundsonly, now exiting.\n");
1953 /* store stepsize in profile */
1954 opt->dz = (opt->max-opt->min)/opt->bins;
1956 /* Having min and max, we read in all files */
1957 /* Loop over all files */
1958 for (i = 0; i < nfiles; ++i)
1960 done = 100.0*(i+1)/nfiles;
1961 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1966 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1967 read_pdo_header(file, header, opt);
1968 /* load data into window */
1969 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
1970 if ((window+i)->Ntot[0] == 0)
1972 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1976 pdo_close_file(file);
1984 for (i = 0; i < nfiles; ++i)
1991 //! translate 0/1 to N/Y to write pull dimensions
1992 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
1994 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
1995 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2000 static int first = 1;
2002 /* printf("Reading %s \n",fn); */
2003 read_tpx_state(fn, &ir, &state, NULL, NULL);
2005 if (ir.ePull != epullUMBRELLA)
2007 gmx_fatal(FARGS, "This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
2008 " (ir.ePull = %d)\n", epull_names[ir.ePull], ir.ePull);
2011 /* nr of pull groups */
2012 ncrd = ir.pull->ncoord;
2015 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d pull coordinates\n", ncrd);
2018 header->npullcrds = ir.pull->ncoord;
2019 header->pull_geometry = ir.pull->eGeom;
2020 header->bPrintRef = ir.pull->bPrintRef;
2021 copy_ivec(ir.pull->dim, header->pull_dim);
2022 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2023 snew(header->k, ncrd);
2024 snew(header->init_dist, ncrd);
2025 snew(header->umbInitDist, ncrd);
2027 /* only z-direction with epullgCYL? */
2028 if (header->pull_geometry == epullgCYL)
2030 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2032 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2033 "However, found dimensions [%s %s %s]\n",
2034 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2035 int2YN(header->pull_dim[ZZ]));
2039 for (i = 0; i < ncrd; i++)
2041 header->k[i] = ir.pull->coord[i].k;
2042 if (header->k[i] == 0.0)
2044 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2045 "That doesn't seem to be an Umbrella tpr.\n",
2048 header->init_dist[i] = ir.pull->coord[i].init;
2050 /* initial distance to reference */
2051 switch (header->pull_geometry)
2054 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2058 header->umbInitDist[i] = header->init_dist[i];
2061 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2065 if (opt->verbose || first)
2067 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2068 "\tPull group coordinates%s expected in pullx files.\n",
2069 fn, header->npullcrds, epullg_names[header->pull_geometry],
2070 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2071 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2072 for (i = 0; i < ncrd; i++)
2074 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2077 if (!opt->verbose && first)
2079 printf("\tUse option -v to see this output for all input tpr files\n\n");
2085 //! 2-norm in a ndim-dimensional space
2086 double dist_ndim(double **dx, int ndim, int line)
2090 for (i = 0; i < ndim; i++)
2092 r2 += sqr(dx[i][line]);
2097 //! Read pullx.xvg or pullf.xvg
2098 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2099 t_UmbrellaWindow * window,
2100 t_UmbrellaOptions *opt,
2101 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2102 t_groupselection *groupsel)
2104 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2105 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2106 real min, max, minfound = 1e20, maxfound = -1e20;
2107 gmx_bool dt_ok, timeok, bHaveForce;
2108 const char *quantity;
2109 const int blocklen = 4096;
2111 static gmx_bool bFirst = TRUE;
2114 * Data columns in pull output:
2115 * - in force output pullf.xvg:
2116 * No reference columns, one column per pull coordinate
2118 * - in position output pullx.xvg
2119 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2120 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2123 nColPerCrd = opt->bPullx ? header->pull_ndim : 1;
2124 quantity = opt->bPullx ? "position" : "force";
2126 if (opt->bPullx && header->bPrintRef)
2128 nColRefEachCrd = header->pull_ndim;
2135 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2136 bHaveForce = opt->bPullf;
2138 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2139 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2140 Sorry for the laziness, this is a To-do. */
2141 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2144 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2145 "reading \n(option -if) is supported at present, "
2146 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2147 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2148 epullg_names[header->pull_geometry]);
2151 nt = read_xvg(fn, &y, &ny);
2153 /* Check consistency */
2156 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2160 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2161 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2163 printf("Expecting these columns in pull file:\n"
2164 "\t%d reference columns for each individual pull coordinate\n"
2165 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2166 printf("With %d pull groups, expect %d columns (including the time column)\n", header->npullcrds, nColExpect);
2169 if (ny != nColExpect)
2171 gmx_fatal(FARGS, "Found %d pull coodinates in %s,\n but %d data columns in %s (expected %d)\n"
2172 "\nMaybe you confused options -ix and -if ?\n",
2173 header->npullcrds, fntpr, ny-1, fn, nColExpect-1);
2178 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2188 window->dt = y[0][1]-y[0][0];
2190 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2192 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2195 /* Need to alocate memory and set up structure */
2199 /* Use only groups selected with option -is file */
2200 if (header->npullcrds != groupsel->n)
2202 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2203 header->npullcrds, groupsel->n);
2205 window->nPull = groupsel->nUse;
2209 window->nPull = header->npullcrds;
2212 window->nBin = bins;
2213 snew(window->Histo, window->nPull);
2214 snew(window->z, window->nPull);
2215 snew(window->k, window->nPull);
2216 snew(window->pos, window->nPull);
2217 snew(window->N, window->nPull);
2218 snew(window->Ntot, window->nPull);
2219 snew(window->g, window->nPull);
2220 snew(window->bsWeight, window->nPull);
2221 window->bContrib = 0;
2223 if (opt->bCalcTauInt)
2225 snew(window->ztime, window->nPull);
2229 window->ztime = NULL;
2231 snew(lennow, window->nPull);
2233 for (g = 0; g < window->nPull; ++g)
2236 window->bsWeight[g] = 1.;
2237 snew(window->Histo[g], bins);
2239 window->Ntot[g] = 0;
2241 if (opt->bCalcTauInt)
2243 window->ztime[g] = NULL;
2247 /* Copying umbrella center and force const is more involved since not
2248 all pull groups from header (tpr file) may be used in window variable */
2249 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2251 if (groupsel && (groupsel->bUse[g] == FALSE))
2255 window->k[gUsed] = header->k[g];
2256 window->pos[gUsed] = header->umbInitDist[g];
2261 { /* only determine min and max */
2264 min = max = bins = 0; /* Get rid of warnings */
2268 for (i = 0; i < nt; i++)
2270 /* Do you want that time frame? */
2271 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2273 /* get time step of pdo file and get dstep from opt->dt */
2283 dstep = static_cast<int>(opt->dt/dt+0.5);
2291 window->dt = dt*dstep;
2295 dt_ok = (i%dstep == 0);
2296 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2298 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2299 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2302 /* Note: if groupsel == NULL:
2303 * all groups in pullf/x file are stored in this window, and gUsed == g
2304 * if groupsel != NULL:
2305 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2308 for (g = 0; g < header->npullcrds; ++g)
2310 /* was this group selected for application in WHAM? */
2311 if (groupsel && (groupsel->bUse[g] == FALSE))
2320 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2322 pos = -force/header->k[g] + header->umbInitDist[g];
2326 /* pick the right column from:
2327 * time ref1[ndim] coord1[ndim] ref2[ndim] coord2[ndim] ...
2329 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2330 switch (header->pull_geometry)
2333 pos = dist_ndim(y + column, header->pull_ndim, i);
2339 gmx_fatal(FARGS, "Bad error, this error should have been catched before. Ups.\n");
2343 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2357 if (gUsed >= window->nPull)
2359 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2360 gUsed, window->nPull);
2363 if (opt->bCalcTauInt && !bGetMinMax)
2365 /* save time series for autocorrelation analysis */
2366 ntot = window->Ntot[gUsed];
2367 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2368 if (ntot >= lennow[gUsed])
2370 lennow[gUsed] += blocklen;
2371 srenew(window->ztime[gUsed], lennow[gUsed]);
2373 window->ztime[gUsed][ntot] = pos;
2376 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2381 while ( (ibin += bins) < 0)
2386 else if (ibin >= bins)
2388 while ( (ibin -= bins) >= bins)
2394 if (ibin >= 0 && ibin < bins)
2396 window->Histo[gUsed][ibin] += 1.;
2399 window->Ntot[gUsed]++;
2403 else if (t > opt->tmax)
2407 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2419 for (i = 0; i < ny; i++)
2425 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2426 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2427 t_UmbrellaHeader* header,
2428 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2431 real mintmp, maxtmp;
2433 printf("Reading %d tpr and pullf files\n", nfiles/2);
2435 /* min and max not given? */
2438 printf("Automatic determination of boundaries...\n");
2441 for (i = 0; i < nfiles; i++)
2443 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2445 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2447 read_tpr_header(fnTprs[i], header, opt);
2448 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2450 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2452 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2453 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2454 if (maxtmp > opt->max)
2458 if (mintmp < opt->min)
2463 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2464 if (opt->bBoundsOnly)
2466 printf("Found option -boundsonly, now exiting.\n");
2470 /* store stepsize in profile */
2471 opt->dz = (opt->max-opt->min)/opt->bins;
2473 for (i = 0; i < nfiles; i++)
2475 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2477 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2479 read_tpr_header(fnTprs[i], header, opt);
2480 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2482 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2484 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2485 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2486 if (window[i].Ntot[0] == 0)
2488 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2492 for (i = 0; i < nfiles; i++)
2501 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2503 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2504 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2506 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2508 int nlines, ny, i, ig;
2511 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2512 nlines = read_xvg(fn, &iact, &ny);
2513 if (nlines != nwins)
2515 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2518 for (i = 0; i < nlines; i++)
2520 if (window[i].nPull != ny)
2522 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2523 "number of pull groups is different in different simulations. That is not\n"
2524 "supported yet. Sorry.\n");
2526 for (ig = 0; ig < window[i].nPull; ig++)
2528 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2529 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2531 if (iact[ig][i] <= 0.0)
2533 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2540 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2543 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2544 * that -in case of limited sampling- the ACT may be severely underestimated.
2545 * Note: the g=1+2tau are overwritten.
2546 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2549 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2552 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2554 /* only evaluate within +- 3sigma of the Gausian */
2555 siglim = 3.0*opt->sigSmoothIact;
2556 siglim2 = dsqr(siglim);
2557 /* pre-factor of Gaussian */
2558 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2559 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2561 for (i = 0; i < nwins; i++)
2563 snew(window[i].tausmooth, window[i].nPull);
2564 for (ig = 0; ig < window[i].nPull; ig++)
2568 pos = window[i].pos[ig];
2569 for (j = 0; j < nwins; j++)
2571 for (jg = 0; jg < window[j].nPull; jg++)
2573 dpos2 = dsqr(window[j].pos[jg]-pos);
2574 if (dpos2 < siglim2)
2576 w = gaufact*exp(-dpos2*invtwosig2);
2578 tausm += w*window[j].tau[jg];
2579 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2580 w,dpos2,pos,gaufact,invtwosig2); */
2585 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2587 window[i].tausmooth[ig] = tausm;
2591 window[i].tausmooth[ig] = window[i].tau[ig];
2593 window[i].g[ig] = 1+2*tausm/window[i].dt;
2598 //! Stop integrating autoccorelation function when ACF drops under this value
2599 #define WHAM_AC_ZERO_LIMIT 0.05
2601 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2603 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2604 t_UmbrellaOptions *opt, const char *fn)
2606 int i, ig, ncorr, ntot, j, k, *count, restart;
2607 real *corr, c0, dt, tmp;
2608 real *ztime, av, tausteps;
2609 FILE *fp, *fpcorr = 0;
2613 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2614 "time [ps]", "autocorrelation function", opt->oenv);
2618 for (i = 0; i < nwins; i++)
2620 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2622 ntot = window[i].Ntot[0];
2624 /* using half the maximum time as length of autocorrelation function */
2628 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2629 " points. Provide more pull data!", ntot);
2632 /* snew(corrSq,ncorr); */
2635 snew(window[i].tau, window[i].nPull);
2636 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2642 for (ig = 0; ig < window[i].nPull; ig++)
2644 if (ntot != window[i].Ntot[ig])
2646 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2647 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2649 ztime = window[i].ztime[ig];
2651 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2652 for (j = 0, av = 0; (j < ntot); j++)
2657 for (k = 0; (k < ncorr); k++)
2662 for (j = 0; (j < ntot); j += restart)
2664 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2666 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2668 /* corrSq[k] += tmp*tmp; */
2672 /* divide by nr of frames for each time displacement */
2673 for (k = 0; (k < ncorr); k++)
2675 /* count probably = (ncorr-k+(restart-1))/restart; */
2676 corr[k] = corr[k]/count[k];
2677 /* variance of autocorrelation function */
2678 /* corrSq[k]=corrSq[k]/count[k]; */
2680 /* normalize such that corr[0] == 0 */
2682 for (k = 0; (k < ncorr); k++)
2685 /* corrSq[k]*=c0*c0; */
2688 /* write ACFs in verbose mode */
2691 for (k = 0; (k < ncorr); k++)
2693 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2695 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2698 /* esimate integrated correlation time, fitting is too unstable */
2699 tausteps = 0.5*corr[0];
2700 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2701 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2703 tausteps += corr[j];
2706 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2707 Kumar et al, eq. 28 ff. */
2708 window[i].tau[ig] = tausteps*dt;
2709 window[i].g[ig] = 1+2*tausteps;
2710 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2718 gmx_ffclose(fpcorr);
2721 /* plot IACT along reaction coordinate */
2722 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2723 if (output_env_get_print_xvgr_codes(opt->oenv))
2725 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2726 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2727 for (i = 0; i < nwins; i++)
2729 fprintf(fp, "# %3d ", i);
2730 for (ig = 0; ig < window[i].nPull; ig++)
2732 fprintf(fp, " %11g", window[i].tau[ig]);
2737 for (i = 0; i < nwins; i++)
2739 for (ig = 0; ig < window[i].nPull; ig++)
2741 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2744 if (opt->sigSmoothIact > 0.0)
2746 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2747 opt->sigSmoothIact);
2748 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2749 smoothIact(window, nwins, opt);
2750 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2751 if (output_env_get_print_xvgr_codes(opt->oenv))
2753 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2754 fprintf(fp, "@ s1 symbol color 2\n");
2756 for (i = 0; i < nwins; i++)
2758 for (ig = 0; ig < window[i].nPull; ig++)
2760 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2765 printf("Wrote %s\n", fn);
2769 * compute average and sigma of each umbrella histogram
2771 void averageSigma(t_UmbrellaWindow *window, int nwins)
2774 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2776 for (i = 0; i < nwins; i++)
2778 snew(window[i].aver, window[i].nPull);
2779 snew(window[i].sigma, window[i].nPull);
2781 ntot = window[i].Ntot[0];
2782 for (ig = 0; ig < window[i].nPull; ig++)
2784 ztime = window[i].ztime[ig];
2785 for (k = 0, av = 0.; k < ntot; k++)
2790 for (k = 0, sum2 = 0.; k < ntot; k++)
2795 sig = sqrt(sum2/ntot);
2796 window[i].aver[ig] = av;
2798 /* Note: This estimate for sigma is biased from the limited sampling.
2799 Correct sigma by n/(n-1) where n = number of independent
2800 samples. Only possible if IACT is known.
2804 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2805 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2809 window[i].sigma[ig] = sig;
2811 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2818 * Use histograms to compute average force on pull group.
2820 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2822 int i, j, bins = opt->bins, k;
2823 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2826 dz = (max-min)/bins;
2827 ztot = opt->max-min;
2830 /* Compute average displacement from histograms */
2831 for (j = 0; j < nWindows; ++j)
2833 snew(window[j].forceAv, window[j].nPull);
2834 for (k = 0; k < window[j].nPull; ++k)
2838 for (i = 0; i < opt->bins; ++i)
2840 temp = (1.0*i+0.5)*dz+min;
2841 distance = temp - window[j].pos[k];
2843 { /* in cyclic wham: */
2844 if (distance > ztot_half) /* |distance| < ztot_half */
2848 else if (distance < -ztot_half)
2853 w = window[j].Histo[k][i]/window[j].g[k];
2854 displAv += w*distance;
2856 /* Are we near min or max? We are getting wrong forces from the histgrams since
2857 the histograms are zero outside [min,max). Therefore, assume that the position
2858 on the other side of the histomgram center is equally likely. */
2861 posmirrored = window[j].pos[k]-distance;
2862 if (posmirrored >= max || posmirrored < min)
2864 displAv += -w*distance;
2871 /* average force from average displacement */
2872 window[j].forceAv[k] = displAv*window[j].k[k];
2873 /* sigma from average square displacement */
2874 /* window[j].sigma [k] = sqrt(displAv2); */
2875 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2881 * Check if the complete reaction coordinate is covered by the histograms
2883 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2884 t_UmbrellaOptions *opt)
2886 int i, ig, j, bins = opt->bins, bBoundary;
2887 real avcount = 0, z, relcount, *count;
2888 snew(count, opt->bins);
2890 for (j = 0; j < opt->bins; ++j)
2892 for (i = 0; i < nwins; i++)
2894 for (ig = 0; ig < window[i].nPull; ig++)
2896 count[j] += window[i].Histo[ig][j];
2899 avcount += 1.0*count[j];
2902 for (j = 0; j < bins; ++j)
2904 relcount = count[j]/avcount;
2905 z = (j+0.5)*opt->dz+opt->min;
2906 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2907 /* check for bins with no data */
2910 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2911 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2913 /* and check for poor sampling */
2914 else if (relcount < 0.005 && !bBoundary)
2916 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2922 /*! \brief Compute initial potential by integrating the average force
2924 * This speeds up the convergence by roughly a factor of 2
2926 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2928 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
2929 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
2932 dz = (opt->max-min)/bins;
2934 printf("Getting initial potential by integration.\n");
2936 /* Compute average displacement from histograms */
2937 computeAverageForce(window, nWindows, opt);
2939 /* Get force for each bin from all histograms in this bin, or, alternatively,
2940 if no histograms are inside this bin, from the closest histogram */
2943 for (j = 0; j < opt->bins; ++j)
2945 pos = (1.0*j+0.5)*dz+min;
2949 groupmin = winmin = 0;
2950 for (i = 0; i < nWindows; i++)
2952 for (ig = 0; ig < window[i].nPull; ig++)
2954 hispos = window[i].pos[ig];
2955 dist = fabs(hispos-pos);
2956 /* average force within bin */
2960 fAv += window[i].forceAv[ig];
2962 /* at the same time, remember closest histogram */
2971 /* if no histogram found in this bin, use closest histogram */
2978 fAv = window[winmin].forceAv[groupmin];
2982 for (j = 1; j < opt->bins; ++j)
2984 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
2987 /* cyclic wham: linearly correct possible offset */
2990 diff = (pot[bins-1]-pot[0])/(bins-1);
2991 for (j = 1; j < opt->bins; ++j)
2998 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
2999 for (j = 0; j < opt->bins; ++j)
3001 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3004 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3007 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3008 energy offsets which are usually determined by wham
3009 First: turn pot into probabilities:
3011 for (j = 0; j < opt->bins; ++j)
3013 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
3015 calc_z(pot, window, nWindows, opt, TRUE);
3021 //! Count number of words in an line
3022 static int wordcount(char *ptr)
3027 if (strlen(ptr) == 0)
3031 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3033 for (i = 0; (ptr[i] != '\0'); i++)
3035 is[cur] = isspace(ptr[i]);
3036 if ((i > 0) && (is[cur] && !is[1-cur]))
3045 /*! \brief Read input file for pull group selection (option -is)
3047 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3049 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3052 int i, iline, n, len = STRLEN, temp;
3053 char *ptr = 0, *tmpbuf = 0;
3054 char fmt[1024], fmtign[1024];
3055 int block = 1, sizenow;
3057 fp = gmx_ffopen(opt->fnGroupsel, "r");
3058 opt->groupsel = NULL;
3063 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3068 if (iline >= sizenow)
3071 srenew(opt->groupsel, sizenow);
3073 opt->groupsel[iline].n = n;
3074 opt->groupsel[iline].nUse = 0;
3075 snew(opt->groupsel[iline].bUse, n);
3078 for (i = 0; i < n; i++)
3080 strcpy(fmt, fmtign);
3082 if (sscanf(ptr, fmt, &temp))
3084 opt->groupsel[iline].bUse[i] = (temp > 0);
3085 if (opt->groupsel[iline].bUse[i])
3087 opt->groupsel[iline].nUse++;
3090 strcat(fmtign, "%*s");
3094 opt->nGroupsel = iline;
3095 if (nTpr != opt->nGroupsel)
3097 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3101 printf("\nUse only these pull groups:\n");
3102 for (iline = 0; iline < nTpr; iline++)
3104 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3105 for (i = 0; i < opt->groupsel[iline].n; i++)
3107 if (opt->groupsel[iline].bUse[i])
3120 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3122 //! Number of elements in fnm (used for command line parsing)
3123 #define NFILE asize(fnm)
3125 //! The main g_wham routine
3126 int gmx_wham(int argc, char *argv[])
3128 const char *desc[] = {
3129 "[THISMODULE] is an analysis program that implements the Weighted",
3130 "Histogram Analysis Method (WHAM). It is intended to analyze",
3131 "output files generated by umbrella sampling simulations to ",
3132 "compute a potential of mean force (PMF). [PAR] ",
3133 "At present, three input modes are supported.[BR]",
3134 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
3135 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
3136 " AND, with option [TT]-ix[tt], a file which contains file names of",
3137 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
3138 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
3139 " first pullx, etc.[BR]",
3140 "[TT]*[tt] Same as the previous input mode, except that the the user",
3141 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3142 " From the pull force the position in the umbrella potential is",
3143 " computed. This does not work with tabulated umbrella potentials.[BR]"
3144 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
3145 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3146 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
3147 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [TT].pdo[tt] file header",
3148 " must be similar to the following:[PAR]",
3149 "[TT]# UMBRELLA 3.0[BR]",
3150 "# Component selection: 0 0 1[BR]",
3152 "# Ref. Group 'TestAtom'[BR]",
3153 "# Nr. of pull groups 2[BR]",
3154 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
3155 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
3157 "The number of pull groups, umbrella positions, force constants, and names ",
3158 "may (of course) differ. Following the header, a time column and ",
3159 "a data column for each pull group follows (i.e. the displacement",
3160 "with respect to the umbrella center). Up to four pull groups are possible ",
3161 "per [TT].pdo[tt] file at present.[PAR]",
3162 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3163 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3164 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3165 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3166 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3167 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3168 "used, groupsel.dat looks like this:[BR]",
3172 "By default, the output files are[BR]",
3173 " [TT]-o[tt] PMF output file[BR]",
3174 " [TT]-hist[tt] Histograms output file[BR]",
3175 "Always check whether the histograms sufficiently overlap.[PAR]",
3176 "The umbrella potential is assumed to be harmonic and the force constants are ",
3177 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
3178 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
3179 "WHAM OPTIONS[BR]------------[BR]",
3180 " [TT]-bins[tt] Number of bins used in analysis[BR]",
3181 " [TT]-temp[tt] Temperature in the simulations[BR]",
3182 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
3183 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
3184 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
3185 "The data points that are used to compute the profile",
3186 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3187 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3188 "umbrella window.[PAR]",
3189 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3190 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3191 "With energy output, the energy in the first bin is defined to be zero. ",
3192 "If you want the free energy at a different ",
3193 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3194 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3195 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3196 "[THISMODULE] will make use of the",
3197 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3198 "reaction coordinate will assumed be be neighbors.[PAR]",
3199 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3200 "which may be useful for, e.g. membranes.[PAR]",
3201 "AUTOCORRELATIONS[BR]----------------[BR]",
3202 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3203 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3204 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3205 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3206 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3207 "Because the IACTs can be severely underestimated in case of limited ",
3208 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3209 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3210 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3211 "integration of the ACFs while the ACFs are larger 0.05.",
3212 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3213 "less robust) method such as fitting to a double exponential, you can ",
3214 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3215 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3216 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
3217 "ERROR ANALYSIS[BR]--------------[BR]",
3218 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3219 "otherwise the statistical error may be substantially underestimated. ",
3220 "More background and examples for the bootstrap technique can be found in ",
3221 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
3222 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3223 "Four bootstrapping methods are supported and ",
3224 "selected with [TT]-bs-method[tt].[BR]",
3225 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3226 "data points, and the bootstrap is carried out by assigning random weights to the ",
3227 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3228 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3229 "statistical error is underestimated.[BR]",
3230 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
3231 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3232 "(allowing duplication, i.e. sampling with replacement).",
3233 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
3234 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3235 "divided into blocks and only histograms within each block are mixed. Note that ",
3236 "the histograms within each block must be representative for all possible histograms, ",
3237 "otherwise the statistical error is underestimated.[BR]",
3238 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3239 "such that the generated data points are distributed according the given histograms ",
3240 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3241 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3242 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3243 "Note that this method may severely underestimate the error in case of limited sampling, ",
3244 "that is if individual histograms do not represent the complete phase space at ",
3245 "the respective positions.[BR]",
3246 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3247 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3248 "and width of the umbrella histograms. That method yields similar error estimates ",
3249 "like method [TT]traj[tt].[PAR]"
3250 "Bootstrapping output:[BR]",
3251 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
3252 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
3253 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3254 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3258 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3259 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3260 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3262 static t_UmbrellaOptions opt;
3265 { "-min", FALSE, etREAL, {&opt.min},
3266 "Minimum coordinate in profile"},
3267 { "-max", FALSE, etREAL, {&opt.max},
3268 "Maximum coordinate in profile"},
3269 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3270 "Determine min and max automatically"},
3271 { "-bins", FALSE, etINT, {&opt.bins},
3272 "Number of bins in profile"},
3273 { "-temp", FALSE, etREAL, {&opt.Temperature},
3275 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3277 { "-v", FALSE, etBOOL, {&opt.verbose},
3279 { "-b", FALSE, etREAL, {&opt.tmin},
3280 "First time to analyse (ps)"},
3281 { "-e", FALSE, etREAL, {&opt.tmax},
3282 "Last time to analyse (ps)"},
3283 { "-dt", FALSE, etREAL, {&opt.dt},
3284 "Analyse only every dt ps"},
3285 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3286 "Write histograms and exit"},
3287 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3288 "Determine min and max and exit (with [TT]-auto[tt])"},
3289 { "-log", FALSE, etBOOL, {&opt.bLog},
3290 "Calculate the log of the profile before printing"},
3291 { "-unit", FALSE, etENUM, {en_unit},
3292 "Energy unit in case of log output" },
3293 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3294 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3295 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3296 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3297 { "-sym", FALSE, etBOOL, {&opt.bSym},
3298 "Symmetrize profile around z=0"},
3299 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3300 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3301 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3302 "Calculate integrated autocorrelation times and use in wham"},
3303 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3304 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3305 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3306 "When computing autocorrelation functions, restart computing every .. (ps)"},
3307 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3308 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3309 "during smoothing"},
3310 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3311 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3312 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3313 "Bootstrap method" },
3314 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3315 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3316 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3317 "Seed for bootstrapping. (-1 = use time)"},
3318 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3319 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3320 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3321 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3322 { "-stepout", FALSE, etINT, {&opt.stepchange},
3323 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3324 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3325 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3329 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3330 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3331 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3332 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3333 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3334 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3335 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3336 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3337 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3338 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3339 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3340 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3343 int i, j, l, nfiles, nwins, nfiles2;
3344 t_UmbrellaHeader header;
3345 t_UmbrellaWindow * window = NULL;
3346 double *profile, maxchange = 1e20;
3347 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3348 char **fninTpr, **fninPull, **fninPdo;
3350 FILE *histout, *profout;
3351 char ylabel[256], title[256];
3354 opt.verbose = FALSE;
3355 opt.bHistOnly = FALSE;
3365 /* bootstrapping stuff */
3367 opt.bsMethod = bsMethod_hist;
3368 opt.tauBootStrap = 0.0;
3370 opt.histBootStrapBlockLength = 8;
3371 opt.bs_verbose = FALSE;
3376 opt.Temperature = 298;
3377 opt.Tolerance = 1e-6;
3378 opt.bBoundsOnly = FALSE;
3380 opt.bCalcTauInt = FALSE;
3381 opt.sigSmoothIact = 0.0;
3382 opt.bAllowReduceIact = TRUE;
3383 opt.bInitPotByIntegration = TRUE;
3384 opt.acTrestart = 1.0;
3385 opt.stepchange = 100;
3386 opt.stepUpdateContrib = 100;
3388 if (!parse_common_args(&argc, argv, 0,
3389 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3394 opt.unit = nenum(en_unit);
3395 opt.bsMethod = nenum(en_bsMethod);
3397 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3399 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3400 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3401 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3402 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3403 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3404 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3405 if (opt.bTab && opt.bPullf)
3407 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3408 "Provide pullx.xvg or pdo files!");
3411 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3413 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3415 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3417 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3418 "\n\n Check g_wham -h !");
3421 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3422 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3423 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3424 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3425 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3427 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3428 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3429 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3430 if ( (bMinSet || bMaxSet) && bAutoSet)
3432 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3435 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3437 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3440 if (bMinSet && opt.bAuto)
3442 printf("Note: min and max given, switching off -auto.\n");
3446 if (opt.bTauIntGiven && opt.bCalcTauInt)
3448 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3449 "the autocorrelation times. Not both.");
3452 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3454 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3455 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3457 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3459 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3460 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3463 /* Reading gmx4 pull output and tpr files */
3464 if (opt.bTpr || opt.bPullf || opt.bPullx)
3466 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3468 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3469 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3470 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3471 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3472 if (nfiles != nfiles2)
3474 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3475 opt.fnTpr, nfiles2, fnPull);
3478 /* Read file that selects the pull group to be used */
3479 if (opt.fnGroupsel != NULL)
3481 readPullGroupSelection(&opt, fninTpr, nfiles);
3484 window = initUmbrellaWindows(nfiles);
3485 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3488 { /* reading pdo files */
3489 if (opt.fnGroupsel != NULL)
3491 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3492 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3494 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3495 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3496 window = initUmbrellaWindows(nfiles);
3497 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3501 /* enforce equal weight for all histograms? */
3504 enforceEqualWeights(window, nwins);
3507 /* write histograms */
3508 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3509 xlabel, "count", opt.oenv);
3510 for (l = 0; l < opt.bins; ++l)
3512 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3513 for (i = 0; i < nwins; ++i)
3515 for (j = 0; j < window[i].nPull; ++j)
3517 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3520 fprintf(histout, "\n");
3522 gmx_ffclose(histout);
3523 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3526 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3530 /* Using tabulated umbrella potential */
3533 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3536 /* Integrated autocorrelation times provided ? */
3537 if (opt.bTauIntGiven)
3539 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3542 /* Compute integrated autocorrelation times */
3543 if (opt.bCalcTauInt)
3545 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3548 /* calc average and sigma for each histogram
3549 (maybe required for bootstrapping. If not, this is fast anyhow) */
3550 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3552 averageSigma(window, nwins);
3555 /* Get initial potential by simple integration */
3556 if (opt.bInitPotByIntegration)
3558 guessPotByIntegration(window, nwins, &opt);
3561 /* Check if complete reaction coordinate is covered */
3562 checkReactionCoordinateCovered(window, nwins, &opt);
3564 /* Calculate profile */
3565 snew(profile, opt.bins);
3573 if ( (i%opt.stepUpdateContrib) == 0)
3575 setup_acc_wham(profile, window, nwins, &opt);
3577 if (maxchange < opt.Tolerance)
3580 /* if (opt.verbose) */
3581 printf("Switched to exact iteration in iteration %d\n", i);
3583 calc_profile(profile, window, nwins, &opt, bExact);
3584 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3586 printf("\t%4d) Maximum change %e\n", i, maxchange);
3590 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3591 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3593 /* calc error from Kumar's formula */
3594 /* Unclear how the error propagates along reaction coordinate, therefore
3596 /* calc_error_kumar(profile,window, nwins,&opt); */
3598 /* Write profile in energy units? */
3601 prof_normalization_and_unit(profile, &opt);
3602 strcpy(ylabel, en_unit_label[opt.unit]);
3603 strcpy(title, "Umbrella potential");
3607 strcpy(ylabel, "Density of states");
3608 strcpy(title, "Density of states");
3611 /* symmetrize profile around z=0? */
3614 symmetrizeProfile(profile, &opt);
3617 /* write profile or density of states */
3618 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3619 for (i = 0; i < opt.bins; ++i)
3621 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3623 gmx_ffclose(profout);
3624 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3626 /* Bootstrap Method */
3629 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3630 opt2fn("-hist", NFILE, fnm),
3631 ylabel, profile, window, nwins, &opt);
3635 freeUmbrellaWindows(window, nfiles);
3637 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3638 please_cite(stdout, "Hub2010");