2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
55 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/utility/smalloc.h"
58 #include "gromacs/math/vec.h"
60 #include "gromacs/fileio/tpxio.h"
62 #include "gromacs/random/random.h"
65 #include "gromacs/utility/cstringutil.h"
66 #include "gromacs/fileio/xvgr.h"
68 #include "gromacs/utility/fatalerror.h"
70 //! longest file names allowed in input files
71 #define WHAM_MAXFILELEN 2048
74 * enum for energy units
77 enSel, en_kJ, en_kCal, en_kT, enNr
80 * enum for type of input files (pdos, tpr, or pullf)
83 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
86 /*! \brief enum for bootstrapping method
88 * These bootstrap methods are supported:
89 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
90 * (bsMethod_BayesianHist)
91 * - bootstrap complete histograms (bsMethod_hist)
92 * - bootstrap trajectories from given umbrella histograms. This generates new
93 * "synthetic" histograms (bsMethod_traj)
94 * - bootstrap trajectories from Gaussian with mu/sigma computed from
95 * the respective histogram (bsMethod_trajGauss). This gives very similar
96 * results compared to bsMethod_traj.
98 * ********************************************************************
99 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
100 * JS Hub, BL de Groot, D van der Spoel
101 * g_wham - A free weighted histogram analysis implementation including
102 * robust error and autocorrelation estimates,
103 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
104 * ********************************************************************
107 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
108 bsMethod_traj, bsMethod_trajGauss
112 //! Parameters of the umbrella potentials
116 * \name Using umbrella pull code since gromacs 4.x
119 int npullcrds; //!< nr of pull coordinates in tpr file
120 int pull_geometry; //!< such as distance, direction
121 ivec pull_dim; //!< pull dimension with geometry distance
122 int pull_ndim; //!< nr of pull_dim != 0
123 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
124 real *k; //!< force constants in tpr file
125 real *init_dist; //!< reference displacements
126 real *umbInitDist; //!< reference displacement in umbrella direction
129 * \name Using PDO files common until gromacs 3.x
137 char PullName[4][256];
139 double UmbCons[4][3];
143 //! Data in the umbrella histograms
146 int nPull; //!< nr of pull groups in this pdo or pullf/x file
147 double **Histo; //!< nPull histograms
148 double **cum; //!< nPull cumulative distribution functions
149 int nBin; //!< nr of bins. identical to opt->bins
150 double *k; //!< force constants for the nPull groups
151 double *pos; //!< umbrella positions for the nPull groups
152 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
153 int *N; //!< nr of data points in nPull histograms.
154 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
156 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
158 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
159 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
162 double *tau; //!< intetrated autocorrelation time (IACT)
163 double *tausmooth; //!< smoothed IACT
165 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
167 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
169 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
171 /*! \brief average force estimated from average displacement, fAv=dzAv*k
173 * Used for integration to guess the potential.
176 real *aver; //!< average of histograms
177 real *sigma; //!< stddev of histograms
178 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
181 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
184 int n; //!< total nr of pull groups in this tpr file
185 int nUse; //!< nr of pull groups used
186 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
189 //! Parameters of WHAM
196 const char *fnTpr, *fnPullf, *fnGroupsel;
197 const char *fnPdo, *fnPullx; //!< file names of input
198 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
199 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
201 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
202 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
203 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
204 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
207 * \name Basic WHAM options
210 int bins; //!< nr of bins, min, max, and dz of profile
212 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
213 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
216 * \name Output control
219 gmx_bool bLog; //!< energy output (instead of probability) for profile
220 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
221 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
222 /*! \brief after wham, set prof to zero at this z-position.
223 * When bootstrapping, set zProf0 to a "stable" reference position.
226 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
228 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
229 gmx_bool bAuto; //!< determine min and max automatically but do not exit
231 gmx_bool verbose; //!< more noisy wham mode
232 int stepchange; //!< print maximum change in prof after how many interations
233 output_env_t oenv; //!< xvgr options
236 * \name Autocorrelation stuff
239 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
240 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
241 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
242 real acTrestart; //!< when computing ACT, time between restarting points
244 /* \brief Enforce the same weight for each umbella window, that is
245 * calculate with the same number of data points for
246 * each window. That can be reasonable, if the histograms
247 * have different length, but due to autocorrelation,
248 * a longer simulation should not have larger weightin wham.
254 * \name Bootstrapping stuff
257 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
259 /* \brief bootstrap method
261 * if == bsMethod_hist, consider complete histograms as independent
262 * data points and, hence, only mix complete histograms.
263 * if == bsMethod_BayesianHist, consider complete histograms
264 * as independent data points, but assign random weights
265 * to the histograms during the bootstrapping ("Bayesian bootstrap")
266 * In case of long correlations (e.g., inside a channel), these
267 * will yield a more realistic error.
268 * if == bsMethod_traj(Gauss), generate synthetic histograms
270 * histogram by generating an autocorrelated random sequence
271 * that is distributed according to the respective given
272 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
273 * (instead of from the umbrella histogram) to generate a new
278 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
281 /* \brief when mixing histograms, mix only histograms withing blocks
282 long the reaction coordinate xi. Avoids gaps along xi. */
283 int histBootStrapBlockLength;
285 int bsSeed; //!< random seed for bootstrapping
287 /* \brief Write cumulative distribution functions (CDFs) of histograms
288 and write the generated histograms for each bootstrap */
292 * \name tabulated umbrella potential stuff
296 double *tabX, *tabY, tabMin, tabMax, tabDz;
299 gmx_rng_t rng; //!< gromacs random number generator
302 //! Make an umbrella window (may contain several histograms)
303 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
305 t_UmbrellaWindow *win;
308 for (i = 0; i < nwin; i++)
310 win[i].Histo = win[i].cum = 0;
311 win[i].k = win[i].pos = win[i].z = 0;
312 win[i].N = win[i].Ntot = 0;
313 win[i].g = win[i].tau = win[i].tausmooth = 0;
317 win[i].aver = win[i].sigma = 0;
323 //! Delete an umbrella window (may contain several histograms)
324 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
327 for (i = 0; i < nwin; i++)
331 for (j = 0; j < win[i].nPull; j++)
333 sfree(win[i].Histo[j]);
338 for (j = 0; j < win[i].nPull; j++)
340 sfree(win[i].cum[j]);
345 for (j = 0; j < win[i].nPull; j++)
347 sfree(win[i].bContrib[j]);
359 sfree(win[i].tausmooth);
360 sfree(win[i].bContrib);
362 sfree(win[i].forceAv);
365 sfree(win[i].bsWeight);
371 * Read and setup tabulated umbrella potential
373 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
378 printf("Setting up tabulated potential from file %s\n", fn);
379 nl = read_xvg(fn, &y, &ny);
383 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
385 opt->tabMin = y[0][0];
386 opt->tabMax = y[0][nl-1];
387 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
390 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
391 "ascending z-direction", fn);
393 for (i = 0; i < nl-1; i++)
395 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
397 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
402 for (i = 0; i < nl; i++)
404 opt->tabX[i] = y[0][i];
405 opt->tabY[i] = y[1][i];
407 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
408 opt->tabMin, opt->tabMax, opt->tabDz);
411 //! Read the header of an PDO file (position, force const, nr of groups)
412 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
415 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
417 std::istringstream ist;
420 if (fgets(line, 2048, file) == NULL)
422 gmx_fatal(FARGS, "Error reading header from pdo file\n");
425 ist >> Buffer0 >> Buffer1 >> Buffer2;
426 if (strcmp(Buffer1, "UMBRELLA"))
428 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
429 "(Found in first line: `%s')\n",
430 Buffer1, "UMBRELLA", line);
432 if (strcmp(Buffer2, "3.0"))
434 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
438 if (fgets(line, 2048, file) == NULL)
440 gmx_fatal(FARGS, "Error reading header from pdo file\n");
443 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
444 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
446 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
447 if (header->nDim != 1)
449 gmx_fatal(FARGS, "Currently only supports one dimension");
453 if (fgets(line, 2048, file) == NULL)
455 gmx_fatal(FARGS, "Error reading header from pdo file\n");
458 ist >> Buffer0 >> Buffer1 >> header->nSkip;
461 if (fgets(line, 2048, file) == NULL)
463 gmx_fatal(FARGS, "Error reading header from pdo file\n");
466 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
469 if (fgets(line, 2048, file) == NULL)
471 gmx_fatal(FARGS, "Error reading header from pdo file\n");
474 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
478 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
482 for (i = 0; i < header->nPull; ++i)
484 if (fgets(line, 2048, file) == NULL)
486 gmx_fatal(FARGS, "Error reading header from pdo file\n");
489 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
490 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
491 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
495 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
496 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
500 if (fgets(line, 2048, file) == NULL)
502 gmx_fatal(FARGS, "Cannot read from file\n");
506 if (strcmp(Buffer3, "#####") != 0)
508 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
513 static char *fgets3(FILE *fp, char ptr[], int *len)
518 if (fgets(ptr, *len-1, fp) == NULL)
523 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
525 /* This line is longer than len characters, let's increase len! */
529 if (fgets(p-1, STRLEN, fp) == NULL)
535 if (ptr[slen-1] == '\n')
543 /*! \brief Read the data columns of and PDO file.
545 * TO DO: Get rid of the scanf function to avoid the clang warning.
546 * At the moment, this warning is avoided by hiding the format string
547 * the variable fmtlf.
549 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
550 int fileno, t_UmbrellaWindow * win,
551 t_UmbrellaOptions *opt,
552 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
554 int i, inttemp, bins, count, ntot;
555 real min, max, minfound = 1e20, maxfound = -1e20;
556 double temp, time, time0 = 0, dt;
558 t_UmbrellaWindow * window = 0;
559 gmx_bool timeok, dt_ok = 1;
560 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
561 int len = STRLEN, dstep = 1;
562 const int blocklen = 4096;
572 /* Need to alocate memory and set up structure */
573 window->nPull = header->nPull;
576 snew(window->Histo, window->nPull);
577 snew(window->z, window->nPull);
578 snew(window->k, window->nPull);
579 snew(window->pos, window->nPull);
580 snew(window->N, window->nPull);
581 snew(window->Ntot, window->nPull);
582 snew(window->g, window->nPull);
583 snew(window->bsWeight, window->nPull);
585 window->bContrib = 0;
587 if (opt->bCalcTauInt)
589 snew(window->ztime, window->nPull);
595 snew(lennow, window->nPull);
597 for (i = 0; i < window->nPull; ++i)
600 window->bsWeight[i] = 1.;
601 snew(window->Histo[i], bins);
602 window->k[i] = header->UmbCons[i][0];
603 window->pos[i] = header->UmbPos[i][0];
607 if (opt->bCalcTauInt)
609 window->ztime[i] = 0;
613 /* Done with setup */
619 min = max = bins = 0; /* Get rid of warnings */
624 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
628 if (ptr[0] == '#' || strlen(ptr) < 2)
633 /* Initiate format string */
635 strcat(fmtign, "%*s");
637 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
638 /* Round time to fs */
639 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
641 /* get time step of pdo file */
651 dstep = static_cast<int>(opt->dt/dt+0.5);
659 window->dt = dt*dstep;
664 dt_ok = ((count-1)%dstep == 0);
665 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
667 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
668 time,opt->tmin, opt->tmax, dt_ok,timeok); */
672 for (i = 0; i < header->nPull; ++i)
675 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
676 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
677 if (sscanf(ptr, fmt, &temp))
679 temp += header->UmbPos[i][0];
693 if (opt->bCalcTauInt)
695 /* save time series for autocorrelation analysis */
696 ntot = window->Ntot[i];
697 if (ntot >= lennow[i])
699 lennow[i] += blocklen;
700 srenew(window->ztime[i], lennow[i]);
702 window->ztime[i][ntot] = temp;
710 inttemp = static_cast<int> (temp);
717 else if (inttemp >= bins)
723 if (inttemp >= 0 && inttemp < bins)
725 window->Histo[i][inttemp] += 1.;
733 if (time > opt->tmax)
737 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
753 /*! \brief Set identical weights for all histograms
755 * Normally, the weight is given by the number data points in each
756 * histogram, together with the autocorrelation time. This can be overwritten
757 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
758 * an appropriate autocorrelation times instead of using this function.
760 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
762 int i, k, j, NEnforced;
765 NEnforced = window[0].Ntot[0];
766 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
767 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
768 /* enforce all histograms to have the same weight as the very first histogram */
770 for (j = 0; j < nWindows; ++j)
772 for (k = 0; k < window[j].nPull; ++k)
774 ratio = 1.0*NEnforced/window[j].Ntot[k];
775 for (i = 0; i < window[0].nBin; ++i)
777 window[j].Histo[k][i] *= ratio;
779 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
784 /*! \brief Simple linear interpolation between two given tabulated points
786 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
789 double pl, pu, dz, dp;
791 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
793 if (jl < 0 || ju >= opt->tabNbins)
795 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
796 "Provide an extended table.", dist, jl, ju);
800 dz = dist-opt->tabX[jl];
801 dp = (pu-pl)*dz/opt->tabDz;
807 * Check which bins substiantially contribute (accelerates WHAM)
809 * Don't worry, that routine does not mean we compute the PMF in limited precision.
810 * After rapid convergence (using only substiantal contributions), we always switch to
813 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
814 t_UmbrellaOptions *opt)
816 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
817 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
818 gmx_bool bAnyContrib;
819 static int bFirst = 1;
820 static double wham_contrib_lim;
824 for (i = 0; i < nWindows; ++i)
826 nGrptot += window[i].nPull;
828 wham_contrib_lim = opt->Tolerance/nGrptot;
831 ztot = opt->max-opt->min;
834 for (i = 0; i < nWindows; ++i)
836 if (!window[i].bContrib)
838 snew(window[i].bContrib, window[i].nPull);
840 for (j = 0; j < window[i].nPull; ++j)
842 if (!window[i].bContrib[j])
844 snew(window[i].bContrib[j], opt->bins);
847 for (k = 0; k < opt->bins; ++k)
849 temp = (1.0*k+0.5)*dz+min;
850 distance = temp - window[i].pos[j]; /* distance to umbrella center */
852 { /* in cyclic wham: */
853 if (distance > ztot_half) /* |distance| < ztot_half */
857 else if (distance < -ztot_half)
862 /* Note: there are two contributions to bin k in the wham equations:
863 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
864 ii) exp(- U/(8.314e-3*opt->Temperature))
865 where U is the umbrella potential
866 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
871 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
875 U = tabulated_pot(distance, opt); /* Use tabulated potential */
878 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
879 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
880 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
881 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
882 if (window[i].bContrib[j][k])
888 /* If this histo is far outside min and max all bContrib may be FALSE,
889 causing a floating point exception later on. To avoid that, switch
893 for (k = 0; k < opt->bins; ++k)
895 window[i].bContrib[j][k] = TRUE;
902 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
903 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
908 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
914 //! Compute the PMF (one of the two main WHAM routines)
915 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
916 t_UmbrellaOptions *opt, gmx_bool bExact)
919 double num, ztot_half, ztot, distance, min = opt->min, dz = opt->dz;
920 double denom, U = 0, temp = 0, invg;
922 ztot = opt->max-opt->min;
925 for (i = 0; i < opt->bins; ++i)
928 for (j = 0; j < nWindows; ++j)
930 for (k = 0; k < window[j].nPull; ++k)
932 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
933 temp = (1.0*i+0.5)*dz+min;
934 num += invg*window[j].Histo[k][i];
936 if (!(bExact || window[j].bContrib[k][i]))
940 distance = temp - window[j].pos[k]; /* distance to umbrella center */
942 { /* in cyclic wham: */
943 if (distance > ztot_half) /* |distance| < ztot_half */
947 else if (distance < -ztot_half)
955 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
959 U = tabulated_pot(distance, opt); /* Use tabulated potential */
961 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
964 profile[i] = num/denom;
968 //! Compute the free energy offsets z (one of the two main WHAM routines)
969 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
970 t_UmbrellaOptions *opt, gmx_bool bExact)
973 double U = 0, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot;
974 double MAX = -1e20, total = 0;
976 ztot = opt->max-opt->min;
979 for (i = 0; i < nWindows; ++i)
981 for (j = 0; j < window[i].nPull; ++j)
984 for (k = 0; k < window[i].nBin; ++k)
986 if (!(bExact || window[i].bContrib[j][k]))
990 temp = (1.0*k+0.5)*dz+min;
991 distance = temp - window[i].pos[j]; /* distance to umbrella center */
993 { /* in cyclic wham: */
994 if (distance > ztot_half) /* |distance| < ztot_half */
998 else if (distance < -ztot_half)
1006 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1010 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1013 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1015 /* Avoid floating point exception if window is far outside min and max */
1018 total = -log(total);
1024 temp = fabs(total - window[i].z[j]);
1029 window[i].z[j] = total;
1035 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1036 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1038 int i, j, bins = opt->bins;
1039 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1042 if (min > 0. || max < 0.)
1044 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1045 opt->min, opt->max);
1050 for (i = 0; i < bins; i++)
1054 /* bin left of zsym */
1055 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1056 if (j >= 0 && (j+1) < bins)
1058 /* interpolate profile linearly between bins j and j+1 */
1059 z1 = min+(j+0.5)*dz;
1061 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1062 /* average between left and right */
1063 prof2[i] = 0.5*(profsym+profile[i]);
1067 prof2[i] = profile[i];
1071 memcpy(profile, prof2, bins*sizeof(double));
1075 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1076 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1079 double unit_factor = 1., R_MolarGasConst, diff;
1081 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1084 /* Not log? Nothing to do! */
1090 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1091 if (opt->unit == en_kT)
1095 else if (opt->unit == en_kJ)
1097 unit_factor = R_MolarGasConst*opt->Temperature;
1099 else if (opt->unit == en_kCal)
1101 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1105 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1108 for (i = 0; i < bins; i++)
1110 if (profile[i] > 0.0)
1112 profile[i] = -log(profile[i])*unit_factor;
1116 /* shift to zero at z=opt->zProf0 */
1117 if (!opt->bProf0Set)
1123 /* Get bin with shortest distance to opt->zProf0
1124 (-0.5 from bin position and +0.5 from rounding cancel) */
1125 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1130 else if (imin >= bins)
1134 diff = profile[imin];
1138 for (i = 0; i < bins; i++)
1144 //! Make an array of random integers (used for bootstrapping)
1145 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1147 int ipull, blockBase, nr, ipullRandom;
1149 if (blockLength == 0)
1151 blockLength = nPull;
1154 for (ipull = 0; ipull < nPull; ipull++)
1156 blockBase = (ipull/blockLength)*blockLength;
1158 { /* make sure nothing bad happens in the last block */
1159 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1160 ipullRandom = blockBase + nr;
1162 while (ipullRandom >= nPull);
1163 if (ipullRandom < 0 || ipullRandom >= nPull)
1165 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1166 "blockLength = %d, blockBase = %d\n",
1167 ipullRandom, nPull, nr, blockLength, blockBase);
1169 randomArray[ipull] = ipullRandom;
1171 /*for (ipull=0; ipull<nPull; ipull++)
1172 printf("%d ",randomArray[ipull]); printf("\n"); */
1175 /*! \brief Set pull group information of a synthetic histogram
1177 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1178 * but it is not required if we bootstrap complete histograms.
1180 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1181 t_UmbrellaWindow *thisWindow, int pullid)
1183 synthWindow->N [0] = thisWindow->N [pullid];
1184 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1185 synthWindow->pos [0] = thisWindow->pos [pullid];
1186 synthWindow->z [0] = thisWindow->z [pullid];
1187 synthWindow->k [0] = thisWindow->k [pullid];
1188 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1189 synthWindow->g [0] = thisWindow->g [pullid];
1190 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1193 /*! \brief Calculate cumulative distribution function of of all histograms.
1195 * This allow to create random number sequences
1196 * which are distributed according to the histograms. Required to generate
1197 * the "synthetic" histograms for the Bootstrap method
1199 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1200 t_UmbrellaOptions *opt, const char *fnhist)
1204 char *fn = 0, *buf = 0;
1207 if (opt->bs_verbose)
1209 snew(fn, strlen(fnhist)+10);
1210 snew(buf, strlen(fnhist)+10);
1211 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1212 fp = xvgropen(fn, "CDFs of umbrella windows", "z", "CDF", opt->oenv);
1216 for (i = 0; i < nWindows; i++)
1218 snew(window[i].cum, window[i].nPull);
1219 for (j = 0; j < window[i].nPull; j++)
1221 snew(window[i].cum[j], nbin+1);
1222 window[i].cum[j][0] = 0.;
1223 for (k = 1; k <= nbin; k++)
1225 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1228 /* normalize CDFs. Ensure cum[nbin]==1 */
1229 last = window[i].cum[j][nbin];
1230 for (k = 0; k <= nbin; k++)
1232 window[i].cum[j][k] /= last;
1237 printf("Cumulative distriubtion functions of all histograms created.\n");
1238 if (opt->bs_verbose)
1240 for (k = 0; k <= nbin; k++)
1242 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1243 for (i = 0; i < nWindows; i++)
1245 for (j = 0; j < window[i].nPull; j++)
1247 fprintf(fp, "%g\t", window[i].cum[j][k]);
1252 printf("Wrote cumulative distribution functions to %s\n", fn);
1260 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1262 * This is used to generate a random sequence distributed according to a histogram
1264 void searchCumulative(double xx[], int n, double x, int *j)
1286 else if (x == xx[n-1])
1296 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1297 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1298 int pullid, t_UmbrellaOptions *opt)
1300 int N, i, nbins, r_index, ibin;
1301 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1304 N = thisWindow->N[pullid];
1305 dt = thisWindow->dt;
1306 nbins = thisWindow->nBin;
1308 /* tau = autocorrelation time */
1309 if (opt->tauBootStrap > 0.0)
1311 tausteps = opt->tauBootStrap/dt;
1313 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1315 /* calc tausteps from g=1+2tausteps */
1316 g = thisWindow->g[pullid];
1322 "When generating hypothetical trajctories from given umbrella histograms,\n"
1323 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1324 "cannot be predicted. You have 3 options:\n"
1325 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1326 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1328 " with option -iiact for all umbrella windows.\n"
1329 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1330 " Use option (3) only if you are sure what you're doing, you may severely\n"
1331 " underestimate the error if a too small ACT is given.\n");
1332 gmx_fatal(FARGS, errstr);
1335 synthWindow->N [0] = N;
1336 synthWindow->pos [0] = thisWindow->pos[pullid];
1337 synthWindow->z [0] = thisWindow->z[pullid];
1338 synthWindow->k [0] = thisWindow->k[pullid];
1339 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1340 synthWindow->g [0] = thisWindow->g [pullid];
1341 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1343 for (i = 0; i < nbins; i++)
1345 synthWindow->Histo[0][i] = 0.;
1348 if (opt->bsMethod == bsMethod_trajGauss)
1350 sig = thisWindow->sigma [pullid];
1351 mu = thisWindow->aver [pullid];
1354 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1356 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1358 z = a*x + sqrt(1-a^2)*y
1359 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1360 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1362 C(t) = exp(-t/tau) with tau=-1/ln(a)
1364 Then, use error function to turn the Gaussian random variable into a uniformly
1365 distributed one in [0,1]. Eventually, use cumulative distribution function of
1366 histogram to get random variables distributed according to histogram.
1367 Note: The ACT of the flat distribution and of the generated histogram is not
1368 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1370 a = exp(-1.0/tausteps);
1372 invsqrt2 = 1./sqrt(2.0);
1374 /* init random sequence */
1375 x = gmx_rng_gaussian_table(opt->rng);
1377 if (opt->bsMethod == bsMethod_traj)
1379 /* bootstrap points from the umbrella histograms */
1380 for (i = 0; i < N; i++)
1382 y = gmx_rng_gaussian_table(opt->rng);
1384 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1385 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1387 r = 0.5*(1+gmx_erf(x*invsqrt2));
1388 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1389 synthWindow->Histo[0][r_index] += 1.;
1392 else if (opt->bsMethod == bsMethod_trajGauss)
1394 /* bootstrap points from a Gaussian with the same average and sigma
1395 as the respective umbrella histogram. The idea was, that -given
1396 limited sampling- the bootstrapped histograms are otherwise biased
1397 from the limited sampling of the US histos. However, bootstrapping from
1398 the Gaussian seems to yield a similar estimate. */
1402 y = gmx_rng_gaussian_table(opt->rng);
1405 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1410 while ( (ibin += nbins) < 0)
1415 else if (ibin >= nbins)
1417 while ( (ibin -= nbins) >= nbins)
1424 if (ibin >= 0 && ibin < nbins)
1426 synthWindow->Histo[0][ibin] += 1.;
1433 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1437 /*! \brief Write all histograms to a file
1439 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1440 * sets of bootstrapped histograms.
1442 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1443 int bs_index, t_UmbrellaOptions *opt)
1445 char *fn = 0, *buf = 0, title[256];
1451 snew(fn, strlen(fnhist)+10);
1452 snew(buf, strlen(fnhist)+1);
1453 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1454 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1458 fn = strdup(fnhist);
1459 strcpy(title, "Umbrella histograms");
1462 fp = xvgropen(fn, title, "z", "count", opt->oenv);
1465 /* Write histograms */
1466 for (l = 0; l < bins; ++l)
1468 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1469 for (i = 0; i < nWindows; ++i)
1471 for (j = 0; j < window[i].nPull; ++j)
1473 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1480 printf("Wrote %s\n", fn);
1488 //! Used for qsort to sort random numbers
1489 int func_wham_is_larger(const void *a, const void *b)
1508 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1509 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1516 /* generate ordered random numbers between 0 and nAllPull */
1517 for (i = 0; i < nAllPull-1; i++)
1519 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1521 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1522 r[nAllPull-1] = 1.0*nAllPull;
1524 synthwin[0].bsWeight[0] = r[0];
1525 for (i = 1; i < nAllPull; i++)
1527 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1530 /* avoid to have zero weight by adding a tiny value */
1531 for (i = 0; i < nAllPull; i++)
1533 if (synthwin[i].bsWeight[0] < 1e-5)
1535 synthwin[i].bsWeight[0] = 1e-5;
1542 //! The main bootstrapping routine
1543 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1544 char* ylabel, double *profile,
1545 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1547 t_UmbrellaWindow * synthWindow;
1548 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1549 int i, j, *randomArray = 0, winid, pullid, ib;
1550 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1552 gmx_bool bExact = FALSE;
1554 /* init random generator */
1555 if (opt->bsSeed == -1)
1557 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1561 opt->rng = gmx_rng_init(opt->bsSeed);
1564 snew(bsProfile, opt->bins);
1565 snew(bsProfiles_av, opt->bins);
1566 snew(bsProfiles_av2, opt->bins);
1568 /* Create array of all pull groups. Note that different windows
1569 may have different nr of pull groups
1570 First: Get total nr of pull groups */
1572 for (i = 0; i < nWindows; i++)
1574 nAllPull += window[i].nPull;
1576 snew(allPull_winId, nAllPull);
1577 snew(allPull_pullId, nAllPull);
1579 /* Setup one array of all pull groups */
1580 for (i = 0; i < nWindows; i++)
1582 for (j = 0; j < window[i].nPull; j++)
1584 allPull_winId[iAllPull] = i;
1585 allPull_pullId[iAllPull] = j;
1590 /* setup stuff for synthetic windows */
1591 snew(synthWindow, nAllPull);
1592 for (i = 0; i < nAllPull; i++)
1594 synthWindow[i].nPull = 1;
1595 synthWindow[i].nBin = opt->bins;
1596 snew(synthWindow[i].Histo, 1);
1597 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1599 snew(synthWindow[i].Histo[0], opt->bins);
1601 snew(synthWindow[i].N, 1);
1602 snew(synthWindow[i].pos, 1);
1603 snew(synthWindow[i].z, 1);
1604 snew(synthWindow[i].k, 1);
1605 snew(synthWindow[i].bContrib, 1);
1606 snew(synthWindow[i].g, 1);
1607 snew(synthWindow[i].bsWeight, 1);
1610 switch (opt->bsMethod)
1613 snew(randomArray, nAllPull);
1614 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1615 please_cite(stdout, "Hub2006");
1617 case bsMethod_BayesianHist:
1618 /* just copy all histogams into synthWindow array */
1619 for (i = 0; i < nAllPull; i++)
1621 winid = allPull_winId [i];
1622 pullid = allPull_pullId[i];
1623 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1627 case bsMethod_trajGauss:
1628 calc_cumulatives(window, nWindows, opt, fnhist);
1631 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1634 /* do bootstrapping */
1635 fp = xvgropen(fnprof, "Boot strap profiles", "z", ylabel, opt->oenv);
1636 for (ib = 0; ib < opt->nBootStrap; ib++)
1638 printf(" *******************************************\n"
1639 " ******** Start bootstrap nr %d ************\n"
1640 " *******************************************\n", ib+1);
1642 switch (opt->bsMethod)
1645 /* bootstrap complete histograms from given histograms */
1646 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1647 for (i = 0; i < nAllPull; i++)
1649 winid = allPull_winId [randomArray[i]];
1650 pullid = allPull_pullId[randomArray[i]];
1651 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1654 case bsMethod_BayesianHist:
1655 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1656 setRandomBsWeights(synthWindow, nAllPull, opt);
1659 case bsMethod_trajGauss:
1660 /* create new histos from given histos, that is generate new hypothetical
1662 for (i = 0; i < nAllPull; i++)
1664 winid = allPull_winId[i];
1665 pullid = allPull_pullId[i];
1666 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1671 /* write histos in case of verbose output */
1672 if (opt->bs_verbose)
1674 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1681 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1684 if ( (i%opt->stepUpdateContrib) == 0)
1686 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1688 if (maxchange < opt->Tolerance)
1692 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1694 printf("\t%4d) Maximum change %e\n", i, maxchange);
1696 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1699 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1700 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1704 prof_normalization_and_unit(bsProfile, opt);
1707 /* symmetrize profile around z=0 */
1710 symmetrizeProfile(bsProfile, opt);
1713 /* save stuff to get average and stddev */
1714 for (i = 0; i < opt->bins; i++)
1717 bsProfiles_av[i] += tmp;
1718 bsProfiles_av2[i] += tmp*tmp;
1719 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1725 /* write average and stddev */
1726 fp = xvgropen(fnres, "Average and stddev from bootstrapping", "z", ylabel, opt->oenv);
1727 fprintf(fp, "@TYPE xydy\n");
1728 for (i = 0; i < opt->bins; i++)
1730 bsProfiles_av [i] /= opt->nBootStrap;
1731 bsProfiles_av2[i] /= opt->nBootStrap;
1732 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1733 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1734 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1737 printf("Wrote boot strap result to %s\n", fnres);
1740 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1741 int whaminFileType(char *fn)
1745 if (strcmp(fn+len-3, "tpr") == 0)
1749 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1751 return whamin_pullxf;
1753 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1759 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1761 return whamin_unknown;
1764 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1765 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1766 t_UmbrellaOptions *opt)
1768 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1769 int nread, sizenow, i, block = 1;
1772 fp = gmx_ffopen(fn, "r");
1775 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1777 if (strlen(tmp) >= WHAM_MAXFILELEN)
1779 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1781 if (nread >= sizenow)
1784 srenew(filename, sizenow);
1785 for (i = sizenow-block; i < sizenow; i++)
1787 snew(filename[i], WHAM_MAXFILELEN);
1790 /* remove newline if there is one */
1791 if (tmp[strlen(tmp)-1] == '\n')
1793 tmp[strlen(tmp)-1] = '\0';
1795 strcpy(filename[nread], tmp);
1798 printf("Found file %s in %s\n", filename[nread], fn);
1802 *filenamesRet = filename;
1806 //! Open a file or a pipe to a gzipped file
1807 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1809 char Buffer[1024], gunzip[1024], *Path = 0;
1811 static gmx_bool bFirst = 1;
1813 /* gzipped pdo file? */
1814 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1816 /* search gunzip executable */
1817 if (!(Path = getenv("GMX_PATH_GZIP")))
1819 if (gmx_fexist("/bin/gunzip"))
1821 sprintf(gunzip, "%s", "/bin/gunzip");
1823 else if (gmx_fexist("/usr/bin/gunzip"))
1825 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1829 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1830 "You may want to define the path to gunzip "
1831 "with the environment variable GMX_PATH_GZIP.", gunzip);
1836 sprintf(gunzip, "%s/gunzip", Path);
1837 if (!gmx_fexist(gunzip))
1839 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1840 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1845 printf("Using gunzig executable %s\n", gunzip);
1848 if (!gmx_fexist(fn))
1850 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1852 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1855 printf("Executing command '%s'\n", Buffer);
1858 if ((pipe = popen(Buffer, "r")) == NULL)
1860 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1863 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1869 pipe = gmx_ffopen(fn, "r");
1876 //! Close file or pipe
1877 void pdo_close_file(FILE *fp)
1886 //! Reading all pdo files
1887 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1888 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1891 real mintmp, maxtmp, done = 0.;
1894 /* char Buffer0[1000]; */
1898 gmx_fatal(FARGS, "No files found. Hick.");
1901 /* if min and max are not given, get min and max from the input files */
1904 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1907 for (i = 0; i < nfiles; ++i)
1909 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1910 /*fgets(Buffer0,999,file);
1911 fprintf(stderr,"First line '%s'\n",Buffer0); */
1912 done = 100.0*(i+1)/nfiles;
1913 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1918 read_pdo_header(file, header, opt);
1919 /* here only determine min and max of this window */
1920 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1921 if (maxtmp > opt->max)
1925 if (mintmp < opt->min)
1931 pdo_close_file(file);
1939 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1940 if (opt->bBoundsOnly)
1942 printf("Found option -boundsonly, now exiting.\n");
1946 /* store stepsize in profile */
1947 opt->dz = (opt->max-opt->min)/opt->bins;
1949 /* Having min and max, we read in all files */
1950 /* Loop over all files */
1951 for (i = 0; i < nfiles; ++i)
1953 done = 100.0*(i+1)/nfiles;
1954 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1959 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1960 read_pdo_header(file, header, opt);
1961 /* load data into window */
1962 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
1963 if ((window+i)->Ntot[0] == 0)
1965 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1969 pdo_close_file(file);
1977 for (i = 0; i < nfiles; ++i)
1984 //! translate 0/1 to N/Y to write pull dimensions
1985 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
1987 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
1988 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
1993 static int first = 1;
1995 /* printf("Reading %s \n",fn); */
1996 read_tpx_state(fn, &ir, &state, NULL, NULL);
1998 if (ir.ePull != epullUMBRELLA)
2000 gmx_fatal(FARGS, "This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
2001 " (ir.ePull = %d)\n", epull_names[ir.ePull], ir.ePull);
2004 /* nr of pull groups */
2005 ncrd = ir.pull->ncoord;
2008 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d pull coordinates\n", ncrd);
2011 header->npullcrds = ir.pull->ncoord;
2012 header->pull_geometry = ir.pull->eGeom;
2013 header->bPrintRef = ir.pull->bPrintRef;
2014 copy_ivec(ir.pull->dim, header->pull_dim);
2015 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2016 snew(header->k, ncrd);
2017 snew(header->init_dist, ncrd);
2018 snew(header->umbInitDist, ncrd);
2020 /* only z-direction with epullgCYL? */
2021 if (header->pull_geometry == epullgCYL)
2023 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2025 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2026 "However, found dimensions [%s %s %s]\n",
2027 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2028 int2YN(header->pull_dim[ZZ]));
2032 for (i = 0; i < ncrd; i++)
2034 header->k[i] = ir.pull->coord[i].k;
2035 if (header->k[i] == 0.0)
2037 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2038 "That doesn't seem to be an Umbrella tpr.\n",
2041 header->init_dist[i] = ir.pull->coord[i].init;
2043 /* initial distance to reference */
2044 switch (header->pull_geometry)
2047 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2051 header->umbInitDist[i] = header->init_dist[i];
2054 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2058 if (opt->verbose || first)
2060 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2061 "\tPull group coordinates%s expected in pullx files.\n",
2062 fn, header->npullcrds, epullg_names[header->pull_geometry],
2063 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2064 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2065 for (i = 0; i < ncrd; i++)
2067 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2070 if (!opt->verbose && first)
2072 printf("\tUse option -v to see this output for all input tpr files\n\n");
2078 //! 2-norm in a ndim-dimensional space
2079 double dist_ndim(double **dx, int ndim, int line)
2083 for (i = 0; i < ndim; i++)
2085 r2 += sqr(dx[i][line]);
2090 //! Read pullx.xvg or pullf.xvg
2091 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2092 t_UmbrellaWindow * window,
2093 t_UmbrellaOptions *opt,
2094 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2095 t_groupselection *groupsel)
2097 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2098 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2099 real min, max, minfound = 1e20, maxfound = -1e20;
2100 gmx_bool dt_ok, timeok, bHaveForce;
2101 const char *quantity;
2102 const int blocklen = 4096;
2104 static gmx_bool bFirst = TRUE;
2107 * Data columns in pull output:
2108 * - in force output pullf.xvg:
2109 * No reference columns, one column per pull coordinate
2111 * - in position output pullx.xvg
2112 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2113 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2116 nColPerCrd = opt->bPullx ? header->pull_ndim : 1;
2117 quantity = opt->bPullx ? "position" : "force";
2119 if (opt->bPullx && header->bPrintRef)
2121 nColRefEachCrd = header->pull_ndim;
2128 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2129 bHaveForce = opt->bPullf;
2131 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2132 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2133 Sorry for the laziness, this is a To-do. */
2134 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2137 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2138 "reading \n(option -if) is supported at present, "
2139 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2140 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2141 epullg_names[header->pull_geometry]);
2144 nt = read_xvg(fn, &y, &ny);
2146 /* Check consistency */
2149 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2153 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2154 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2156 printf("Expecting these columns in pull file:\n"
2157 "\t%d reference columns for each individual pull coordinate\n"
2158 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2159 printf("With %d pull groups, expect %d columns (including the time column)\n", header->npullcrds, nColExpect);
2162 if (ny != nColExpect)
2164 gmx_fatal(FARGS, "Found %d pull coodinates in %s,\n but %d data columns in %s (expected %d)\n"
2165 "\nMaybe you confused options -ix and -if ?\n",
2166 header->npullcrds, fntpr, ny-1, fn, nColExpect-1);
2171 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2181 window->dt = y[0][1]-y[0][0];
2183 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2185 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2188 /* Need to alocate memory and set up structure */
2192 /* Use only groups selected with option -is file */
2193 if (header->npullcrds != groupsel->n)
2195 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2196 header->npullcrds, groupsel->n);
2198 window->nPull = groupsel->nUse;
2202 window->nPull = header->npullcrds;
2205 window->nBin = bins;
2206 snew(window->Histo, window->nPull);
2207 snew(window->z, window->nPull);
2208 snew(window->k, window->nPull);
2209 snew(window->pos, window->nPull);
2210 snew(window->N, window->nPull);
2211 snew(window->Ntot, window->nPull);
2212 snew(window->g, window->nPull);
2213 snew(window->bsWeight, window->nPull);
2214 window->bContrib = 0;
2216 if (opt->bCalcTauInt)
2218 snew(window->ztime, window->nPull);
2222 window->ztime = NULL;
2224 snew(lennow, window->nPull);
2226 for (g = 0; g < window->nPull; ++g)
2229 window->bsWeight[g] = 1.;
2230 snew(window->Histo[g], bins);
2232 window->Ntot[g] = 0;
2234 if (opt->bCalcTauInt)
2236 window->ztime[g] = NULL;
2240 /* Copying umbrella center and force const is more involved since not
2241 all pull groups from header (tpr file) may be used in window variable */
2242 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2244 if (groupsel && (groupsel->bUse[g] == FALSE))
2248 window->k[gUsed] = header->k[g];
2249 window->pos[gUsed] = header->umbInitDist[g];
2254 { /* only determine min and max */
2257 min = max = bins = 0; /* Get rid of warnings */
2261 for (i = 0; i < nt; i++)
2263 /* Do you want that time frame? */
2264 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2266 /* get time step of pdo file and get dstep from opt->dt */
2276 dstep = static_cast<int>(opt->dt/dt+0.5);
2284 window->dt = dt*dstep;
2288 dt_ok = (i%dstep == 0);
2289 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2291 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2292 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2295 /* Note: if groupsel == NULL:
2296 * all groups in pullf/x file are stored in this window, and gUsed == g
2297 * if groupsel != NULL:
2298 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2301 for (g = 0; g < header->npullcrds; ++g)
2303 /* was this group selected for application in WHAM? */
2304 if (groupsel && (groupsel->bUse[g] == FALSE))
2313 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2315 pos = -force/header->k[g] + header->umbInitDist[g];
2319 /* pick the right column from:
2320 * time ref1[ndim] coord1[ndim] ref2[ndim] coord2[ndim] ...
2322 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2323 switch (header->pull_geometry)
2326 pos = dist_ndim(y + column, header->pull_ndim, i);
2332 gmx_fatal(FARGS, "Bad error, this error should have been catched before. Ups.\n");
2336 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2350 if (gUsed >= window->nPull)
2352 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2353 gUsed, window->nPull);
2356 if (opt->bCalcTauInt && !bGetMinMax)
2358 /* save time series for autocorrelation analysis */
2359 ntot = window->Ntot[gUsed];
2360 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2361 if (ntot >= lennow[gUsed])
2363 lennow[gUsed] += blocklen;
2364 srenew(window->ztime[gUsed], lennow[gUsed]);
2366 window->ztime[gUsed][ntot] = pos;
2369 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2374 while ( (ibin += bins) < 0)
2379 else if (ibin >= bins)
2381 while ( (ibin -= bins) >= bins)
2387 if (ibin >= 0 && ibin < bins)
2389 window->Histo[gUsed][ibin] += 1.;
2392 window->Ntot[gUsed]++;
2396 else if (t > opt->tmax)
2400 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2412 for (i = 0; i < ny; i++)
2418 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2419 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2420 t_UmbrellaHeader* header,
2421 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2424 real mintmp, maxtmp;
2426 printf("Reading %d tpr and pullf files\n", nfiles/2);
2428 /* min and max not given? */
2431 printf("Automatic determination of boundaries...\n");
2434 for (i = 0; i < nfiles; i++)
2436 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2438 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2440 read_tpr_header(fnTprs[i], header, opt);
2441 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2443 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2445 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2446 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2447 if (maxtmp > opt->max)
2451 if (mintmp < opt->min)
2456 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2457 if (opt->bBoundsOnly)
2459 printf("Found option -boundsonly, now exiting.\n");
2463 /* store stepsize in profile */
2464 opt->dz = (opt->max-opt->min)/opt->bins;
2466 for (i = 0; i < nfiles; i++)
2468 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2470 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2472 read_tpr_header(fnTprs[i], header, opt);
2473 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2475 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2477 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2478 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2479 if (window[i].Ntot[0] == 0)
2481 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2485 for (i = 0; i < nfiles; i++)
2494 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2496 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2497 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2499 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2501 int nlines, ny, i, ig;
2504 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2505 nlines = read_xvg(fn, &iact, &ny);
2506 if (nlines != nwins)
2508 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2511 for (i = 0; i < nlines; i++)
2513 if (window[i].nPull != ny)
2515 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2516 "number of pull groups is different in different simulations. That is not\n"
2517 "supported yet. Sorry.\n");
2519 for (ig = 0; ig < window[i].nPull; ig++)
2521 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2522 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2524 if (iact[ig][i] <= 0.0)
2526 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2533 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2536 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2537 * that -in case of limited sampling- the ACT may be severely underestimated.
2538 * Note: the g=1+2tau are overwritten.
2539 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2542 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2545 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2547 /* only evaluate within +- 3sigma of the Gausian */
2548 siglim = 3.0*opt->sigSmoothIact;
2549 siglim2 = dsqr(siglim);
2550 /* pre-factor of Gaussian */
2551 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2552 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2554 for (i = 0; i < nwins; i++)
2556 snew(window[i].tausmooth, window[i].nPull);
2557 for (ig = 0; ig < window[i].nPull; ig++)
2561 pos = window[i].pos[ig];
2562 for (j = 0; j < nwins; j++)
2564 for (jg = 0; jg < window[j].nPull; jg++)
2566 dpos2 = dsqr(window[j].pos[jg]-pos);
2567 if (dpos2 < siglim2)
2569 w = gaufact*exp(-dpos2*invtwosig2);
2571 tausm += w*window[j].tau[jg];
2572 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2573 w,dpos2,pos,gaufact,invtwosig2); */
2578 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2580 window[i].tausmooth[ig] = tausm;
2584 window[i].tausmooth[ig] = window[i].tau[ig];
2586 window[i].g[ig] = 1+2*tausm/window[i].dt;
2591 //! Stop integrating autoccorelation function when ACF drops under this value
2592 #define WHAM_AC_ZERO_LIMIT 0.05
2594 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2596 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2597 t_UmbrellaOptions *opt, const char *fn)
2599 int i, ig, ncorr, ntot, j, k, *count, restart;
2600 real *corr, c0, dt, tmp;
2601 real *ztime, av, tausteps;
2602 FILE *fp, *fpcorr = 0;
2606 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2607 "time [ps]", "autocorrelation function", opt->oenv);
2611 for (i = 0; i < nwins; i++)
2613 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2615 ntot = window[i].Ntot[0];
2617 /* using half the maximum time as length of autocorrelation function */
2621 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2622 " points. Provide more pull data!", ntot);
2625 /* snew(corrSq,ncorr); */
2628 snew(window[i].tau, window[i].nPull);
2629 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2635 for (ig = 0; ig < window[i].nPull; ig++)
2637 if (ntot != window[i].Ntot[ig])
2639 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2640 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2642 ztime = window[i].ztime[ig];
2644 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2645 for (j = 0, av = 0; (j < ntot); j++)
2650 for (k = 0; (k < ncorr); k++)
2655 for (j = 0; (j < ntot); j += restart)
2657 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2659 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2661 /* corrSq[k] += tmp*tmp; */
2665 /* divide by nr of frames for each time displacement */
2666 for (k = 0; (k < ncorr); k++)
2668 /* count probably = (ncorr-k+(restart-1))/restart; */
2669 corr[k] = corr[k]/count[k];
2670 /* variance of autocorrelation function */
2671 /* corrSq[k]=corrSq[k]/count[k]; */
2673 /* normalize such that corr[0] == 0 */
2675 for (k = 0; (k < ncorr); k++)
2678 /* corrSq[k]*=c0*c0; */
2681 /* write ACFs in verbose mode */
2684 for (k = 0; (k < ncorr); k++)
2686 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2688 fprintf(fpcorr, "&\n");
2691 /* esimate integrated correlation time, fitting is too unstable */
2692 tausteps = 0.5*corr[0];
2693 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2694 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2696 tausteps += corr[j];
2699 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2700 Kumar et al, eq. 28 ff. */
2701 window[i].tau[ig] = tausteps*dt;
2702 window[i].g[ig] = 1+2*tausteps;
2703 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2711 gmx_ffclose(fpcorr);
2714 /* plot IACT along reaction coordinate */
2715 fp = xvgropen(fn, "Integrated autocorrelation times", "z", "IACT [ps]", opt->oenv);
2716 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2717 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2718 for (i = 0; i < nwins; i++)
2720 fprintf(fp, "# %3d ", i);
2721 for (ig = 0; ig < window[i].nPull; ig++)
2723 fprintf(fp, " %11g", window[i].tau[ig]);
2727 for (i = 0; i < nwins; i++)
2729 for (ig = 0; ig < window[i].nPull; ig++)
2731 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2734 if (opt->sigSmoothIact > 0.0)
2736 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2737 opt->sigSmoothIact);
2738 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2739 smoothIact(window, nwins, opt);
2740 fprintf(fp, "&\n@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2741 fprintf(fp, "@ s1 symbol color 2\n");
2742 for (i = 0; i < nwins; i++)
2744 for (ig = 0; ig < window[i].nPull; ig++)
2746 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2751 printf("Wrote %s\n", fn);
2755 * compute average and sigma of each umbrella histogram
2757 void averageSigma(t_UmbrellaWindow *window, int nwins)
2760 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2762 for (i = 0; i < nwins; i++)
2764 snew(window[i].aver, window[i].nPull);
2765 snew(window[i].sigma, window[i].nPull);
2767 ntot = window[i].Ntot[0];
2768 for (ig = 0; ig < window[i].nPull; ig++)
2770 ztime = window[i].ztime[ig];
2771 for (k = 0, av = 0.; k < ntot; k++)
2776 for (k = 0, sum2 = 0.; k < ntot; k++)
2781 sig = sqrt(sum2/ntot);
2782 window[i].aver[ig] = av;
2784 /* Note: This estimate for sigma is biased from the limited sampling.
2785 Correct sigma by n/(n-1) where n = number of independent
2786 samples. Only possible if IACT is known.
2790 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2791 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2795 window[i].sigma[ig] = sig;
2797 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2804 * Use histograms to compute average force on pull group.
2806 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2808 int i, j, bins = opt->bins, k;
2809 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2812 dz = (max-min)/bins;
2813 ztot = opt->max-min;
2816 /* Compute average displacement from histograms */
2817 for (j = 0; j < nWindows; ++j)
2819 snew(window[j].forceAv, window[j].nPull);
2820 for (k = 0; k < window[j].nPull; ++k)
2824 for (i = 0; i < opt->bins; ++i)
2826 temp = (1.0*i+0.5)*dz+min;
2827 distance = temp - window[j].pos[k];
2829 { /* in cyclic wham: */
2830 if (distance > ztot_half) /* |distance| < ztot_half */
2834 else if (distance < -ztot_half)
2839 w = window[j].Histo[k][i]/window[j].g[k];
2840 displAv += w*distance;
2842 /* Are we near min or max? We are getting wrong forces from the histgrams since
2843 the histograms are zero outside [min,max). Therefore, assume that the position
2844 on the other side of the histomgram center is equally likely. */
2847 posmirrored = window[j].pos[k]-distance;
2848 if (posmirrored >= max || posmirrored < min)
2850 displAv += -w*distance;
2857 /* average force from average displacement */
2858 window[j].forceAv[k] = displAv*window[j].k[k];
2859 /* sigma from average square displacement */
2860 /* window[j].sigma [k] = sqrt(displAv2); */
2861 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2867 * Check if the complete reaction coordinate is covered by the histograms
2869 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2870 t_UmbrellaOptions *opt)
2872 int i, ig, j, bins = opt->bins, bBoundary;
2873 real avcount = 0, z, relcount, *count;
2874 snew(count, opt->bins);
2876 for (j = 0; j < opt->bins; ++j)
2878 for (i = 0; i < nwins; i++)
2880 for (ig = 0; ig < window[i].nPull; ig++)
2882 count[j] += window[i].Histo[ig][j];
2885 avcount += 1.0*count[j];
2888 for (j = 0; j < bins; ++j)
2890 relcount = count[j]/avcount;
2891 z = (j+0.5)*opt->dz+opt->min;
2892 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2893 /* check for bins with no data */
2896 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2897 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2899 /* and check for poor sampling */
2900 else if (relcount < 0.005 && !bBoundary)
2902 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2908 /*! \brief Compute initial potential by integrating the average force
2910 * This speeds up the convergence by roughly a factor of 2
2912 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2914 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
2915 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
2918 dz = (opt->max-min)/bins;
2920 printf("Getting initial potential by integration.\n");
2922 /* Compute average displacement from histograms */
2923 computeAverageForce(window, nWindows, opt);
2925 /* Get force for each bin from all histograms in this bin, or, alternatively,
2926 if no histograms are inside this bin, from the closest histogram */
2929 for (j = 0; j < opt->bins; ++j)
2931 pos = (1.0*j+0.5)*dz+min;
2935 groupmin = winmin = 0;
2936 for (i = 0; i < nWindows; i++)
2938 for (ig = 0; ig < window[i].nPull; ig++)
2940 hispos = window[i].pos[ig];
2941 dist = fabs(hispos-pos);
2942 /* average force within bin */
2946 fAv += window[i].forceAv[ig];
2948 /* at the same time, rememer closest histogram */
2957 /* if no histogram found in this bin, use closest histogram */
2964 fAv = window[winmin].forceAv[groupmin];
2968 for (j = 1; j < opt->bins; ++j)
2970 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
2973 /* cyclic wham: linearly correct possible offset */
2976 diff = (pot[bins-1]-pot[0])/(bins-1);
2977 for (j = 1; j < opt->bins; ++j)
2984 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", "z", "PMF [kJ/mol]", opt->oenv);
2985 for (j = 0; j < opt->bins; ++j)
2987 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
2990 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
2993 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
2994 energy offsets which are usually determined by wham
2995 First: turn pot into probabilities:
2997 for (j = 0; j < opt->bins; ++j)
2999 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
3001 calc_z(pot, window, nWindows, opt, TRUE);
3007 //! Count number of words in an line
3008 static int wordcount(char *ptr)
3013 if (strlen(ptr) == 0)
3017 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3019 for (i = 0; (ptr[i] != '\0'); i++)
3021 is[cur] = isspace(ptr[i]);
3022 if ((i > 0) && (is[cur] && !is[1-cur]))
3031 /*! \brief Read input file for pull group selection (option -is)
3033 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3035 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3038 int i, iline, n, len = STRLEN, temp;
3039 char *ptr = 0, *tmpbuf = 0;
3040 char fmt[1024], fmtign[1024];
3041 int block = 1, sizenow;
3043 fp = gmx_ffopen(opt->fnGroupsel, "r");
3044 opt->groupsel = NULL;
3049 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3054 if (iline >= sizenow)
3057 srenew(opt->groupsel, sizenow);
3059 opt->groupsel[iline].n = n;
3060 opt->groupsel[iline].nUse = 0;
3061 snew(opt->groupsel[iline].bUse, n);
3064 for (i = 0; i < n; i++)
3066 strcpy(fmt, fmtign);
3068 if (sscanf(ptr, fmt, &temp))
3070 opt->groupsel[iline].bUse[i] = (temp > 0);
3071 if (opt->groupsel[iline].bUse[i])
3073 opt->groupsel[iline].nUse++;
3076 strcat(fmtign, "%*s");
3080 opt->nGroupsel = iline;
3081 if (nTpr != opt->nGroupsel)
3083 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3087 printf("\nUse only these pull groups:\n");
3088 for (iline = 0; iline < nTpr; iline++)
3090 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3091 for (i = 0; i < opt->groupsel[iline].n; i++)
3093 if (opt->groupsel[iline].bUse[i])
3106 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3108 //! Number of elements in fnm (used for command line parsing)
3109 #define NFILE asize(fnm)
3111 //! The main g_wham routine
3112 int gmx_wham(int argc, char *argv[])
3114 const char *desc[] = {
3115 "[THISMODULE] is an analysis program that implements the Weighted",
3116 "Histogram Analysis Method (WHAM). It is intended to analyze",
3117 "output files generated by umbrella sampling simulations to ",
3118 "compute a potential of mean force (PMF). [PAR] ",
3119 "At present, three input modes are supported.[BR]",
3120 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
3121 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
3122 " AND, with option [TT]-ix[tt], a file which contains file names of",
3123 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
3124 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
3125 " first pullx, etc.[BR]",
3126 "[TT]*[tt] Same as the previous input mode, except that the the user",
3127 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3128 " From the pull force the position in the umbrella potential is",
3129 " computed. This does not work with tabulated umbrella potentials.[BR]"
3130 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
3131 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3132 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
3133 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [TT].pdo[tt] file header",
3134 " must be similar to the following:[PAR]",
3135 "[TT]# UMBRELLA 3.0[BR]",
3136 "# Component selection: 0 0 1[BR]",
3138 "# Ref. Group 'TestAtom'[BR]",
3139 "# Nr. of pull groups 2[BR]",
3140 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
3141 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
3143 "The number of pull groups, umbrella positions, force constants, and names ",
3144 "may (of course) differ. Following the header, a time column and ",
3145 "a data column for each pull group follows (i.e. the displacement",
3146 "with respect to the umbrella center). Up to four pull groups are possible ",
3147 "per [TT].pdo[tt] file at present.[PAR]",
3148 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3149 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3150 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3151 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3152 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3153 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3154 "used, groupsel.dat looks like this:[BR]",
3158 "By default, the output files are[BR]",
3159 " [TT]-o[tt] PMF output file[BR]",
3160 " [TT]-hist[tt] Histograms output file[BR]",
3161 "Always check whether the histograms sufficiently overlap.[PAR]",
3162 "The umbrella potential is assumed to be harmonic and the force constants are ",
3163 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
3164 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
3165 "WHAM OPTIONS[BR]------------[BR]",
3166 " [TT]-bins[tt] Number of bins used in analysis[BR]",
3167 " [TT]-temp[tt] Temperature in the simulations[BR]",
3168 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
3169 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
3170 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
3171 "The data points that are used to compute the profile",
3172 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3173 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3174 "umbrella window.[PAR]",
3175 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3176 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3177 "With energy output, the energy in the first bin is defined to be zero. ",
3178 "If you want the free energy at a different ",
3179 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3180 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3181 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3182 "[THISMODULE] will make use of the",
3183 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3184 "reaction coordinate will assumed be be neighbors.[PAR]",
3185 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3186 "which may be useful for, e.g. membranes.[PAR]",
3187 "AUTOCORRELATIONS[BR]----------------[BR]",
3188 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3189 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3190 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3191 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3192 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3193 "Because the IACTs can be severely underestimated in case of limited ",
3194 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3195 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3196 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3197 "integration of the ACFs while the ACFs are larger 0.05.",
3198 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3199 "less robust) method such as fitting to a double exponential, you can ",
3200 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3201 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3202 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
3203 "ERROR ANALYSIS[BR]--------------[BR]",
3204 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3205 "otherwise the statistical error may be substantially underestimated. ",
3206 "More background and examples for the bootstrap technique can be found in ",
3207 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
3208 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3209 "Four bootstrapping methods are supported and ",
3210 "selected with [TT]-bs-method[tt].[BR]",
3211 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3212 "data points, and the bootstrap is carried out by assigning random weights to the ",
3213 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3214 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3215 "statistical error is underestimated.[BR]",
3216 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
3217 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3218 "(allowing duplication, i.e. sampling with replacement).",
3219 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
3220 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3221 "divided into blocks and only histograms within each block are mixed. Note that ",
3222 "the histograms within each block must be representative for all possible histograms, ",
3223 "otherwise the statistical error is underestimated.[BR]",
3224 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3225 "such that the generated data points are distributed according the given histograms ",
3226 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3227 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3228 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3229 "Note that this method may severely underestimate the error in case of limited sampling, ",
3230 "that is if individual histograms do not represent the complete phase space at ",
3231 "the respective positions.[BR]",
3232 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3233 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3234 "and width of the umbrella histograms. That method yields similar error estimates ",
3235 "like method [TT]traj[tt].[PAR]"
3236 "Bootstrapping output:[BR]",
3237 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
3238 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
3239 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3240 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3244 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3245 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3246 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3248 static t_UmbrellaOptions opt;
3251 { "-min", FALSE, etREAL, {&opt.min},
3252 "Minimum coordinate in profile"},
3253 { "-max", FALSE, etREAL, {&opt.max},
3254 "Maximum coordinate in profile"},
3255 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3256 "Determine min and max automatically"},
3257 { "-bins", FALSE, etINT, {&opt.bins},
3258 "Number of bins in profile"},
3259 { "-temp", FALSE, etREAL, {&opt.Temperature},
3261 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3263 { "-v", FALSE, etBOOL, {&opt.verbose},
3265 { "-b", FALSE, etREAL, {&opt.tmin},
3266 "First time to analyse (ps)"},
3267 { "-e", FALSE, etREAL, {&opt.tmax},
3268 "Last time to analyse (ps)"},
3269 { "-dt", FALSE, etREAL, {&opt.dt},
3270 "Analyse only every dt ps"},
3271 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3272 "Write histograms and exit"},
3273 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3274 "Determine min and max and exit (with [TT]-auto[tt])"},
3275 { "-log", FALSE, etBOOL, {&opt.bLog},
3276 "Calculate the log of the profile before printing"},
3277 { "-unit", FALSE, etENUM, {en_unit},
3278 "Energy unit in case of log output" },
3279 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3280 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3281 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3282 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3283 { "-sym", FALSE, etBOOL, {&opt.bSym},
3284 "Symmetrize profile around z=0"},
3285 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3286 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3287 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3288 "Calculate integrated autocorrelation times and use in wham"},
3289 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3290 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3291 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3292 "When computing autocorrelation functions, restart computing every .. (ps)"},
3293 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3294 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3295 "during smoothing"},
3296 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3297 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3298 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3299 "Bootstrap method" },
3300 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3301 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3302 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3303 "Seed for bootstrapping. (-1 = use time)"},
3304 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3305 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3306 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3307 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3308 { "-stepout", FALSE, etINT, {&opt.stepchange},
3309 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3310 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3311 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3315 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3316 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3317 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3318 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3319 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3320 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3321 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3322 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3323 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3324 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3325 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3326 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3329 int i, j, l, nfiles, nwins, nfiles2;
3330 t_UmbrellaHeader header;
3331 t_UmbrellaWindow * window = NULL;
3332 double *profile, maxchange = 1e20;
3333 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3334 char **fninTpr, **fninPull, **fninPdo;
3336 FILE *histout, *profout;
3337 char ylabel[256], title[256];
3340 opt.verbose = FALSE;
3341 opt.bHistOnly = FALSE;
3351 /* bootstrapping stuff */
3353 opt.bsMethod = bsMethod_hist;
3354 opt.tauBootStrap = 0.0;
3356 opt.histBootStrapBlockLength = 8;
3357 opt.bs_verbose = FALSE;
3362 opt.Temperature = 298;
3363 opt.Tolerance = 1e-6;
3364 opt.bBoundsOnly = FALSE;
3366 opt.bCalcTauInt = FALSE;
3367 opt.sigSmoothIact = 0.0;
3368 opt.bAllowReduceIact = TRUE;
3369 opt.bInitPotByIntegration = TRUE;
3370 opt.acTrestart = 1.0;
3371 opt.stepchange = 100;
3372 opt.stepUpdateContrib = 100;
3374 if (!parse_common_args(&argc, argv, PCA_BE_NICE,
3375 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3380 opt.unit = nenum(en_unit);
3381 opt.bsMethod = nenum(en_bsMethod);
3383 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3385 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3386 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3387 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3388 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3389 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3390 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3391 if (opt.bTab && opt.bPullf)
3393 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3394 "Provide pullx.xvg or pdo files!");
3397 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3399 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3401 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3403 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3404 "\n\n Check g_wham -h !");
3407 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3408 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3409 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3410 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3411 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3413 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3414 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3415 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3416 if ( (bMinSet || bMaxSet) && bAutoSet)
3418 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3421 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3423 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3426 if (bMinSet && opt.bAuto)
3428 printf("Note: min and max given, switching off -auto.\n");
3432 if (opt.bTauIntGiven && opt.bCalcTauInt)
3434 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3435 "the autocorrelation times. Not both.");
3438 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3440 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3441 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3443 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3445 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3446 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3449 /* Reading gmx4 pull output and tpr files */
3450 if (opt.bTpr || opt.bPullf || opt.bPullx)
3452 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3454 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3455 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3456 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3457 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3458 if (nfiles != nfiles2)
3460 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3461 opt.fnTpr, nfiles2, fnPull);
3464 /* Read file that selects the pull group to be used */
3465 if (opt.fnGroupsel != NULL)
3467 readPullGroupSelection(&opt, fninTpr, nfiles);
3470 window = initUmbrellaWindows(nfiles);
3471 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3474 { /* reading pdo files */
3475 if (opt.fnGroupsel != NULL)
3477 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3478 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3480 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3481 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3482 window = initUmbrellaWindows(nfiles);
3483 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3487 /* enforce equal weight for all histograms? */
3490 enforceEqualWeights(window, nwins);
3493 /* write histograms */
3494 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3495 "z", "count", opt.oenv);
3496 for (l = 0; l < opt.bins; ++l)
3498 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3499 for (i = 0; i < nwins; ++i)
3501 for (j = 0; j < window[i].nPull; ++j)
3503 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3506 fprintf(histout, "\n");
3508 gmx_ffclose(histout);
3509 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3512 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3516 /* Using tabulated umbrella potential */
3519 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3522 /* Integrated autocorrelation times provided ? */
3523 if (opt.bTauIntGiven)
3525 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3528 /* Compute integrated autocorrelation times */
3529 if (opt.bCalcTauInt)
3531 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3534 /* calc average and sigma for each histogram
3535 (maybe required for bootstrapping. If not, this is fast anyhow) */
3536 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3538 averageSigma(window, nwins);
3541 /* Get initial potential by simple integration */
3542 if (opt.bInitPotByIntegration)
3544 guessPotByIntegration(window, nwins, &opt);
3547 /* Check if complete reaction coordinate is covered */
3548 checkReactionCoordinateCovered(window, nwins, &opt);
3550 /* Calculate profile */
3551 snew(profile, opt.bins);
3559 if ( (i%opt.stepUpdateContrib) == 0)
3561 setup_acc_wham(profile, window, nwins, &opt);
3563 if (maxchange < opt.Tolerance)
3566 /* if (opt.verbose) */
3567 printf("Switched to exact iteration in iteration %d\n", i);
3569 calc_profile(profile, window, nwins, &opt, bExact);
3570 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3572 printf("\t%4d) Maximum change %e\n", i, maxchange);
3576 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3577 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3579 /* calc error from Kumar's formula */
3580 /* Unclear how the error propagates along reaction coordinate, therefore
3582 /* calc_error_kumar(profile,window, nwins,&opt); */
3584 /* Write profile in energy units? */
3587 prof_normalization_and_unit(profile, &opt);
3588 strcpy(ylabel, en_unit_label[opt.unit]);
3589 strcpy(title, "Umbrella potential");
3593 strcpy(ylabel, "Density of states");
3594 strcpy(title, "Density of states");
3597 /* symmetrize profile around z=0? */
3600 symmetrizeProfile(profile, &opt);
3603 /* write profile or density of states */
3604 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, "z", ylabel, opt.oenv);
3605 for (i = 0; i < opt.bins; ++i)
3607 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3609 gmx_ffclose(profout);
3610 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3612 /* Bootstrap Method */
3615 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3616 opt2fn("-hist", NFILE, fnm),
3617 ylabel, profile, window, nwins, &opt);
3621 freeUmbrellaWindows(window, nfiles);
3623 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3624 please_cite(stdout, "Hub2010");