2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
55 #include "gromacs/commandline/pargs.h"
56 #include "gromacs/legacyheaders/typedefs.h"
57 #include "gromacs/utility/smalloc.h"
58 #include "gromacs/math/vec.h"
59 #include "gromacs/legacyheaders/copyrite.h"
60 #include "gromacs/fileio/tpxio.h"
61 #include "gromacs/legacyheaders/names.h"
62 #include "gromacs/random/random.h"
64 #include "gromacs/legacyheaders/macros.h"
65 #include "gromacs/utility/cstringutil.h"
66 #include "gromacs/fileio/xvgr.h"
68 #include "gromacs/utility/fatalerror.h"
70 //! longest file names allowed in input files
71 #define WHAM_MAXFILELEN 2048
74 * x-axis legend for output files
76 static const char *xlabel = "\\xx\\f{} (nm)";
79 * enum for energy units
82 enSel, en_kJ, en_kCal, en_kT, enNr
85 * enum for type of input files (pdos, tpr, or pullf)
88 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
91 /*! \brief enum for bootstrapping method
93 * These bootstrap methods are supported:
94 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
95 * (bsMethod_BayesianHist)
96 * - bootstrap complete histograms (bsMethod_hist)
97 * - bootstrap trajectories from given umbrella histograms. This generates new
98 * "synthetic" histograms (bsMethod_traj)
99 * - bootstrap trajectories from Gaussian with mu/sigma computed from
100 * the respective histogram (bsMethod_trajGauss). This gives very similar
101 * results compared to bsMethod_traj.
103 * ********************************************************************
104 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
105 * JS Hub, BL de Groot, D van der Spoel
106 * g_wham - A free weighted histogram analysis implementation including
107 * robust error and autocorrelation estimates,
108 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
109 * ********************************************************************
112 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
113 bsMethod_traj, bsMethod_trajGauss
117 //! Parameters of the umbrella potentials
121 * \name Using umbrella pull code since gromacs 4.x
124 int npullcrds; //!< nr of pull coordinates in tpr file
125 int pull_geometry; //!< such as distance, direction
126 ivec pull_dim; //!< pull dimension with geometry distance
127 int pull_ndim; //!< nr of pull_dim != 0
128 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
129 real *k; //!< force constants in tpr file
130 real *init_dist; //!< reference displacements
131 real *umbInitDist; //!< reference displacement in umbrella direction
134 * \name Using PDO files common until gromacs 3.x
142 char PullName[4][256];
144 double UmbCons[4][3];
148 //! Data in the umbrella histograms
151 int nPull; //!< nr of pull groups in this pdo or pullf/x file
152 double **Histo; //!< nPull histograms
153 double **cum; //!< nPull cumulative distribution functions
154 int nBin; //!< nr of bins. identical to opt->bins
155 double *k; //!< force constants for the nPull groups
156 double *pos; //!< umbrella positions for the nPull groups
157 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
158 int *N; //!< nr of data points in nPull histograms.
159 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
161 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
163 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
164 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
167 double *tau; //!< intetrated autocorrelation time (IACT)
168 double *tausmooth; //!< smoothed IACT
170 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
172 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
174 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
176 /*! \brief average force estimated from average displacement, fAv=dzAv*k
178 * Used for integration to guess the potential.
181 real *aver; //!< average of histograms
182 real *sigma; //!< stddev of histograms
183 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
186 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
189 int n; //!< total nr of pull groups in this tpr file
190 int nUse; //!< nr of pull groups used
191 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
194 //! Parameters of WHAM
201 const char *fnTpr, *fnPullf, *fnGroupsel;
202 const char *fnPdo, *fnPullx; //!< file names of input
203 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
204 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
206 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
207 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
208 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
209 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
212 * \name Basic WHAM options
215 int bins; //!< nr of bins, min, max, and dz of profile
217 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
218 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
221 * \name Output control
224 gmx_bool bLog; //!< energy output (instead of probability) for profile
225 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
226 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
227 /*! \brief after wham, set prof to zero at this z-position.
228 * When bootstrapping, set zProf0 to a "stable" reference position.
231 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
233 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
234 gmx_bool bAuto; //!< determine min and max automatically but do not exit
236 gmx_bool verbose; //!< more noisy wham mode
237 int stepchange; //!< print maximum change in prof after how many interations
238 output_env_t oenv; //!< xvgr options
241 * \name Autocorrelation stuff
244 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
245 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
246 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
247 real acTrestart; //!< when computing ACT, time between restarting points
249 /* \brief Enforce the same weight for each umbella window, that is
250 * calculate with the same number of data points for
251 * each window. That can be reasonable, if the histograms
252 * have different length, but due to autocorrelation,
253 * a longer simulation should not have larger weightin wham.
259 * \name Bootstrapping stuff
262 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
264 /* \brief bootstrap method
266 * if == bsMethod_hist, consider complete histograms as independent
267 * data points and, hence, only mix complete histograms.
268 * if == bsMethod_BayesianHist, consider complete histograms
269 * as independent data points, but assign random weights
270 * to the histograms during the bootstrapping ("Bayesian bootstrap")
271 * In case of long correlations (e.g., inside a channel), these
272 * will yield a more realistic error.
273 * if == bsMethod_traj(Gauss), generate synthetic histograms
275 * histogram by generating an autocorrelated random sequence
276 * that is distributed according to the respective given
277 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
278 * (instead of from the umbrella histogram) to generate a new
283 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
286 /* \brief when mixing histograms, mix only histograms withing blocks
287 long the reaction coordinate xi. Avoids gaps along xi. */
288 int histBootStrapBlockLength;
290 int bsSeed; //!< random seed for bootstrapping
292 /* \brief Write cumulative distribution functions (CDFs) of histograms
293 and write the generated histograms for each bootstrap */
297 * \name tabulated umbrella potential stuff
301 double *tabX, *tabY, tabMin, tabMax, tabDz;
304 gmx_rng_t rng; //!< gromacs random number generator
307 //! Make an umbrella window (may contain several histograms)
308 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
310 t_UmbrellaWindow *win;
313 for (i = 0; i < nwin; i++)
315 win[i].Histo = win[i].cum = 0;
316 win[i].k = win[i].pos = win[i].z = 0;
317 win[i].N = win[i].Ntot = 0;
318 win[i].g = win[i].tau = win[i].tausmooth = 0;
322 win[i].aver = win[i].sigma = 0;
328 //! Delete an umbrella window (may contain several histograms)
329 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
332 for (i = 0; i < nwin; i++)
336 for (j = 0; j < win[i].nPull; j++)
338 sfree(win[i].Histo[j]);
343 for (j = 0; j < win[i].nPull; j++)
345 sfree(win[i].cum[j]);
350 for (j = 0; j < win[i].nPull; j++)
352 sfree(win[i].bContrib[j]);
364 sfree(win[i].tausmooth);
365 sfree(win[i].bContrib);
367 sfree(win[i].forceAv);
370 sfree(win[i].bsWeight);
376 * Read and setup tabulated umbrella potential
378 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
383 printf("Setting up tabulated potential from file %s\n", fn);
384 nl = read_xvg(fn, &y, &ny);
388 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
390 opt->tabMin = y[0][0];
391 opt->tabMax = y[0][nl-1];
392 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
395 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
396 "ascending z-direction", fn);
398 for (i = 0; i < nl-1; i++)
400 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
402 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
407 for (i = 0; i < nl; i++)
409 opt->tabX[i] = y[0][i];
410 opt->tabY[i] = y[1][i];
412 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
413 opt->tabMin, opt->tabMax, opt->tabDz);
416 //! Read the header of an PDO file (position, force const, nr of groups)
417 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
420 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
422 std::istringstream ist;
425 if (fgets(line, 2048, file) == NULL)
427 gmx_fatal(FARGS, "Error reading header from pdo file\n");
430 ist >> Buffer0 >> Buffer1 >> Buffer2;
431 if (strcmp(Buffer1, "UMBRELLA"))
433 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
434 "(Found in first line: `%s')\n",
435 Buffer1, "UMBRELLA", line);
437 if (strcmp(Buffer2, "3.0"))
439 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
443 if (fgets(line, 2048, file) == NULL)
445 gmx_fatal(FARGS, "Error reading header from pdo file\n");
448 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
449 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
451 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
452 if (header->nDim != 1)
454 gmx_fatal(FARGS, "Currently only supports one dimension");
458 if (fgets(line, 2048, file) == NULL)
460 gmx_fatal(FARGS, "Error reading header from pdo file\n");
463 ist >> Buffer0 >> Buffer1 >> header->nSkip;
466 if (fgets(line, 2048, file) == NULL)
468 gmx_fatal(FARGS, "Error reading header from pdo file\n");
471 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
474 if (fgets(line, 2048, file) == NULL)
476 gmx_fatal(FARGS, "Error reading header from pdo file\n");
479 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
483 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
487 for (i = 0; i < header->nPull; ++i)
489 if (fgets(line, 2048, file) == NULL)
491 gmx_fatal(FARGS, "Error reading header from pdo file\n");
494 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
495 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
496 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
500 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
501 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
505 if (fgets(line, 2048, file) == NULL)
507 gmx_fatal(FARGS, "Cannot read from file\n");
511 if (strcmp(Buffer3, "#####") != 0)
513 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
518 static char *fgets3(FILE *fp, char ptr[], int *len)
523 if (fgets(ptr, *len-1, fp) == NULL)
528 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
530 /* This line is longer than len characters, let's increase len! */
534 if (fgets(p-1, STRLEN, fp) == NULL)
540 if (ptr[slen-1] == '\n')
548 /*! \brief Read the data columns of and PDO file.
550 * TO DO: Get rid of the scanf function to avoid the clang warning.
551 * At the moment, this warning is avoided by hiding the format string
552 * the variable fmtlf.
554 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
555 int fileno, t_UmbrellaWindow * win,
556 t_UmbrellaOptions *opt,
557 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
559 int i, inttemp, bins, count, ntot;
560 real min, max, minfound = 1e20, maxfound = -1e20;
561 double temp, time, time0 = 0, dt;
563 t_UmbrellaWindow * window = 0;
564 gmx_bool timeok, dt_ok = 1;
565 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
566 int len = STRLEN, dstep = 1;
567 const int blocklen = 4096;
577 /* Need to alocate memory and set up structure */
578 window->nPull = header->nPull;
581 snew(window->Histo, window->nPull);
582 snew(window->z, window->nPull);
583 snew(window->k, window->nPull);
584 snew(window->pos, window->nPull);
585 snew(window->N, window->nPull);
586 snew(window->Ntot, window->nPull);
587 snew(window->g, window->nPull);
588 snew(window->bsWeight, window->nPull);
590 window->bContrib = 0;
592 if (opt->bCalcTauInt)
594 snew(window->ztime, window->nPull);
600 snew(lennow, window->nPull);
602 for (i = 0; i < window->nPull; ++i)
605 window->bsWeight[i] = 1.;
606 snew(window->Histo[i], bins);
607 window->k[i] = header->UmbCons[i][0];
608 window->pos[i] = header->UmbPos[i][0];
612 if (opt->bCalcTauInt)
614 window->ztime[i] = 0;
618 /* Done with setup */
624 min = max = bins = 0; /* Get rid of warnings */
629 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
633 if (ptr[0] == '#' || strlen(ptr) < 2)
638 /* Initiate format string */
640 strcat(fmtign, "%*s");
642 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
643 /* Round time to fs */
644 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
646 /* get time step of pdo file */
656 dstep = static_cast<int>(opt->dt/dt+0.5);
664 window->dt = dt*dstep;
669 dt_ok = ((count-1)%dstep == 0);
670 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
672 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
673 time,opt->tmin, opt->tmax, dt_ok,timeok); */
677 for (i = 0; i < header->nPull; ++i)
680 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
681 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
682 if (sscanf(ptr, fmt, &temp))
684 temp += header->UmbPos[i][0];
698 if (opt->bCalcTauInt)
700 /* save time series for autocorrelation analysis */
701 ntot = window->Ntot[i];
702 if (ntot >= lennow[i])
704 lennow[i] += blocklen;
705 srenew(window->ztime[i], lennow[i]);
707 window->ztime[i][ntot] = temp;
715 inttemp = static_cast<int> (temp);
722 else if (inttemp >= bins)
728 if (inttemp >= 0 && inttemp < bins)
730 window->Histo[i][inttemp] += 1.;
738 if (time > opt->tmax)
742 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
758 /*! \brief Set identical weights for all histograms
760 * Normally, the weight is given by the number data points in each
761 * histogram, together with the autocorrelation time. This can be overwritten
762 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
763 * an appropriate autocorrelation times instead of using this function.
765 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
767 int i, k, j, NEnforced;
770 NEnforced = window[0].Ntot[0];
771 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
772 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
773 /* enforce all histograms to have the same weight as the very first histogram */
775 for (j = 0; j < nWindows; ++j)
777 for (k = 0; k < window[j].nPull; ++k)
779 ratio = 1.0*NEnforced/window[j].Ntot[k];
780 for (i = 0; i < window[0].nBin; ++i)
782 window[j].Histo[k][i] *= ratio;
784 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
789 /*! \brief Simple linear interpolation between two given tabulated points
791 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
794 double pl, pu, dz, dp;
796 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
798 if (jl < 0 || ju >= opt->tabNbins)
800 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
801 "Provide an extended table.", dist, jl, ju);
805 dz = dist-opt->tabX[jl];
806 dp = (pu-pl)*dz/opt->tabDz;
812 * Check which bins substiantially contribute (accelerates WHAM)
814 * Don't worry, that routine does not mean we compute the PMF in limited precision.
815 * After rapid convergence (using only substiantal contributions), we always switch to
818 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
819 t_UmbrellaOptions *opt)
821 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
822 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
823 gmx_bool bAnyContrib;
824 static int bFirst = 1;
825 static double wham_contrib_lim;
829 for (i = 0; i < nWindows; ++i)
831 nGrptot += window[i].nPull;
833 wham_contrib_lim = opt->Tolerance/nGrptot;
836 ztot = opt->max-opt->min;
839 for (i = 0; i < nWindows; ++i)
841 if (!window[i].bContrib)
843 snew(window[i].bContrib, window[i].nPull);
845 for (j = 0; j < window[i].nPull; ++j)
847 if (!window[i].bContrib[j])
849 snew(window[i].bContrib[j], opt->bins);
852 for (k = 0; k < opt->bins; ++k)
854 temp = (1.0*k+0.5)*dz+min;
855 distance = temp - window[i].pos[j]; /* distance to umbrella center */
857 { /* in cyclic wham: */
858 if (distance > ztot_half) /* |distance| < ztot_half */
862 else if (distance < -ztot_half)
867 /* Note: there are two contributions to bin k in the wham equations:
868 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
869 ii) exp(- U/(8.314e-3*opt->Temperature))
870 where U is the umbrella potential
871 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
876 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
880 U = tabulated_pot(distance, opt); /* Use tabulated potential */
883 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
884 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
885 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
886 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
887 if (window[i].bContrib[j][k])
893 /* If this histo is far outside min and max all bContrib may be FALSE,
894 causing a floating point exception later on. To avoid that, switch
898 for (k = 0; k < opt->bins; ++k)
900 window[i].bContrib[j][k] = TRUE;
907 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
908 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
913 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
919 //! Compute the PMF (one of the two main WHAM routines)
920 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
921 t_UmbrellaOptions *opt, gmx_bool bExact)
924 double num, ztot_half, ztot, distance, min = opt->min, dz = opt->dz;
925 double denom, U = 0, temp = 0, invg;
927 ztot = opt->max-opt->min;
930 for (i = 0; i < opt->bins; ++i)
933 for (j = 0; j < nWindows; ++j)
935 for (k = 0; k < window[j].nPull; ++k)
937 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
938 temp = (1.0*i+0.5)*dz+min;
939 num += invg*window[j].Histo[k][i];
941 if (!(bExact || window[j].bContrib[k][i]))
945 distance = temp - window[j].pos[k]; /* distance to umbrella center */
947 { /* in cyclic wham: */
948 if (distance > ztot_half) /* |distance| < ztot_half */
952 else if (distance < -ztot_half)
960 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
964 U = tabulated_pot(distance, opt); /* Use tabulated potential */
966 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
969 profile[i] = num/denom;
973 //! Compute the free energy offsets z (one of the two main WHAM routines)
974 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
975 t_UmbrellaOptions *opt, gmx_bool bExact)
978 double U = 0, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot;
979 double MAX = -1e20, total = 0;
981 ztot = opt->max-opt->min;
984 for (i = 0; i < nWindows; ++i)
986 for (j = 0; j < window[i].nPull; ++j)
989 for (k = 0; k < window[i].nBin; ++k)
991 if (!(bExact || window[i].bContrib[j][k]))
995 temp = (1.0*k+0.5)*dz+min;
996 distance = temp - window[i].pos[j]; /* distance to umbrella center */
998 { /* in cyclic wham: */
999 if (distance > ztot_half) /* |distance| < ztot_half */
1003 else if (distance < -ztot_half)
1011 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1015 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1018 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1020 /* Avoid floating point exception if window is far outside min and max */
1023 total = -log(total);
1029 temp = fabs(total - window[i].z[j]);
1034 window[i].z[j] = total;
1040 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1041 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1043 int i, j, bins = opt->bins;
1044 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1047 if (min > 0. || max < 0.)
1049 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1050 opt->min, opt->max);
1055 for (i = 0; i < bins; i++)
1059 /* bin left of zsym */
1060 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1061 if (j >= 0 && (j+1) < bins)
1063 /* interpolate profile linearly between bins j and j+1 */
1064 z1 = min+(j+0.5)*dz;
1066 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1067 /* average between left and right */
1068 prof2[i] = 0.5*(profsym+profile[i]);
1072 prof2[i] = profile[i];
1076 memcpy(profile, prof2, bins*sizeof(double));
1080 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1081 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1084 double unit_factor = 1., R_MolarGasConst, diff;
1086 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1089 /* Not log? Nothing to do! */
1095 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1096 if (opt->unit == en_kT)
1100 else if (opt->unit == en_kJ)
1102 unit_factor = R_MolarGasConst*opt->Temperature;
1104 else if (opt->unit == en_kCal)
1106 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1110 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1113 for (i = 0; i < bins; i++)
1115 if (profile[i] > 0.0)
1117 profile[i] = -log(profile[i])*unit_factor;
1121 /* shift to zero at z=opt->zProf0 */
1122 if (!opt->bProf0Set)
1128 /* Get bin with shortest distance to opt->zProf0
1129 (-0.5 from bin position and +0.5 from rounding cancel) */
1130 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1135 else if (imin >= bins)
1139 diff = profile[imin];
1143 for (i = 0; i < bins; i++)
1149 //! Make an array of random integers (used for bootstrapping)
1150 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1152 int ipull, blockBase, nr, ipullRandom;
1154 if (blockLength == 0)
1156 blockLength = nPull;
1159 for (ipull = 0; ipull < nPull; ipull++)
1161 blockBase = (ipull/blockLength)*blockLength;
1163 { /* make sure nothing bad happens in the last block */
1164 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1165 ipullRandom = blockBase + nr;
1167 while (ipullRandom >= nPull);
1168 if (ipullRandom < 0 || ipullRandom >= nPull)
1170 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1171 "blockLength = %d, blockBase = %d\n",
1172 ipullRandom, nPull, nr, blockLength, blockBase);
1174 randomArray[ipull] = ipullRandom;
1176 /*for (ipull=0; ipull<nPull; ipull++)
1177 printf("%d ",randomArray[ipull]); printf("\n"); */
1180 /*! \brief Set pull group information of a synthetic histogram
1182 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1183 * but it is not required if we bootstrap complete histograms.
1185 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1186 t_UmbrellaWindow *thisWindow, int pullid)
1188 synthWindow->N [0] = thisWindow->N [pullid];
1189 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1190 synthWindow->pos [0] = thisWindow->pos [pullid];
1191 synthWindow->z [0] = thisWindow->z [pullid];
1192 synthWindow->k [0] = thisWindow->k [pullid];
1193 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1194 synthWindow->g [0] = thisWindow->g [pullid];
1195 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1198 /*! \brief Calculate cumulative distribution function of of all histograms.
1200 * This allow to create random number sequences
1201 * which are distributed according to the histograms. Required to generate
1202 * the "synthetic" histograms for the Bootstrap method
1204 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1205 t_UmbrellaOptions *opt, const char *fnhist)
1209 char *fn = 0, *buf = 0;
1212 if (opt->bs_verbose)
1214 snew(fn, strlen(fnhist)+10);
1215 snew(buf, strlen(fnhist)+10);
1216 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1217 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1221 for (i = 0; i < nWindows; i++)
1223 snew(window[i].cum, window[i].nPull);
1224 for (j = 0; j < window[i].nPull; j++)
1226 snew(window[i].cum[j], nbin+1);
1227 window[i].cum[j][0] = 0.;
1228 for (k = 1; k <= nbin; k++)
1230 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1233 /* normalize CDFs. Ensure cum[nbin]==1 */
1234 last = window[i].cum[j][nbin];
1235 for (k = 0; k <= nbin; k++)
1237 window[i].cum[j][k] /= last;
1242 printf("Cumulative distriubtion functions of all histograms created.\n");
1243 if (opt->bs_verbose)
1245 for (k = 0; k <= nbin; k++)
1247 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1248 for (i = 0; i < nWindows; i++)
1250 for (j = 0; j < window[i].nPull; j++)
1252 fprintf(fp, "%g\t", window[i].cum[j][k]);
1257 printf("Wrote cumulative distribution functions to %s\n", fn);
1265 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1267 * This is used to generate a random sequence distributed according to a histogram
1269 void searchCumulative(double xx[], int n, double x, int *j)
1291 else if (x == xx[n-1])
1301 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1302 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1303 int pullid, t_UmbrellaOptions *opt)
1305 int N, i, nbins, r_index, ibin;
1306 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1309 N = thisWindow->N[pullid];
1310 dt = thisWindow->dt;
1311 nbins = thisWindow->nBin;
1313 /* tau = autocorrelation time */
1314 if (opt->tauBootStrap > 0.0)
1316 tausteps = opt->tauBootStrap/dt;
1318 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1320 /* calc tausteps from g=1+2tausteps */
1321 g = thisWindow->g[pullid];
1327 "When generating hypothetical trajctories from given umbrella histograms,\n"
1328 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1329 "cannot be predicted. You have 3 options:\n"
1330 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1331 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1333 " with option -iiact for all umbrella windows.\n"
1334 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1335 " Use option (3) only if you are sure what you're doing, you may severely\n"
1336 " underestimate the error if a too small ACT is given.\n");
1337 gmx_fatal(FARGS, errstr);
1340 synthWindow->N [0] = N;
1341 synthWindow->pos [0] = thisWindow->pos[pullid];
1342 synthWindow->z [0] = thisWindow->z[pullid];
1343 synthWindow->k [0] = thisWindow->k[pullid];
1344 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1345 synthWindow->g [0] = thisWindow->g [pullid];
1346 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1348 for (i = 0; i < nbins; i++)
1350 synthWindow->Histo[0][i] = 0.;
1353 if (opt->bsMethod == bsMethod_trajGauss)
1355 sig = thisWindow->sigma [pullid];
1356 mu = thisWindow->aver [pullid];
1359 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1361 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1363 z = a*x + sqrt(1-a^2)*y
1364 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1365 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1367 C(t) = exp(-t/tau) with tau=-1/ln(a)
1369 Then, use error function to turn the Gaussian random variable into a uniformly
1370 distributed one in [0,1]. Eventually, use cumulative distribution function of
1371 histogram to get random variables distributed according to histogram.
1372 Note: The ACT of the flat distribution and of the generated histogram is not
1373 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1375 a = exp(-1.0/tausteps);
1377 invsqrt2 = 1./sqrt(2.0);
1379 /* init random sequence */
1380 x = gmx_rng_gaussian_table(opt->rng);
1382 if (opt->bsMethod == bsMethod_traj)
1384 /* bootstrap points from the umbrella histograms */
1385 for (i = 0; i < N; i++)
1387 y = gmx_rng_gaussian_table(opt->rng);
1389 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1390 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1392 r = 0.5*(1+gmx_erf(x*invsqrt2));
1393 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1394 synthWindow->Histo[0][r_index] += 1.;
1397 else if (opt->bsMethod == bsMethod_trajGauss)
1399 /* bootstrap points from a Gaussian with the same average and sigma
1400 as the respective umbrella histogram. The idea was, that -given
1401 limited sampling- the bootstrapped histograms are otherwise biased
1402 from the limited sampling of the US histos. However, bootstrapping from
1403 the Gaussian seems to yield a similar estimate. */
1407 y = gmx_rng_gaussian_table(opt->rng);
1410 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1415 while ( (ibin += nbins) < 0)
1420 else if (ibin >= nbins)
1422 while ( (ibin -= nbins) >= nbins)
1429 if (ibin >= 0 && ibin < nbins)
1431 synthWindow->Histo[0][ibin] += 1.;
1438 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1442 /*! \brief Write all histograms to a file
1444 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1445 * sets of bootstrapped histograms.
1447 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1448 int bs_index, t_UmbrellaOptions *opt)
1450 char *fn = 0, *buf = 0, title[256];
1456 snew(fn, strlen(fnhist)+10);
1457 snew(buf, strlen(fnhist)+1);
1458 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1459 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1463 fn = gmx_strdup(fnhist);
1464 strcpy(title, "Umbrella histograms");
1467 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1470 /* Write histograms */
1471 for (l = 0; l < bins; ++l)
1473 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1474 for (i = 0; i < nWindows; ++i)
1476 for (j = 0; j < window[i].nPull; ++j)
1478 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1485 printf("Wrote %s\n", fn);
1493 //! Used for qsort to sort random numbers
1494 int func_wham_is_larger(const void *a, const void *b)
1513 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1514 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1521 /* generate ordered random numbers between 0 and nAllPull */
1522 for (i = 0; i < nAllPull-1; i++)
1524 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1526 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1527 r[nAllPull-1] = 1.0*nAllPull;
1529 synthwin[0].bsWeight[0] = r[0];
1530 for (i = 1; i < nAllPull; i++)
1532 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1535 /* avoid to have zero weight by adding a tiny value */
1536 for (i = 0; i < nAllPull; i++)
1538 if (synthwin[i].bsWeight[0] < 1e-5)
1540 synthwin[i].bsWeight[0] = 1e-5;
1547 //! The main bootstrapping routine
1548 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1549 char* ylabel, double *profile,
1550 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1552 t_UmbrellaWindow * synthWindow;
1553 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1554 int i, j, *randomArray = 0, winid, pullid, ib;
1555 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1557 gmx_bool bExact = FALSE;
1559 /* init random generator */
1560 if (opt->bsSeed == -1)
1562 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1566 opt->rng = gmx_rng_init(opt->bsSeed);
1569 snew(bsProfile, opt->bins);
1570 snew(bsProfiles_av, opt->bins);
1571 snew(bsProfiles_av2, opt->bins);
1573 /* Create array of all pull groups. Note that different windows
1574 may have different nr of pull groups
1575 First: Get total nr of pull groups */
1577 for (i = 0; i < nWindows; i++)
1579 nAllPull += window[i].nPull;
1581 snew(allPull_winId, nAllPull);
1582 snew(allPull_pullId, nAllPull);
1584 /* Setup one array of all pull groups */
1585 for (i = 0; i < nWindows; i++)
1587 for (j = 0; j < window[i].nPull; j++)
1589 allPull_winId[iAllPull] = i;
1590 allPull_pullId[iAllPull] = j;
1595 /* setup stuff for synthetic windows */
1596 snew(synthWindow, nAllPull);
1597 for (i = 0; i < nAllPull; i++)
1599 synthWindow[i].nPull = 1;
1600 synthWindow[i].nBin = opt->bins;
1601 snew(synthWindow[i].Histo, 1);
1602 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1604 snew(synthWindow[i].Histo[0], opt->bins);
1606 snew(synthWindow[i].N, 1);
1607 snew(synthWindow[i].pos, 1);
1608 snew(synthWindow[i].z, 1);
1609 snew(synthWindow[i].k, 1);
1610 snew(synthWindow[i].bContrib, 1);
1611 snew(synthWindow[i].g, 1);
1612 snew(synthWindow[i].bsWeight, 1);
1615 switch (opt->bsMethod)
1618 snew(randomArray, nAllPull);
1619 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1620 please_cite(stdout, "Hub2006");
1622 case bsMethod_BayesianHist:
1623 /* just copy all histogams into synthWindow array */
1624 for (i = 0; i < nAllPull; i++)
1626 winid = allPull_winId [i];
1627 pullid = allPull_pullId[i];
1628 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1632 case bsMethod_trajGauss:
1633 calc_cumulatives(window, nWindows, opt, fnhist);
1636 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1639 /* do bootstrapping */
1640 fp = xvgropen(fnprof, "Boot strap profiles", xlabel, ylabel, opt->oenv);
1641 for (ib = 0; ib < opt->nBootStrap; ib++)
1643 printf(" *******************************************\n"
1644 " ******** Start bootstrap nr %d ************\n"
1645 " *******************************************\n", ib+1);
1647 switch (opt->bsMethod)
1650 /* bootstrap complete histograms from given histograms */
1651 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1652 for (i = 0; i < nAllPull; i++)
1654 winid = allPull_winId [randomArray[i]];
1655 pullid = allPull_pullId[randomArray[i]];
1656 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1659 case bsMethod_BayesianHist:
1660 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1661 setRandomBsWeights(synthWindow, nAllPull, opt);
1664 case bsMethod_trajGauss:
1665 /* create new histos from given histos, that is generate new hypothetical
1667 for (i = 0; i < nAllPull; i++)
1669 winid = allPull_winId[i];
1670 pullid = allPull_pullId[i];
1671 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1676 /* write histos in case of verbose output */
1677 if (opt->bs_verbose)
1679 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1686 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1689 if ( (i%opt->stepUpdateContrib) == 0)
1691 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1693 if (maxchange < opt->Tolerance)
1697 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1699 printf("\t%4d) Maximum change %e\n", i, maxchange);
1701 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1704 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1705 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1709 prof_normalization_and_unit(bsProfile, opt);
1712 /* symmetrize profile around z=0 */
1715 symmetrizeProfile(bsProfile, opt);
1718 /* save stuff to get average and stddev */
1719 for (i = 0; i < opt->bins; i++)
1722 bsProfiles_av[i] += tmp;
1723 bsProfiles_av2[i] += tmp*tmp;
1724 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1726 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1730 /* write average and stddev */
1731 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1732 if (output_env_get_print_xvgr_codes(opt->oenv))
1734 fprintf(fp, "@TYPE xydy\n");
1736 for (i = 0; i < opt->bins; i++)
1738 bsProfiles_av [i] /= opt->nBootStrap;
1739 bsProfiles_av2[i] /= opt->nBootStrap;
1740 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1741 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1742 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1745 printf("Wrote boot strap result to %s\n", fnres);
1748 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1749 int whaminFileType(char *fn)
1753 if (strcmp(fn+len-3, "tpr") == 0)
1757 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1759 return whamin_pullxf;
1761 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1767 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1769 return whamin_unknown;
1772 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1773 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1774 t_UmbrellaOptions *opt)
1776 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1777 int nread, sizenow, i, block = 1;
1780 fp = gmx_ffopen(fn, "r");
1783 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1785 if (strlen(tmp) >= WHAM_MAXFILELEN)
1787 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1789 if (nread >= sizenow)
1792 srenew(filename, sizenow);
1793 for (i = sizenow-block; i < sizenow; i++)
1795 snew(filename[i], WHAM_MAXFILELEN);
1798 /* remove newline if there is one */
1799 if (tmp[strlen(tmp)-1] == '\n')
1801 tmp[strlen(tmp)-1] = '\0';
1803 strcpy(filename[nread], tmp);
1806 printf("Found file %s in %s\n", filename[nread], fn);
1810 *filenamesRet = filename;
1814 //! Open a file or a pipe to a gzipped file
1815 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1817 char Buffer[1024], gunzip[1024], *Path = 0;
1819 static gmx_bool bFirst = 1;
1821 /* gzipped pdo file? */
1822 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1824 /* search gunzip executable */
1825 if (!(Path = getenv("GMX_PATH_GZIP")))
1827 if (gmx_fexist("/bin/gunzip"))
1829 sprintf(gunzip, "%s", "/bin/gunzip");
1831 else if (gmx_fexist("/usr/bin/gunzip"))
1833 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1837 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1838 "You may want to define the path to gunzip "
1839 "with the environment variable GMX_PATH_GZIP.", gunzip);
1844 sprintf(gunzip, "%s/gunzip", Path);
1845 if (!gmx_fexist(gunzip))
1847 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1848 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1853 printf("Using gunzig executable %s\n", gunzip);
1856 if (!gmx_fexist(fn))
1858 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1860 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1863 printf("Executing command '%s'\n", Buffer);
1866 if ((pipe = popen(Buffer, "r")) == NULL)
1868 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1871 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1877 pipe = gmx_ffopen(fn, "r");
1884 //! Close file or pipe
1885 void pdo_close_file(FILE *fp)
1894 //! Reading all pdo files
1895 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1896 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1899 real mintmp, maxtmp, done = 0.;
1902 /* char Buffer0[1000]; */
1906 gmx_fatal(FARGS, "No files found. Hick.");
1909 /* if min and max are not given, get min and max from the input files */
1912 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1915 for (i = 0; i < nfiles; ++i)
1917 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1918 /*fgets(Buffer0,999,file);
1919 fprintf(stderr,"First line '%s'\n",Buffer0); */
1920 done = 100.0*(i+1)/nfiles;
1921 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1926 read_pdo_header(file, header, opt);
1927 /* here only determine min and max of this window */
1928 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1929 if (maxtmp > opt->max)
1933 if (mintmp < opt->min)
1939 pdo_close_file(file);
1947 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1948 if (opt->bBoundsOnly)
1950 printf("Found option -boundsonly, now exiting.\n");
1954 /* store stepsize in profile */
1955 opt->dz = (opt->max-opt->min)/opt->bins;
1957 /* Having min and max, we read in all files */
1958 /* Loop over all files */
1959 for (i = 0; i < nfiles; ++i)
1961 done = 100.0*(i+1)/nfiles;
1962 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1967 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1968 read_pdo_header(file, header, opt);
1969 /* load data into window */
1970 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
1971 if ((window+i)->Ntot[0] == 0)
1973 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1977 pdo_close_file(file);
1985 for (i = 0; i < nfiles; ++i)
1992 //! translate 0/1 to N/Y to write pull dimensions
1993 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
1995 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
1996 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2001 static int first = 1;
2003 /* printf("Reading %s \n",fn); */
2004 read_tpx_state(fn, &ir, &state, NULL, NULL);
2006 if (ir.ePull != epullUMBRELLA)
2008 gmx_fatal(FARGS, "This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
2009 " (ir.ePull = %d)\n", epull_names[ir.ePull], ir.ePull);
2012 /* nr of pull groups */
2013 ncrd = ir.pull->ncoord;
2016 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d pull coordinates\n", ncrd);
2019 header->npullcrds = ir.pull->ncoord;
2020 header->pull_geometry = ir.pull->eGeom;
2021 header->bPrintRef = ir.pull->bPrintRef;
2022 copy_ivec(ir.pull->dim, header->pull_dim);
2023 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2024 snew(header->k, ncrd);
2025 snew(header->init_dist, ncrd);
2026 snew(header->umbInitDist, ncrd);
2028 /* only z-direction with epullgCYL? */
2029 if (header->pull_geometry == epullgCYL)
2031 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2033 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2034 "However, found dimensions [%s %s %s]\n",
2035 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2036 int2YN(header->pull_dim[ZZ]));
2040 for (i = 0; i < ncrd; i++)
2042 header->k[i] = ir.pull->coord[i].k;
2043 if (header->k[i] == 0.0)
2045 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2046 "That doesn't seem to be an Umbrella tpr.\n",
2049 header->init_dist[i] = ir.pull->coord[i].init;
2051 /* initial distance to reference */
2052 switch (header->pull_geometry)
2055 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2059 header->umbInitDist[i] = header->init_dist[i];
2062 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2066 if (opt->verbose || first)
2068 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2069 "\tPull group coordinates%s expected in pullx files.\n",
2070 fn, header->npullcrds, epullg_names[header->pull_geometry],
2071 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2072 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2073 for (i = 0; i < ncrd; i++)
2075 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2078 if (!opt->verbose && first)
2080 printf("\tUse option -v to see this output for all input tpr files\n\n");
2086 //! 2-norm in a ndim-dimensional space
2087 double dist_ndim(double **dx, int ndim, int line)
2091 for (i = 0; i < ndim; i++)
2093 r2 += sqr(dx[i][line]);
2098 //! Read pullx.xvg or pullf.xvg
2099 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2100 t_UmbrellaWindow * window,
2101 t_UmbrellaOptions *opt,
2102 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2103 t_groupselection *groupsel)
2105 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2106 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2107 real min, max, minfound = 1e20, maxfound = -1e20;
2108 gmx_bool dt_ok, timeok, bHaveForce;
2109 const char *quantity;
2110 const int blocklen = 4096;
2112 static gmx_bool bFirst = TRUE;
2115 * Data columns in pull output:
2116 * - in force output pullf.xvg:
2117 * No reference columns, one column per pull coordinate
2119 * - in position output pullx.xvg
2120 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2121 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2124 nColPerCrd = opt->bPullx ? header->pull_ndim : 1;
2125 quantity = opt->bPullx ? "position" : "force";
2127 if (opt->bPullx && header->bPrintRef)
2129 nColRefEachCrd = header->pull_ndim;
2136 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2137 bHaveForce = opt->bPullf;
2139 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2140 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2141 Sorry for the laziness, this is a To-do. */
2142 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2145 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2146 "reading \n(option -if) is supported at present, "
2147 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2148 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2149 epullg_names[header->pull_geometry]);
2152 nt = read_xvg(fn, &y, &ny);
2154 /* Check consistency */
2157 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2161 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2162 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2164 printf("Expecting these columns in pull file:\n"
2165 "\t%d reference columns for each individual pull coordinate\n"
2166 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2167 printf("With %d pull groups, expect %d columns (including the time column)\n", header->npullcrds, nColExpect);
2170 if (ny != nColExpect)
2172 gmx_fatal(FARGS, "Found %d pull coodinates in %s,\n but %d data columns in %s (expected %d)\n"
2173 "\nMaybe you confused options -ix and -if ?\n",
2174 header->npullcrds, fntpr, ny-1, fn, nColExpect-1);
2179 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2189 window->dt = y[0][1]-y[0][0];
2191 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2193 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2196 /* Need to alocate memory and set up structure */
2200 /* Use only groups selected with option -is file */
2201 if (header->npullcrds != groupsel->n)
2203 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2204 header->npullcrds, groupsel->n);
2206 window->nPull = groupsel->nUse;
2210 window->nPull = header->npullcrds;
2213 window->nBin = bins;
2214 snew(window->Histo, window->nPull);
2215 snew(window->z, window->nPull);
2216 snew(window->k, window->nPull);
2217 snew(window->pos, window->nPull);
2218 snew(window->N, window->nPull);
2219 snew(window->Ntot, window->nPull);
2220 snew(window->g, window->nPull);
2221 snew(window->bsWeight, window->nPull);
2222 window->bContrib = 0;
2224 if (opt->bCalcTauInt)
2226 snew(window->ztime, window->nPull);
2230 window->ztime = NULL;
2232 snew(lennow, window->nPull);
2234 for (g = 0; g < window->nPull; ++g)
2237 window->bsWeight[g] = 1.;
2238 snew(window->Histo[g], bins);
2240 window->Ntot[g] = 0;
2242 if (opt->bCalcTauInt)
2244 window->ztime[g] = NULL;
2248 /* Copying umbrella center and force const is more involved since not
2249 all pull groups from header (tpr file) may be used in window variable */
2250 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2252 if (groupsel && (groupsel->bUse[g] == FALSE))
2256 window->k[gUsed] = header->k[g];
2257 window->pos[gUsed] = header->umbInitDist[g];
2262 { /* only determine min and max */
2265 min = max = bins = 0; /* Get rid of warnings */
2269 for (i = 0; i < nt; i++)
2271 /* Do you want that time frame? */
2272 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2274 /* get time step of pdo file and get dstep from opt->dt */
2284 dstep = static_cast<int>(opt->dt/dt+0.5);
2292 window->dt = dt*dstep;
2296 dt_ok = (i%dstep == 0);
2297 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2299 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2300 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2303 /* Note: if groupsel == NULL:
2304 * all groups in pullf/x file are stored in this window, and gUsed == g
2305 * if groupsel != NULL:
2306 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2309 for (g = 0; g < header->npullcrds; ++g)
2311 /* was this group selected for application in WHAM? */
2312 if (groupsel && (groupsel->bUse[g] == FALSE))
2321 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2323 pos = -force/header->k[g] + header->umbInitDist[g];
2327 /* pick the right column from:
2328 * time ref1[ndim] coord1[ndim] ref2[ndim] coord2[ndim] ...
2330 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2331 switch (header->pull_geometry)
2334 pos = dist_ndim(y + column, header->pull_ndim, i);
2340 gmx_fatal(FARGS, "Bad error, this error should have been catched before. Ups.\n");
2344 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2358 if (gUsed >= window->nPull)
2360 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2361 gUsed, window->nPull);
2364 if (opt->bCalcTauInt && !bGetMinMax)
2366 /* save time series for autocorrelation analysis */
2367 ntot = window->Ntot[gUsed];
2368 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2369 if (ntot >= lennow[gUsed])
2371 lennow[gUsed] += blocklen;
2372 srenew(window->ztime[gUsed], lennow[gUsed]);
2374 window->ztime[gUsed][ntot] = pos;
2377 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2382 while ( (ibin += bins) < 0)
2387 else if (ibin >= bins)
2389 while ( (ibin -= bins) >= bins)
2395 if (ibin >= 0 && ibin < bins)
2397 window->Histo[gUsed][ibin] += 1.;
2400 window->Ntot[gUsed]++;
2404 else if (t > opt->tmax)
2408 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2420 for (i = 0; i < ny; i++)
2426 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2427 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2428 t_UmbrellaHeader* header,
2429 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2432 real mintmp, maxtmp;
2434 printf("Reading %d tpr and pullf files\n", nfiles/2);
2436 /* min and max not given? */
2439 printf("Automatic determination of boundaries...\n");
2442 for (i = 0; i < nfiles; i++)
2444 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2446 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2448 read_tpr_header(fnTprs[i], header, opt);
2449 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2451 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2453 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2454 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2455 if (maxtmp > opt->max)
2459 if (mintmp < opt->min)
2464 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2465 if (opt->bBoundsOnly)
2467 printf("Found option -boundsonly, now exiting.\n");
2471 /* store stepsize in profile */
2472 opt->dz = (opt->max-opt->min)/opt->bins;
2474 for (i = 0; i < nfiles; i++)
2476 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2478 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2480 read_tpr_header(fnTprs[i], header, opt);
2481 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2483 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2485 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2486 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2487 if (window[i].Ntot[0] == 0)
2489 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2493 for (i = 0; i < nfiles; i++)
2502 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2504 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2505 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2507 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2509 int nlines, ny, i, ig;
2512 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2513 nlines = read_xvg(fn, &iact, &ny);
2514 if (nlines != nwins)
2516 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2519 for (i = 0; i < nlines; i++)
2521 if (window[i].nPull != ny)
2523 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2524 "number of pull groups is different in different simulations. That is not\n"
2525 "supported yet. Sorry.\n");
2527 for (ig = 0; ig < window[i].nPull; ig++)
2529 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2530 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2532 if (iact[ig][i] <= 0.0)
2534 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2541 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2544 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2545 * that -in case of limited sampling- the ACT may be severely underestimated.
2546 * Note: the g=1+2tau are overwritten.
2547 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2550 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2553 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2555 /* only evaluate within +- 3sigma of the Gausian */
2556 siglim = 3.0*opt->sigSmoothIact;
2557 siglim2 = dsqr(siglim);
2558 /* pre-factor of Gaussian */
2559 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2560 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2562 for (i = 0; i < nwins; i++)
2564 snew(window[i].tausmooth, window[i].nPull);
2565 for (ig = 0; ig < window[i].nPull; ig++)
2569 pos = window[i].pos[ig];
2570 for (j = 0; j < nwins; j++)
2572 for (jg = 0; jg < window[j].nPull; jg++)
2574 dpos2 = dsqr(window[j].pos[jg]-pos);
2575 if (dpos2 < siglim2)
2577 w = gaufact*exp(-dpos2*invtwosig2);
2579 tausm += w*window[j].tau[jg];
2580 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2581 w,dpos2,pos,gaufact,invtwosig2); */
2586 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2588 window[i].tausmooth[ig] = tausm;
2592 window[i].tausmooth[ig] = window[i].tau[ig];
2594 window[i].g[ig] = 1+2*tausm/window[i].dt;
2599 //! Stop integrating autoccorelation function when ACF drops under this value
2600 #define WHAM_AC_ZERO_LIMIT 0.05
2602 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2604 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2605 t_UmbrellaOptions *opt, const char *fn)
2607 int i, ig, ncorr, ntot, j, k, *count, restart;
2608 real *corr, c0, dt, tmp;
2609 real *ztime, av, tausteps;
2610 FILE *fp, *fpcorr = 0;
2614 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2615 "time [ps]", "autocorrelation function", opt->oenv);
2619 for (i = 0; i < nwins; i++)
2621 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2623 ntot = window[i].Ntot[0];
2625 /* using half the maximum time as length of autocorrelation function */
2629 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2630 " points. Provide more pull data!", ntot);
2633 /* snew(corrSq,ncorr); */
2636 snew(window[i].tau, window[i].nPull);
2637 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2643 for (ig = 0; ig < window[i].nPull; ig++)
2645 if (ntot != window[i].Ntot[ig])
2647 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2648 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2650 ztime = window[i].ztime[ig];
2652 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2653 for (j = 0, av = 0; (j < ntot); j++)
2658 for (k = 0; (k < ncorr); k++)
2663 for (j = 0; (j < ntot); j += restart)
2665 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2667 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2669 /* corrSq[k] += tmp*tmp; */
2673 /* divide by nr of frames for each time displacement */
2674 for (k = 0; (k < ncorr); k++)
2676 /* count probably = (ncorr-k+(restart-1))/restart; */
2677 corr[k] = corr[k]/count[k];
2678 /* variance of autocorrelation function */
2679 /* corrSq[k]=corrSq[k]/count[k]; */
2681 /* normalize such that corr[0] == 0 */
2683 for (k = 0; (k < ncorr); k++)
2686 /* corrSq[k]*=c0*c0; */
2689 /* write ACFs in verbose mode */
2692 for (k = 0; (k < ncorr); k++)
2694 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2696 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2699 /* esimate integrated correlation time, fitting is too unstable */
2700 tausteps = 0.5*corr[0];
2701 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2702 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2704 tausteps += corr[j];
2707 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2708 Kumar et al, eq. 28 ff. */
2709 window[i].tau[ig] = tausteps*dt;
2710 window[i].g[ig] = 1+2*tausteps;
2711 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2719 gmx_ffclose(fpcorr);
2722 /* plot IACT along reaction coordinate */
2723 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2724 if (output_env_get_print_xvgr_codes(opt->oenv))
2726 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2727 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2728 for (i = 0; i < nwins; i++)
2730 fprintf(fp, "# %3d ", i);
2731 for (ig = 0; ig < window[i].nPull; ig++)
2733 fprintf(fp, " %11g", window[i].tau[ig]);
2738 for (i = 0; i < nwins; i++)
2740 for (ig = 0; ig < window[i].nPull; ig++)
2742 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2745 if (opt->sigSmoothIact > 0.0)
2747 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2748 opt->sigSmoothIact);
2749 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2750 smoothIact(window, nwins, opt);
2751 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2752 if (output_env_get_print_xvgr_codes(opt->oenv))
2754 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2755 fprintf(fp, "@ s1 symbol color 2\n");
2757 for (i = 0; i < nwins; i++)
2759 for (ig = 0; ig < window[i].nPull; ig++)
2761 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2766 printf("Wrote %s\n", fn);
2770 * compute average and sigma of each umbrella histogram
2772 void averageSigma(t_UmbrellaWindow *window, int nwins)
2775 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2777 for (i = 0; i < nwins; i++)
2779 snew(window[i].aver, window[i].nPull);
2780 snew(window[i].sigma, window[i].nPull);
2782 ntot = window[i].Ntot[0];
2783 for (ig = 0; ig < window[i].nPull; ig++)
2785 ztime = window[i].ztime[ig];
2786 for (k = 0, av = 0.; k < ntot; k++)
2791 for (k = 0, sum2 = 0.; k < ntot; k++)
2796 sig = sqrt(sum2/ntot);
2797 window[i].aver[ig] = av;
2799 /* Note: This estimate for sigma is biased from the limited sampling.
2800 Correct sigma by n/(n-1) where n = number of independent
2801 samples. Only possible if IACT is known.
2805 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2806 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2810 window[i].sigma[ig] = sig;
2812 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2819 * Use histograms to compute average force on pull group.
2821 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2823 int i, j, bins = opt->bins, k;
2824 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2827 dz = (max-min)/bins;
2828 ztot = opt->max-min;
2831 /* Compute average displacement from histograms */
2832 for (j = 0; j < nWindows; ++j)
2834 snew(window[j].forceAv, window[j].nPull);
2835 for (k = 0; k < window[j].nPull; ++k)
2839 for (i = 0; i < opt->bins; ++i)
2841 temp = (1.0*i+0.5)*dz+min;
2842 distance = temp - window[j].pos[k];
2844 { /* in cyclic wham: */
2845 if (distance > ztot_half) /* |distance| < ztot_half */
2849 else if (distance < -ztot_half)
2854 w = window[j].Histo[k][i]/window[j].g[k];
2855 displAv += w*distance;
2857 /* Are we near min or max? We are getting wrong forces from the histgrams since
2858 the histograms are zero outside [min,max). Therefore, assume that the position
2859 on the other side of the histomgram center is equally likely. */
2862 posmirrored = window[j].pos[k]-distance;
2863 if (posmirrored >= max || posmirrored < min)
2865 displAv += -w*distance;
2872 /* average force from average displacement */
2873 window[j].forceAv[k] = displAv*window[j].k[k];
2874 /* sigma from average square displacement */
2875 /* window[j].sigma [k] = sqrt(displAv2); */
2876 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2882 * Check if the complete reaction coordinate is covered by the histograms
2884 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2885 t_UmbrellaOptions *opt)
2887 int i, ig, j, bins = opt->bins, bBoundary;
2888 real avcount = 0, z, relcount, *count;
2889 snew(count, opt->bins);
2891 for (j = 0; j < opt->bins; ++j)
2893 for (i = 0; i < nwins; i++)
2895 for (ig = 0; ig < window[i].nPull; ig++)
2897 count[j] += window[i].Histo[ig][j];
2900 avcount += 1.0*count[j];
2903 for (j = 0; j < bins; ++j)
2905 relcount = count[j]/avcount;
2906 z = (j+0.5)*opt->dz+opt->min;
2907 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2908 /* check for bins with no data */
2911 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2912 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2914 /* and check for poor sampling */
2915 else if (relcount < 0.005 && !bBoundary)
2917 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2923 /*! \brief Compute initial potential by integrating the average force
2925 * This speeds up the convergence by roughly a factor of 2
2927 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2929 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
2930 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
2933 dz = (opt->max-min)/bins;
2935 printf("Getting initial potential by integration.\n");
2937 /* Compute average displacement from histograms */
2938 computeAverageForce(window, nWindows, opt);
2940 /* Get force for each bin from all histograms in this bin, or, alternatively,
2941 if no histograms are inside this bin, from the closest histogram */
2944 for (j = 0; j < opt->bins; ++j)
2946 pos = (1.0*j+0.5)*dz+min;
2950 groupmin = winmin = 0;
2951 for (i = 0; i < nWindows; i++)
2953 for (ig = 0; ig < window[i].nPull; ig++)
2955 hispos = window[i].pos[ig];
2956 dist = fabs(hispos-pos);
2957 /* average force within bin */
2961 fAv += window[i].forceAv[ig];
2963 /* at the same time, remember closest histogram */
2972 /* if no histogram found in this bin, use closest histogram */
2979 fAv = window[winmin].forceAv[groupmin];
2983 for (j = 1; j < opt->bins; ++j)
2985 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
2988 /* cyclic wham: linearly correct possible offset */
2991 diff = (pot[bins-1]-pot[0])/(bins-1);
2992 for (j = 1; j < opt->bins; ++j)
2999 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3000 for (j = 0; j < opt->bins; ++j)
3002 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3005 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3008 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3009 energy offsets which are usually determined by wham
3010 First: turn pot into probabilities:
3012 for (j = 0; j < opt->bins; ++j)
3014 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
3016 calc_z(pot, window, nWindows, opt, TRUE);
3022 //! Count number of words in an line
3023 static int wordcount(char *ptr)
3028 if (strlen(ptr) == 0)
3032 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3034 for (i = 0; (ptr[i] != '\0'); i++)
3036 is[cur] = isspace(ptr[i]);
3037 if ((i > 0) && (is[cur] && !is[1-cur]))
3046 /*! \brief Read input file for pull group selection (option -is)
3048 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3050 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3053 int i, iline, n, len = STRLEN, temp;
3054 char *ptr = 0, *tmpbuf = 0;
3055 char fmt[1024], fmtign[1024];
3056 int block = 1, sizenow;
3058 fp = gmx_ffopen(opt->fnGroupsel, "r");
3059 opt->groupsel = NULL;
3064 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3069 if (iline >= sizenow)
3072 srenew(opt->groupsel, sizenow);
3074 opt->groupsel[iline].n = n;
3075 opt->groupsel[iline].nUse = 0;
3076 snew(opt->groupsel[iline].bUse, n);
3079 for (i = 0; i < n; i++)
3081 strcpy(fmt, fmtign);
3083 if (sscanf(ptr, fmt, &temp))
3085 opt->groupsel[iline].bUse[i] = (temp > 0);
3086 if (opt->groupsel[iline].bUse[i])
3088 opt->groupsel[iline].nUse++;
3091 strcat(fmtign, "%*s");
3095 opt->nGroupsel = iline;
3096 if (nTpr != opt->nGroupsel)
3098 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3102 printf("\nUse only these pull groups:\n");
3103 for (iline = 0; iline < nTpr; iline++)
3105 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3106 for (i = 0; i < opt->groupsel[iline].n; i++)
3108 if (opt->groupsel[iline].bUse[i])
3121 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3123 //! Number of elements in fnm (used for command line parsing)
3124 #define NFILE asize(fnm)
3126 //! The main g_wham routine
3127 int gmx_wham(int argc, char *argv[])
3129 const char *desc[] = {
3130 "[THISMODULE] is an analysis program that implements the Weighted",
3131 "Histogram Analysis Method (WHAM). It is intended to analyze",
3132 "output files generated by umbrella sampling simulations to ",
3133 "compute a potential of mean force (PMF). [PAR] ",
3134 "At present, three input modes are supported.[BR]",
3135 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
3136 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
3137 " AND, with option [TT]-ix[tt], a file which contains file names of",
3138 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
3139 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
3140 " first pullx, etc.[BR]",
3141 "[TT]*[tt] Same as the previous input mode, except that the the user",
3142 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3143 " From the pull force the position in the umbrella potential is",
3144 " computed. This does not work with tabulated umbrella potentials.[BR]"
3145 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
3146 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3147 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
3148 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [TT].pdo[tt] file header",
3149 " must be similar to the following:[PAR]",
3150 "[TT]# UMBRELLA 3.0[BR]",
3151 "# Component selection: 0 0 1[BR]",
3153 "# Ref. Group 'TestAtom'[BR]",
3154 "# Nr. of pull groups 2[BR]",
3155 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
3156 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
3158 "The number of pull groups, umbrella positions, force constants, and names ",
3159 "may (of course) differ. Following the header, a time column and ",
3160 "a data column for each pull group follows (i.e. the displacement",
3161 "with respect to the umbrella center). Up to four pull groups are possible ",
3162 "per [TT].pdo[tt] file at present.[PAR]",
3163 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3164 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3165 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3166 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3167 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3168 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3169 "used, groupsel.dat looks like this:[BR]",
3173 "By default, the output files are[BR]",
3174 " [TT]-o[tt] PMF output file[BR]",
3175 " [TT]-hist[tt] Histograms output file[BR]",
3176 "Always check whether the histograms sufficiently overlap.[PAR]",
3177 "The umbrella potential is assumed to be harmonic and the force constants are ",
3178 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
3179 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
3180 "WHAM OPTIONS[BR]------------[BR]",
3181 " [TT]-bins[tt] Number of bins used in analysis[BR]",
3182 " [TT]-temp[tt] Temperature in the simulations[BR]",
3183 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
3184 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
3185 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
3186 "The data points that are used to compute the profile",
3187 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3188 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3189 "umbrella window.[PAR]",
3190 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3191 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3192 "With energy output, the energy in the first bin is defined to be zero. ",
3193 "If you want the free energy at a different ",
3194 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3195 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3196 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3197 "[THISMODULE] will make use of the",
3198 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3199 "reaction coordinate will assumed be be neighbors.[PAR]",
3200 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3201 "which may be useful for, e.g. membranes.[PAR]",
3202 "AUTOCORRELATIONS[BR]----------------[BR]",
3203 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3204 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3205 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3206 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3207 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3208 "Because the IACTs can be severely underestimated in case of limited ",
3209 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3210 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3211 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3212 "integration of the ACFs while the ACFs are larger 0.05.",
3213 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3214 "less robust) method such as fitting to a double exponential, you can ",
3215 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3216 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3217 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
3218 "ERROR ANALYSIS[BR]--------------[BR]",
3219 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3220 "otherwise the statistical error may be substantially underestimated. ",
3221 "More background and examples for the bootstrap technique can be found in ",
3222 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
3223 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3224 "Four bootstrapping methods are supported and ",
3225 "selected with [TT]-bs-method[tt].[BR]",
3226 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3227 "data points, and the bootstrap is carried out by assigning random weights to the ",
3228 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3229 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3230 "statistical error is underestimated.[BR]",
3231 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
3232 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3233 "(allowing duplication, i.e. sampling with replacement).",
3234 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
3235 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3236 "divided into blocks and only histograms within each block are mixed. Note that ",
3237 "the histograms within each block must be representative for all possible histograms, ",
3238 "otherwise the statistical error is underestimated.[BR]",
3239 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3240 "such that the generated data points are distributed according the given histograms ",
3241 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3242 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3243 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3244 "Note that this method may severely underestimate the error in case of limited sampling, ",
3245 "that is if individual histograms do not represent the complete phase space at ",
3246 "the respective positions.[BR]",
3247 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3248 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3249 "and width of the umbrella histograms. That method yields similar error estimates ",
3250 "like method [TT]traj[tt].[PAR]"
3251 "Bootstrapping output:[BR]",
3252 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
3253 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
3254 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3255 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3259 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3260 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3261 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3263 static t_UmbrellaOptions opt;
3266 { "-min", FALSE, etREAL, {&opt.min},
3267 "Minimum coordinate in profile"},
3268 { "-max", FALSE, etREAL, {&opt.max},
3269 "Maximum coordinate in profile"},
3270 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3271 "Determine min and max automatically"},
3272 { "-bins", FALSE, etINT, {&opt.bins},
3273 "Number of bins in profile"},
3274 { "-temp", FALSE, etREAL, {&opt.Temperature},
3276 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3278 { "-v", FALSE, etBOOL, {&opt.verbose},
3280 { "-b", FALSE, etREAL, {&opt.tmin},
3281 "First time to analyse (ps)"},
3282 { "-e", FALSE, etREAL, {&opt.tmax},
3283 "Last time to analyse (ps)"},
3284 { "-dt", FALSE, etREAL, {&opt.dt},
3285 "Analyse only every dt ps"},
3286 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3287 "Write histograms and exit"},
3288 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3289 "Determine min and max and exit (with [TT]-auto[tt])"},
3290 { "-log", FALSE, etBOOL, {&opt.bLog},
3291 "Calculate the log of the profile before printing"},
3292 { "-unit", FALSE, etENUM, {en_unit},
3293 "Energy unit in case of log output" },
3294 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3295 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3296 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3297 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3298 { "-sym", FALSE, etBOOL, {&opt.bSym},
3299 "Symmetrize profile around z=0"},
3300 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3301 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3302 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3303 "Calculate integrated autocorrelation times and use in wham"},
3304 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3305 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3306 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3307 "When computing autocorrelation functions, restart computing every .. (ps)"},
3308 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3309 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3310 "during smoothing"},
3311 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3312 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3313 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3314 "Bootstrap method" },
3315 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3316 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3317 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3318 "Seed for bootstrapping. (-1 = use time)"},
3319 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3320 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3321 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3322 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3323 { "-stepout", FALSE, etINT, {&opt.stepchange},
3324 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3325 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3326 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3330 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3331 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3332 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3333 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3334 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3335 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3336 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3337 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3338 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3339 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3340 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3341 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3344 int i, j, l, nfiles, nwins, nfiles2;
3345 t_UmbrellaHeader header;
3346 t_UmbrellaWindow * window = NULL;
3347 double *profile, maxchange = 1e20;
3348 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3349 char **fninTpr, **fninPull, **fninPdo;
3351 FILE *histout, *profout;
3352 char ylabel[256], title[256];
3355 opt.verbose = FALSE;
3356 opt.bHistOnly = FALSE;
3366 /* bootstrapping stuff */
3368 opt.bsMethod = bsMethod_hist;
3369 opt.tauBootStrap = 0.0;
3371 opt.histBootStrapBlockLength = 8;
3372 opt.bs_verbose = FALSE;
3377 opt.Temperature = 298;
3378 opt.Tolerance = 1e-6;
3379 opt.bBoundsOnly = FALSE;
3381 opt.bCalcTauInt = FALSE;
3382 opt.sigSmoothIact = 0.0;
3383 opt.bAllowReduceIact = TRUE;
3384 opt.bInitPotByIntegration = TRUE;
3385 opt.acTrestart = 1.0;
3386 opt.stepchange = 100;
3387 opt.stepUpdateContrib = 100;
3389 if (!parse_common_args(&argc, argv, 0,
3390 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3395 opt.unit = nenum(en_unit);
3396 opt.bsMethod = nenum(en_bsMethod);
3398 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3400 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3401 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3402 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3403 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3404 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3405 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3406 if (opt.bTab && opt.bPullf)
3408 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3409 "Provide pullx.xvg or pdo files!");
3412 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3414 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3416 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3418 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3419 "\n\n Check g_wham -h !");
3422 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3423 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3424 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3425 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3426 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3428 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3429 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3430 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3431 if ( (bMinSet || bMaxSet) && bAutoSet)
3433 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3436 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3438 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3441 if (bMinSet && opt.bAuto)
3443 printf("Note: min and max given, switching off -auto.\n");
3447 if (opt.bTauIntGiven && opt.bCalcTauInt)
3449 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3450 "the autocorrelation times. Not both.");
3453 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3455 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3456 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3458 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3460 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3461 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3464 /* Reading gmx4 pull output and tpr files */
3465 if (opt.bTpr || opt.bPullf || opt.bPullx)
3467 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3469 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3470 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3471 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3472 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3473 if (nfiles != nfiles2)
3475 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3476 opt.fnTpr, nfiles2, fnPull);
3479 /* Read file that selects the pull group to be used */
3480 if (opt.fnGroupsel != NULL)
3482 readPullGroupSelection(&opt, fninTpr, nfiles);
3485 window = initUmbrellaWindows(nfiles);
3486 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3489 { /* reading pdo files */
3490 if (opt.fnGroupsel != NULL)
3492 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3493 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3495 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3496 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3497 window = initUmbrellaWindows(nfiles);
3498 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3502 /* enforce equal weight for all histograms? */
3505 enforceEqualWeights(window, nwins);
3508 /* write histograms */
3509 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3510 xlabel, "count", opt.oenv);
3511 for (l = 0; l < opt.bins; ++l)
3513 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3514 for (i = 0; i < nwins; ++i)
3516 for (j = 0; j < window[i].nPull; ++j)
3518 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3521 fprintf(histout, "\n");
3523 gmx_ffclose(histout);
3524 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3527 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3531 /* Using tabulated umbrella potential */
3534 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3537 /* Integrated autocorrelation times provided ? */
3538 if (opt.bTauIntGiven)
3540 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3543 /* Compute integrated autocorrelation times */
3544 if (opt.bCalcTauInt)
3546 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3549 /* calc average and sigma for each histogram
3550 (maybe required for bootstrapping. If not, this is fast anyhow) */
3551 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3553 averageSigma(window, nwins);
3556 /* Get initial potential by simple integration */
3557 if (opt.bInitPotByIntegration)
3559 guessPotByIntegration(window, nwins, &opt);
3562 /* Check if complete reaction coordinate is covered */
3563 checkReactionCoordinateCovered(window, nwins, &opt);
3565 /* Calculate profile */
3566 snew(profile, opt.bins);
3574 if ( (i%opt.stepUpdateContrib) == 0)
3576 setup_acc_wham(profile, window, nwins, &opt);
3578 if (maxchange < opt.Tolerance)
3581 /* if (opt.verbose) */
3582 printf("Switched to exact iteration in iteration %d\n", i);
3584 calc_profile(profile, window, nwins, &opt, bExact);
3585 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3587 printf("\t%4d) Maximum change %e\n", i, maxchange);
3591 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3592 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3594 /* calc error from Kumar's formula */
3595 /* Unclear how the error propagates along reaction coordinate, therefore
3597 /* calc_error_kumar(profile,window, nwins,&opt); */
3599 /* Write profile in energy units? */
3602 prof_normalization_and_unit(profile, &opt);
3603 strcpy(ylabel, en_unit_label[opt.unit]);
3604 strcpy(title, "Umbrella potential");
3608 strcpy(ylabel, "Density of states");
3609 strcpy(title, "Density of states");
3612 /* symmetrize profile around z=0? */
3615 symmetrizeProfile(profile, &opt);
3618 /* write profile or density of states */
3619 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3620 for (i = 0; i < opt.bins; ++i)
3622 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3624 gmx_ffclose(profout);
3625 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3627 /* Bootstrap Method */
3630 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3631 opt2fn("-hist", NFILE, fnm),
3632 ylabel, profile, window, nwins, &opt);
3636 freeUmbrellaWindows(window, nfiles);
3638 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3639 please_cite(stdout, "Hub2010");