2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
54 #include "gromacs/commandline/pargs.h"
55 #include "gromacs/fileio/tpxio.h"
56 #include "gromacs/fileio/xvgr.h"
57 #include "gromacs/gmxana/gmx_ana.h"
58 #include "gromacs/legacyheaders/copyrite.h"
59 #include "gromacs/legacyheaders/macros.h"
60 #include "gromacs/legacyheaders/names.h"
61 #include "gromacs/legacyheaders/typedefs.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/random/random.h"
64 #include "gromacs/utility/cstringutil.h"
65 #include "gromacs/utility/fatalerror.h"
66 #include "gromacs/utility/gmxomp.h"
67 #include "gromacs/utility/smalloc.h"
69 //! longest file names allowed in input files
70 #define WHAM_MAXFILELEN 2048
73 * x-axis legend for output files
75 static const char *xlabel = "\\xx\\f{} (nm)";
78 * enum for energy units
81 enSel, en_kJ, en_kCal, en_kT, enNr
84 * enum for type of input files (pdos, tpr, or pullf)
87 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
90 /*! \brief enum for bootstrapping method
92 * These bootstrap methods are supported:
93 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
94 * (bsMethod_BayesianHist)
95 * - bootstrap complete histograms (bsMethod_hist)
96 * - bootstrap trajectories from given umbrella histograms. This generates new
97 * "synthetic" histograms (bsMethod_traj)
98 * - bootstrap trajectories from Gaussian with mu/sigma computed from
99 * the respective histogram (bsMethod_trajGauss). This gives very similar
100 * results compared to bsMethod_traj.
102 * ********************************************************************
103 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
104 * JS Hub, BL de Groot, D van der Spoel
105 * g_wham - A free weighted histogram analysis implementation including
106 * robust error and autocorrelation estimates,
107 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
108 * ********************************************************************
111 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
112 bsMethod_traj, bsMethod_trajGauss
116 //! Parameters of the umbrella potentials
120 * \name Using umbrella pull code since gromacs 4.x
123 int npullcrds_tot; //!< nr of pull coordinates in tpr file
124 int npullcrds; //!< nr of umbrella pull coordinates for reading
125 int pull_geometry; //!< such as distance, direction
126 ivec pull_dim; //!< pull dimension with geometry distance
127 int pull_ndim; //!< nr of pull_dim != 0
128 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
129 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
130 real *k; //!< force constants in tpr file
131 real *init_dist; //!< reference displacements
132 real *umbInitDist; //!< reference displacement in umbrella direction
135 * \name Using PDO files common until gromacs 3.x
143 char PullName[4][256];
145 double UmbCons[4][3];
149 //! Data in the umbrella histograms
152 int nPull; //!< nr of pull groups in this pdo or pullf/x file
153 double **Histo; //!< nPull histograms
154 double **cum; //!< nPull cumulative distribution functions
155 int nBin; //!< nr of bins. identical to opt->bins
156 double *k; //!< force constants for the nPull groups
157 double *pos; //!< umbrella positions for the nPull groups
158 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
159 int *N; //!< nr of data points in nPull histograms.
160 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
162 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
164 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
165 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
168 double *tau; //!< intetrated autocorrelation time (IACT)
169 double *tausmooth; //!< smoothed IACT
171 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
173 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
175 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
177 /*! \brief average force estimated from average displacement, fAv=dzAv*k
179 * Used for integration to guess the potential.
182 real *aver; //!< average of histograms
183 real *sigma; //!< stddev of histograms
184 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
187 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
190 int n; //!< total nr of pull groups in this tpr file
191 int nUse; //!< nr of pull groups used
192 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
195 //! Parameters of WHAM
202 const char *fnTpr, *fnPullf, *fnGroupsel;
203 const char *fnPdo, *fnPullx; //!< file names of input
204 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
205 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
207 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
208 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
209 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
210 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
213 * \name Basic WHAM options
216 int bins; //!< nr of bins, min, max, and dz of profile
218 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
219 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
222 * \name Output control
225 gmx_bool bLog; //!< energy output (instead of probability) for profile
226 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
227 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
228 /*! \brief after wham, set prof to zero at this z-position.
229 * When bootstrapping, set zProf0 to a "stable" reference position.
232 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
234 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
235 gmx_bool bAuto; //!< determine min and max automatically but do not exit
237 gmx_bool verbose; //!< more noisy wham mode
238 int stepchange; //!< print maximum change in prof after how many interations
239 output_env_t oenv; //!< xvgr options
242 * \name Autocorrelation stuff
245 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
246 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
247 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
248 real acTrestart; //!< when computing ACT, time between restarting points
250 /* \brief Enforce the same weight for each umbella window, that is
251 * calculate with the same number of data points for
252 * each window. That can be reasonable, if the histograms
253 * have different length, but due to autocorrelation,
254 * a longer simulation should not have larger weightin wham.
260 * \name Bootstrapping stuff
263 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
265 /* \brief bootstrap method
267 * if == bsMethod_hist, consider complete histograms as independent
268 * data points and, hence, only mix complete histograms.
269 * if == bsMethod_BayesianHist, consider complete histograms
270 * as independent data points, but assign random weights
271 * to the histograms during the bootstrapping ("Bayesian bootstrap")
272 * In case of long correlations (e.g., inside a channel), these
273 * will yield a more realistic error.
274 * if == bsMethod_traj(Gauss), generate synthetic histograms
276 * histogram by generating an autocorrelated random sequence
277 * that is distributed according to the respective given
278 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
279 * (instead of from the umbrella histogram) to generate a new
284 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
287 /* \brief when mixing histograms, mix only histograms withing blocks
288 long the reaction coordinate xi. Avoids gaps along xi. */
289 int histBootStrapBlockLength;
291 int bsSeed; //!< random seed for bootstrapping
293 /* \brief Write cumulative distribution functions (CDFs) of histograms
294 and write the generated histograms for each bootstrap */
298 * \name tabulated umbrella potential stuff
302 double *tabX, *tabY, tabMin, tabMax, tabDz;
305 gmx_rng_t rng; //!< gromacs random number generator
308 //! Make an umbrella window (may contain several histograms)
309 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
311 t_UmbrellaWindow *win;
314 for (i = 0; i < nwin; i++)
316 win[i].Histo = win[i].cum = 0;
317 win[i].k = win[i].pos = win[i].z = 0;
318 win[i].N = win[i].Ntot = 0;
319 win[i].g = win[i].tau = win[i].tausmooth = 0;
323 win[i].aver = win[i].sigma = 0;
329 //! Delete an umbrella window (may contain several histograms)
330 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
333 for (i = 0; i < nwin; i++)
337 for (j = 0; j < win[i].nPull; j++)
339 sfree(win[i].Histo[j]);
344 for (j = 0; j < win[i].nPull; j++)
346 sfree(win[i].cum[j]);
351 for (j = 0; j < win[i].nPull; j++)
353 sfree(win[i].bContrib[j]);
365 sfree(win[i].tausmooth);
366 sfree(win[i].bContrib);
368 sfree(win[i].forceAv);
371 sfree(win[i].bsWeight);
377 * Read and setup tabulated umbrella potential
379 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
384 printf("Setting up tabulated potential from file %s\n", fn);
385 nl = read_xvg(fn, &y, &ny);
389 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
391 opt->tabMin = y[0][0];
392 opt->tabMax = y[0][nl-1];
393 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
396 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
397 "ascending z-direction", fn);
399 for (i = 0; i < nl-1; i++)
401 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
403 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
408 for (i = 0; i < nl; i++)
410 opt->tabX[i] = y[0][i];
411 opt->tabY[i] = y[1][i];
413 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
414 opt->tabMin, opt->tabMax, opt->tabDz);
417 //! Read the header of an PDO file (position, force const, nr of groups)
418 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
421 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
423 std::istringstream ist;
426 if (fgets(line, 2048, file) == NULL)
428 gmx_fatal(FARGS, "Error reading header from pdo file\n");
431 ist >> Buffer0 >> Buffer1 >> Buffer2;
432 if (strcmp(Buffer1, "UMBRELLA"))
434 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
435 "(Found in first line: `%s')\n",
436 Buffer1, "UMBRELLA", line);
438 if (strcmp(Buffer2, "3.0"))
440 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
444 if (fgets(line, 2048, file) == NULL)
446 gmx_fatal(FARGS, "Error reading header from pdo file\n");
449 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
450 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
452 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
453 if (header->nDim != 1)
455 gmx_fatal(FARGS, "Currently only supports one dimension");
459 if (fgets(line, 2048, file) == NULL)
461 gmx_fatal(FARGS, "Error reading header from pdo file\n");
464 ist >> Buffer0 >> Buffer1 >> header->nSkip;
467 if (fgets(line, 2048, file) == NULL)
469 gmx_fatal(FARGS, "Error reading header from pdo file\n");
472 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
475 if (fgets(line, 2048, file) == NULL)
477 gmx_fatal(FARGS, "Error reading header from pdo file\n");
480 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
484 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
488 for (i = 0; i < header->nPull; ++i)
490 if (fgets(line, 2048, file) == NULL)
492 gmx_fatal(FARGS, "Error reading header from pdo file\n");
495 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
496 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
497 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
501 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
502 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
506 if (fgets(line, 2048, file) == NULL)
508 gmx_fatal(FARGS, "Cannot read from file\n");
512 if (strcmp(Buffer3, "#####") != 0)
514 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
519 static char *fgets3(FILE *fp, char ptr[], int *len)
524 if (fgets(ptr, *len-1, fp) == NULL)
529 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
531 /* This line is longer than len characters, let's increase len! */
535 if (fgets(p-1, STRLEN, fp) == NULL)
541 if (ptr[slen-1] == '\n')
549 /*! \brief Read the data columns of and PDO file.
551 * TO DO: Get rid of the scanf function to avoid the clang warning.
552 * At the moment, this warning is avoided by hiding the format string
553 * the variable fmtlf.
555 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
556 int fileno, t_UmbrellaWindow * win,
557 t_UmbrellaOptions *opt,
558 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
560 int i, inttemp, bins, count, ntot;
561 real min, max, minfound = 1e20, maxfound = -1e20;
562 double temp, time, time0 = 0, dt;
564 t_UmbrellaWindow * window = 0;
565 gmx_bool timeok, dt_ok = 1;
566 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
567 int len = STRLEN, dstep = 1;
568 const int blocklen = 4096;
578 /* Need to alocate memory and set up structure */
579 window->nPull = header->nPull;
582 snew(window->Histo, window->nPull);
583 snew(window->z, window->nPull);
584 snew(window->k, window->nPull);
585 snew(window->pos, window->nPull);
586 snew(window->N, window->nPull);
587 snew(window->Ntot, window->nPull);
588 snew(window->g, window->nPull);
589 snew(window->bsWeight, window->nPull);
591 window->bContrib = 0;
593 if (opt->bCalcTauInt)
595 snew(window->ztime, window->nPull);
601 snew(lennow, window->nPull);
603 for (i = 0; i < window->nPull; ++i)
606 window->bsWeight[i] = 1.;
607 snew(window->Histo[i], bins);
608 window->k[i] = header->UmbCons[i][0];
609 window->pos[i] = header->UmbPos[i][0];
613 if (opt->bCalcTauInt)
615 window->ztime[i] = 0;
619 /* Done with setup */
625 min = max = bins = 0; /* Get rid of warnings */
630 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
634 if (ptr[0] == '#' || strlen(ptr) < 2)
639 /* Initiate format string */
641 strcat(fmtign, "%*s");
643 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
644 /* Round time to fs */
645 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
647 /* get time step of pdo file */
657 dstep = static_cast<int>(opt->dt/dt+0.5);
665 window->dt = dt*dstep;
670 dt_ok = ((count-1)%dstep == 0);
671 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
673 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
674 time,opt->tmin, opt->tmax, dt_ok,timeok); */
678 for (i = 0; i < header->nPull; ++i)
681 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
682 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
683 if (sscanf(ptr, fmt, &temp))
685 temp += header->UmbPos[i][0];
699 if (opt->bCalcTauInt)
701 /* save time series for autocorrelation analysis */
702 ntot = window->Ntot[i];
703 if (ntot >= lennow[i])
705 lennow[i] += blocklen;
706 srenew(window->ztime[i], lennow[i]);
708 window->ztime[i][ntot] = temp;
716 inttemp = static_cast<int> (temp);
723 else if (inttemp >= bins)
729 if (inttemp >= 0 && inttemp < bins)
731 window->Histo[i][inttemp] += 1.;
739 if (time > opt->tmax)
743 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
759 /*! \brief Set identical weights for all histograms
761 * Normally, the weight is given by the number data points in each
762 * histogram, together with the autocorrelation time. This can be overwritten
763 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
764 * an appropriate autocorrelation times instead of using this function.
766 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
768 int i, k, j, NEnforced;
771 NEnforced = window[0].Ntot[0];
772 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
773 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
774 /* enforce all histograms to have the same weight as the very first histogram */
776 for (j = 0; j < nWindows; ++j)
778 for (k = 0; k < window[j].nPull; ++k)
780 ratio = 1.0*NEnforced/window[j].Ntot[k];
781 for (i = 0; i < window[0].nBin; ++i)
783 window[j].Histo[k][i] *= ratio;
785 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
790 /*! \brief Simple linear interpolation between two given tabulated points
792 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
795 double pl, pu, dz, dp;
797 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
799 if (jl < 0 || ju >= opt->tabNbins)
801 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
802 "Provide an extended table.", dist, jl, ju);
806 dz = dist-opt->tabX[jl];
807 dp = (pu-pl)*dz/opt->tabDz;
813 * Check which bins substiantially contribute (accelerates WHAM)
815 * Don't worry, that routine does not mean we compute the PMF in limited precision.
816 * After rapid convergence (using only substiantal contributions), we always switch to
819 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
820 t_UmbrellaOptions *opt)
822 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
823 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
824 gmx_bool bAnyContrib;
825 static int bFirst = 1;
826 static double wham_contrib_lim;
830 for (i = 0; i < nWindows; ++i)
832 nGrptot += window[i].nPull;
834 wham_contrib_lim = opt->Tolerance/nGrptot;
837 ztot = opt->max-opt->min;
840 for (i = 0; i < nWindows; ++i)
842 if (!window[i].bContrib)
844 snew(window[i].bContrib, window[i].nPull);
846 for (j = 0; j < window[i].nPull; ++j)
848 if (!window[i].bContrib[j])
850 snew(window[i].bContrib[j], opt->bins);
853 for (k = 0; k < opt->bins; ++k)
855 temp = (1.0*k+0.5)*dz+min;
856 distance = temp - window[i].pos[j]; /* distance to umbrella center */
858 { /* in cyclic wham: */
859 if (distance > ztot_half) /* |distance| < ztot_half */
863 else if (distance < -ztot_half)
868 /* Note: there are two contributions to bin k in the wham equations:
869 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
870 ii) exp(- U/(8.314e-3*opt->Temperature))
871 where U is the umbrella potential
872 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
877 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
881 U = tabulated_pot(distance, opt); /* Use tabulated potential */
883 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
884 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
885 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
886 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
887 if (window[i].bContrib[j][k])
893 /* If this histo is far outside min and max all bContrib may be FALSE,
894 causing a floating point exception later on. To avoid that, switch
898 for (k = 0; k < opt->bins; ++k)
900 window[i].bContrib[j][k] = TRUE;
907 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
908 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
913 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
919 //! Compute the PMF (one of the two main WHAM routines)
920 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
921 t_UmbrellaOptions *opt, gmx_bool bExact)
923 double ztot_half, ztot, min = opt->min, dz = opt->dz;
925 ztot = opt->max-opt->min;
930 int nthreads = gmx_omp_get_max_threads();
931 int thread_id = gmx_omp_get_thread_num();
933 int i0 = thread_id*opt->bins/nthreads;
934 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
936 for (i = i0; i < i1; ++i)
939 double num, denom, invg, temp = 0, distance, U = 0;
941 for (j = 0; j < nWindows; ++j)
943 for (k = 0; k < window[j].nPull; ++k)
945 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
946 temp = (1.0*i+0.5)*dz+min;
947 num += invg*window[j].Histo[k][i];
949 if (!(bExact || window[j].bContrib[k][i]))
953 distance = temp - window[j].pos[k]; /* distance to umbrella center */
955 { /* in cyclic wham: */
956 if (distance > ztot_half) /* |distance| < ztot_half */
960 else if (distance < -ztot_half)
968 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
972 U = tabulated_pot(distance, opt); /* Use tabulated potential */
974 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
977 profile[i] = num/denom;
982 //! Compute the free energy offsets z (one of the two main WHAM routines)
983 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
984 t_UmbrellaOptions *opt, gmx_bool bExact)
986 double min = opt->min, dz = opt->dz, ztot_half, ztot;
987 double maxglob = -1e20;
989 ztot = opt->max-opt->min;
994 int nthreads = gmx_omp_get_max_threads();
995 int thread_id = gmx_omp_get_thread_num();
997 int i0 = thread_id*nWindows/nthreads;
998 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
999 double maxloc = -1e20;
1001 for (i = i0; i < i1; ++i)
1003 double total = 0, temp, distance, U = 0;
1006 for (j = 0; j < window[i].nPull; ++j)
1009 for (k = 0; k < window[i].nBin; ++k)
1011 if (!(bExact || window[i].bContrib[j][k]))
1015 temp = (1.0*k+0.5)*dz+min;
1016 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1018 { /* in cyclic wham: */
1019 if (distance > ztot_half) /* |distance| < ztot_half */
1023 else if (distance < -ztot_half)
1031 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1035 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1037 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1039 /* Avoid floating point exception if window is far outside min and max */
1042 total = -log(total);
1048 temp = fabs(total - window[i].z[j]);
1053 window[i].z[j] = total;
1056 /* Now get maximum maxloc from the threads and put in maxglob */
1057 if (maxloc > maxglob)
1059 #pragma omp critical
1061 if (maxloc > maxglob)
1072 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1073 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1075 int i, j, bins = opt->bins;
1076 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1079 if (min > 0. || max < 0.)
1081 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1082 opt->min, opt->max);
1087 for (i = 0; i < bins; i++)
1091 /* bin left of zsym */
1092 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1093 if (j >= 0 && (j+1) < bins)
1095 /* interpolate profile linearly between bins j and j+1 */
1096 z1 = min+(j+0.5)*dz;
1098 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1099 /* average between left and right */
1100 prof2[i] = 0.5*(profsym+profile[i]);
1104 prof2[i] = profile[i];
1108 memcpy(profile, prof2, bins*sizeof(double));
1112 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1113 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1116 double unit_factor = 1., R_MolarGasConst, diff;
1118 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1121 /* Not log? Nothing to do! */
1127 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1128 if (opt->unit == en_kT)
1132 else if (opt->unit == en_kJ)
1134 unit_factor = R_MolarGasConst*opt->Temperature;
1136 else if (opt->unit == en_kCal)
1138 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1142 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1145 for (i = 0; i < bins; i++)
1147 if (profile[i] > 0.0)
1149 profile[i] = -log(profile[i])*unit_factor;
1153 /* shift to zero at z=opt->zProf0 */
1154 if (!opt->bProf0Set)
1160 /* Get bin with shortest distance to opt->zProf0
1161 (-0.5 from bin position and +0.5 from rounding cancel) */
1162 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1167 else if (imin >= bins)
1171 diff = profile[imin];
1175 for (i = 0; i < bins; i++)
1181 //! Make an array of random integers (used for bootstrapping)
1182 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1184 int ipull, blockBase, nr, ipullRandom;
1186 if (blockLength == 0)
1188 blockLength = nPull;
1191 for (ipull = 0; ipull < nPull; ipull++)
1193 blockBase = (ipull/blockLength)*blockLength;
1195 { /* make sure nothing bad happens in the last block */
1196 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1197 ipullRandom = blockBase + nr;
1199 while (ipullRandom >= nPull);
1200 if (ipullRandom < 0 || ipullRandom >= nPull)
1202 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1203 "blockLength = %d, blockBase = %d\n",
1204 ipullRandom, nPull, nr, blockLength, blockBase);
1206 randomArray[ipull] = ipullRandom;
1208 /*for (ipull=0; ipull<nPull; ipull++)
1209 printf("%d ",randomArray[ipull]); printf("\n"); */
1212 /*! \brief Set pull group information of a synthetic histogram
1214 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1215 * but it is not required if we bootstrap complete histograms.
1217 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1218 t_UmbrellaWindow *thisWindow, int pullid)
1220 synthWindow->N [0] = thisWindow->N [pullid];
1221 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1222 synthWindow->pos [0] = thisWindow->pos [pullid];
1223 synthWindow->z [0] = thisWindow->z [pullid];
1224 synthWindow->k [0] = thisWindow->k [pullid];
1225 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1226 synthWindow->g [0] = thisWindow->g [pullid];
1227 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1230 /*! \brief Calculate cumulative distribution function of of all histograms.
1232 * This allow to create random number sequences
1233 * which are distributed according to the histograms. Required to generate
1234 * the "synthetic" histograms for the Bootstrap method
1236 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1237 t_UmbrellaOptions *opt, const char *fnhist)
1241 char *fn = 0, *buf = 0;
1244 if (opt->bs_verbose)
1246 snew(fn, strlen(fnhist)+10);
1247 snew(buf, strlen(fnhist)+10);
1248 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1249 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1253 for (i = 0; i < nWindows; i++)
1255 snew(window[i].cum, window[i].nPull);
1256 for (j = 0; j < window[i].nPull; j++)
1258 snew(window[i].cum[j], nbin+1);
1259 window[i].cum[j][0] = 0.;
1260 for (k = 1; k <= nbin; k++)
1262 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1265 /* normalize CDFs. Ensure cum[nbin]==1 */
1266 last = window[i].cum[j][nbin];
1267 for (k = 0; k <= nbin; k++)
1269 window[i].cum[j][k] /= last;
1274 printf("Cumulative distriubtion functions of all histograms created.\n");
1275 if (opt->bs_verbose)
1277 for (k = 0; k <= nbin; k++)
1279 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1280 for (i = 0; i < nWindows; i++)
1282 for (j = 0; j < window[i].nPull; j++)
1284 fprintf(fp, "%g\t", window[i].cum[j][k]);
1289 printf("Wrote cumulative distribution functions to %s\n", fn);
1297 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1299 * This is used to generate a random sequence distributed according to a histogram
1301 void searchCumulative(double xx[], int n, double x, int *j)
1323 else if (x == xx[n-1])
1333 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1334 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1335 int pullid, t_UmbrellaOptions *opt)
1337 int N, i, nbins, r_index, ibin;
1338 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1341 N = thisWindow->N[pullid];
1342 dt = thisWindow->dt;
1343 nbins = thisWindow->nBin;
1345 /* tau = autocorrelation time */
1346 if (opt->tauBootStrap > 0.0)
1348 tausteps = opt->tauBootStrap/dt;
1350 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1352 /* calc tausteps from g=1+2tausteps */
1353 g = thisWindow->g[pullid];
1359 "When generating hypothetical trajctories from given umbrella histograms,\n"
1360 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1361 "cannot be predicted. You have 3 options:\n"
1362 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1363 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1365 " with option -iiact for all umbrella windows.\n"
1366 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1367 " Use option (3) only if you are sure what you're doing, you may severely\n"
1368 " underestimate the error if a too small ACT is given.\n");
1369 gmx_fatal(FARGS, errstr);
1372 synthWindow->N [0] = N;
1373 synthWindow->pos [0] = thisWindow->pos[pullid];
1374 synthWindow->z [0] = thisWindow->z[pullid];
1375 synthWindow->k [0] = thisWindow->k[pullid];
1376 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1377 synthWindow->g [0] = thisWindow->g [pullid];
1378 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1380 for (i = 0; i < nbins; i++)
1382 synthWindow->Histo[0][i] = 0.;
1385 if (opt->bsMethod == bsMethod_trajGauss)
1387 sig = thisWindow->sigma [pullid];
1388 mu = thisWindow->aver [pullid];
1391 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1393 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1395 z = a*x + sqrt(1-a^2)*y
1396 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1397 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1399 C(t) = exp(-t/tau) with tau=-1/ln(a)
1401 Then, use error function to turn the Gaussian random variable into a uniformly
1402 distributed one in [0,1]. Eventually, use cumulative distribution function of
1403 histogram to get random variables distributed according to histogram.
1404 Note: The ACT of the flat distribution and of the generated histogram is not
1405 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1407 a = exp(-1.0/tausteps);
1409 invsqrt2 = 1./sqrt(2.0);
1411 /* init random sequence */
1412 x = gmx_rng_gaussian_table(opt->rng);
1414 if (opt->bsMethod == bsMethod_traj)
1416 /* bootstrap points from the umbrella histograms */
1417 for (i = 0; i < N; i++)
1419 y = gmx_rng_gaussian_table(opt->rng);
1421 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1422 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1424 r = 0.5*(1+gmx_erf(x*invsqrt2));
1425 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1426 synthWindow->Histo[0][r_index] += 1.;
1429 else if (opt->bsMethod == bsMethod_trajGauss)
1431 /* bootstrap points from a Gaussian with the same average and sigma
1432 as the respective umbrella histogram. The idea was, that -given
1433 limited sampling- the bootstrapped histograms are otherwise biased
1434 from the limited sampling of the US histos. However, bootstrapping from
1435 the Gaussian seems to yield a similar estimate. */
1439 y = gmx_rng_gaussian_table(opt->rng);
1442 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1447 while ( (ibin += nbins) < 0)
1452 else if (ibin >= nbins)
1454 while ( (ibin -= nbins) >= nbins)
1461 if (ibin >= 0 && ibin < nbins)
1463 synthWindow->Histo[0][ibin] += 1.;
1470 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1474 /*! \brief Write all histograms to a file
1476 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1477 * sets of bootstrapped histograms.
1479 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1480 int bs_index, t_UmbrellaOptions *opt)
1482 char *fn = 0, *buf = 0, title[256];
1488 snew(fn, strlen(fnhist)+10);
1489 snew(buf, strlen(fnhist)+1);
1490 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1491 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1495 fn = gmx_strdup(fnhist);
1496 strcpy(title, "Umbrella histograms");
1499 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1502 /* Write histograms */
1503 for (l = 0; l < bins; ++l)
1505 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1506 for (i = 0; i < nWindows; ++i)
1508 for (j = 0; j < window[i].nPull; ++j)
1510 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1517 printf("Wrote %s\n", fn);
1525 //! Used for qsort to sort random numbers
1526 int func_wham_is_larger(const void *a, const void *b)
1545 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1546 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1553 /* generate ordered random numbers between 0 and nAllPull */
1554 for (i = 0; i < nAllPull-1; i++)
1556 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1558 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1559 r[nAllPull-1] = 1.0*nAllPull;
1561 synthwin[0].bsWeight[0] = r[0];
1562 for (i = 1; i < nAllPull; i++)
1564 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1567 /* avoid to have zero weight by adding a tiny value */
1568 for (i = 0; i < nAllPull; i++)
1570 if (synthwin[i].bsWeight[0] < 1e-5)
1572 synthwin[i].bsWeight[0] = 1e-5;
1579 //! The main bootstrapping routine
1580 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1581 char* ylabel, double *profile,
1582 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1584 t_UmbrellaWindow * synthWindow;
1585 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1586 int i, j, *randomArray = 0, winid, pullid, ib;
1587 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1589 gmx_bool bExact = FALSE;
1591 /* init random generator */
1592 if (opt->bsSeed == -1)
1594 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1598 opt->rng = gmx_rng_init(opt->bsSeed);
1601 snew(bsProfile, opt->bins);
1602 snew(bsProfiles_av, opt->bins);
1603 snew(bsProfiles_av2, opt->bins);
1605 /* Create array of all pull groups. Note that different windows
1606 may have different nr of pull groups
1607 First: Get total nr of pull groups */
1609 for (i = 0; i < nWindows; i++)
1611 nAllPull += window[i].nPull;
1613 snew(allPull_winId, nAllPull);
1614 snew(allPull_pullId, nAllPull);
1616 /* Setup one array of all pull groups */
1617 for (i = 0; i < nWindows; i++)
1619 for (j = 0; j < window[i].nPull; j++)
1621 allPull_winId[iAllPull] = i;
1622 allPull_pullId[iAllPull] = j;
1627 /* setup stuff for synthetic windows */
1628 snew(synthWindow, nAllPull);
1629 for (i = 0; i < nAllPull; i++)
1631 synthWindow[i].nPull = 1;
1632 synthWindow[i].nBin = opt->bins;
1633 snew(synthWindow[i].Histo, 1);
1634 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1636 snew(synthWindow[i].Histo[0], opt->bins);
1638 snew(synthWindow[i].N, 1);
1639 snew(synthWindow[i].pos, 1);
1640 snew(synthWindow[i].z, 1);
1641 snew(synthWindow[i].k, 1);
1642 snew(synthWindow[i].bContrib, 1);
1643 snew(synthWindow[i].g, 1);
1644 snew(synthWindow[i].bsWeight, 1);
1647 switch (opt->bsMethod)
1650 snew(randomArray, nAllPull);
1651 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1652 please_cite(stdout, "Hub2006");
1654 case bsMethod_BayesianHist:
1655 /* just copy all histogams into synthWindow array */
1656 for (i = 0; i < nAllPull; i++)
1658 winid = allPull_winId [i];
1659 pullid = allPull_pullId[i];
1660 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1664 case bsMethod_trajGauss:
1665 calc_cumulatives(window, nWindows, opt, fnhist);
1668 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1671 /* do bootstrapping */
1672 fp = xvgropen(fnprof, "Boot strap profiles", xlabel, ylabel, opt->oenv);
1673 for (ib = 0; ib < opt->nBootStrap; ib++)
1675 printf(" *******************************************\n"
1676 " ******** Start bootstrap nr %d ************\n"
1677 " *******************************************\n", ib+1);
1679 switch (opt->bsMethod)
1682 /* bootstrap complete histograms from given histograms */
1683 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1684 for (i = 0; i < nAllPull; i++)
1686 winid = allPull_winId [randomArray[i]];
1687 pullid = allPull_pullId[randomArray[i]];
1688 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1691 case bsMethod_BayesianHist:
1692 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1693 setRandomBsWeights(synthWindow, nAllPull, opt);
1696 case bsMethod_trajGauss:
1697 /* create new histos from given histos, that is generate new hypothetical
1699 for (i = 0; i < nAllPull; i++)
1701 winid = allPull_winId[i];
1702 pullid = allPull_pullId[i];
1703 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1708 /* write histos in case of verbose output */
1709 if (opt->bs_verbose)
1711 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1718 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1721 if ( (i%opt->stepUpdateContrib) == 0)
1723 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1725 if (maxchange < opt->Tolerance)
1729 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1731 printf("\t%4d) Maximum change %e\n", i, maxchange);
1733 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1736 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1737 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1741 prof_normalization_and_unit(bsProfile, opt);
1744 /* symmetrize profile around z=0 */
1747 symmetrizeProfile(bsProfile, opt);
1750 /* save stuff to get average and stddev */
1751 for (i = 0; i < opt->bins; i++)
1754 bsProfiles_av[i] += tmp;
1755 bsProfiles_av2[i] += tmp*tmp;
1756 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1758 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1762 /* write average and stddev */
1763 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1764 if (output_env_get_print_xvgr_codes(opt->oenv))
1766 fprintf(fp, "@TYPE xydy\n");
1768 for (i = 0; i < opt->bins; i++)
1770 bsProfiles_av [i] /= opt->nBootStrap;
1771 bsProfiles_av2[i] /= opt->nBootStrap;
1772 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1773 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1774 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1777 printf("Wrote boot strap result to %s\n", fnres);
1780 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1781 int whaminFileType(char *fn)
1785 if (strcmp(fn+len-3, "tpr") == 0)
1789 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1791 return whamin_pullxf;
1793 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1799 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1801 return whamin_unknown;
1804 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1805 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1806 t_UmbrellaOptions *opt)
1808 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1809 int nread, sizenow, i, block = 1;
1812 fp = gmx_ffopen(fn, "r");
1815 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1817 if (strlen(tmp) >= WHAM_MAXFILELEN)
1819 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1821 if (nread >= sizenow)
1824 srenew(filename, sizenow);
1825 for (i = sizenow-block; i < sizenow; i++)
1827 snew(filename[i], WHAM_MAXFILELEN);
1830 /* remove newline if there is one */
1831 if (tmp[strlen(tmp)-1] == '\n')
1833 tmp[strlen(tmp)-1] = '\0';
1835 strcpy(filename[nread], tmp);
1838 printf("Found file %s in %s\n", filename[nread], fn);
1842 *filenamesRet = filename;
1846 //! Open a file or a pipe to a gzipped file
1847 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1849 char Buffer[1024], gunzip[1024], *Path = 0;
1851 static gmx_bool bFirst = 1;
1853 /* gzipped pdo file? */
1854 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1856 /* search gunzip executable */
1857 if (!(Path = getenv("GMX_PATH_GZIP")))
1859 if (gmx_fexist("/bin/gunzip"))
1861 sprintf(gunzip, "%s", "/bin/gunzip");
1863 else if (gmx_fexist("/usr/bin/gunzip"))
1865 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1869 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1870 "You may want to define the path to gunzip "
1871 "with the environment variable GMX_PATH_GZIP.", gunzip);
1876 sprintf(gunzip, "%s/gunzip", Path);
1877 if (!gmx_fexist(gunzip))
1879 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1880 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1885 printf("Using gunzig executable %s\n", gunzip);
1888 if (!gmx_fexist(fn))
1890 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1892 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1895 printf("Executing command '%s'\n", Buffer);
1898 if ((pipe = popen(Buffer, "r")) == NULL)
1900 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1903 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1909 pipe = gmx_ffopen(fn, "r");
1916 //! Close file or pipe
1917 void pdo_close_file(FILE *fp)
1926 //! Reading all pdo files
1927 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1928 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1931 real mintmp, maxtmp, done = 0.;
1934 /* char Buffer0[1000]; */
1938 gmx_fatal(FARGS, "No files found. Hick.");
1941 /* if min and max are not given, get min and max from the input files */
1944 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1947 for (i = 0; i < nfiles; ++i)
1949 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1950 /*fgets(Buffer0,999,file);
1951 fprintf(stderr,"First line '%s'\n",Buffer0); */
1952 done = 100.0*(i+1)/nfiles;
1953 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1958 read_pdo_header(file, header, opt);
1959 /* here only determine min and max of this window */
1960 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1961 if (maxtmp > opt->max)
1965 if (mintmp < opt->min)
1971 pdo_close_file(file);
1979 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1980 if (opt->bBoundsOnly)
1982 printf("Found option -boundsonly, now exiting.\n");
1986 /* store stepsize in profile */
1987 opt->dz = (opt->max-opt->min)/opt->bins;
1989 /* Having min and max, we read in all files */
1990 /* Loop over all files */
1991 for (i = 0; i < nfiles; ++i)
1993 done = 100.0*(i+1)/nfiles;
1994 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1999 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2000 read_pdo_header(file, header, opt);
2001 /* load data into window */
2002 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2003 if ((window+i)->Ntot[0] == 0)
2005 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2009 pdo_close_file(file);
2017 for (i = 0; i < nfiles; ++i)
2024 //! translate 0/1 to N/Y to write pull dimensions
2025 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2027 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2028 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2033 static int first = 1;
2035 /* printf("Reading %s \n",fn); */
2036 read_tpx_state(fn, &ir, &state, NULL, NULL);
2040 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2043 /* nr of pull groups */
2045 for (i = 0; i < ir.pull->ncoord; i++)
2047 if (ir.pull->coord[i].eType == epullUMBRELLA)
2053 header->pull_geometry = ir.pull->coord[i].eGeom;
2054 copy_ivec(ir.pull->coord[i].dim, header->pull_dim);
2059 /* TODO: remove this restriction */
2060 gmx_fatal(FARGS, "Currently tpr files where a non-umbrella pull coordinate is followed by an umbrella pull coordinate are not supported");
2063 for (m = 0; m < DIM; m++)
2065 if (ir.pull->coord[i].eGeom != header->pull_geometry)
2067 /* TODO: remove the restriction */
2068 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should use the same geometry");
2071 if (ir.pull->coord[i].dim[m] != header->pull_dim[m])
2073 /* TODO: remove the restriction */
2074 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should operate on the same dimensions");
2084 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d umbrella pull coordinates\n", ncrd);
2087 header->npullcrds_tot = ir.pull->ncoord;
2088 header->npullcrds = ncrd;
2089 header->bPrintRef = ir.pull->bPrintCOM1;
2090 if (ir.pull->bPrintCOM2)
2092 gmx_fatal(FARGS, "Reading pull output with printing of the COM of group 2 is currently not supported");
2094 if (ir.pull->bPrintRefValue)
2096 gmx_fatal(FARGS, "Reading pull output with printing of the reference value of the coordinates is currently not supported");
2098 header->bPrintComp = ir.pull->bPrintComp;
2099 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2100 /* We should add a struct for each pull coord with all pull coord data
2101 instead of allocating many arrays for each property */
2102 snew(header->k, ncrd);
2103 snew(header->init_dist, ncrd);
2104 snew(header->umbInitDist, ncrd);
2106 /* only z-direction with epullgCYL? */
2107 if (header->pull_geometry == epullgCYL)
2109 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2111 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2112 "However, found dimensions [%s %s %s]\n",
2113 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2114 int2YN(header->pull_dim[ZZ]));
2118 for (i = 0; i < ncrd; i++)
2120 header->k[i] = ir.pull->coord[i].k;
2121 if (header->k[i] == 0.0)
2123 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2124 "That doesn't seem to be an Umbrella tpr.\n",
2127 header->init_dist[i] = ir.pull->coord[i].init;
2129 /* initial distance to reference */
2130 switch (header->pull_geometry)
2133 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2137 header->umbInitDist[i] = header->init_dist[i];
2140 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2144 if (opt->verbose || first)
2146 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2147 "\tPull group coordinates%s expected in pullx files.\n",
2148 fn, header->npullcrds, epullg_names[header->pull_geometry],
2149 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2150 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2151 for (i = 0; i < ncrd; i++)
2153 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2156 if (!opt->verbose && first)
2158 printf("\tUse option -v to see this output for all input tpr files\n\n");
2164 //! 2-norm in a ndim-dimensional space
2165 double dist_ndim(double **dx, int ndim, int line)
2169 for (i = 0; i < ndim; i++)
2171 r2 += sqr(dx[i][line]);
2176 //! Read pullx.xvg or pullf.xvg
2177 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2178 t_UmbrellaWindow * window,
2179 t_UmbrellaOptions *opt,
2180 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2181 t_groupselection *groupsel)
2183 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2184 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2185 real min, max, minfound = 1e20, maxfound = -1e20;
2186 gmx_bool dt_ok, timeok, bHaveForce;
2187 const char *quantity;
2188 const int blocklen = 4096;
2190 static gmx_bool bFirst = TRUE;
2193 * Data columns in pull output:
2194 * - in force output pullf.xvg:
2195 * No reference columns, one column per pull coordinate
2197 * - in position output pullx.xvg
2198 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2199 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2203 if (opt->bPullx && header->bPrintComp)
2205 nColPerCrd += header->pull_ndim;
2207 quantity = opt->bPullx ? "position" : "force";
2209 if (opt->bPullx && header->bPrintRef)
2211 nColRefEachCrd = header->pull_ndim;
2218 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2219 bHaveForce = opt->bPullf;
2221 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2222 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2223 Sorry for the laziness, this is a To-do. */
2224 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2227 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2228 "reading \n(option -if) is supported at present, "
2229 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2230 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2231 epullg_names[header->pull_geometry]);
2234 nt = read_xvg(fn, &y, &ny);
2236 /* Check consistency */
2239 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2243 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2244 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2246 /* Since this tool code has not updated yet to allow difference pull
2247 * dimensions per pull coordinate, we can't easily check the exact
2248 * number of expected columns, so we only check what we expect for
2249 * the pull coordinates selected for the WHAM analysis.
2251 printf("Expecting these columns in pull file:\n"
2252 "\t%d reference columns for each individual pull coordinate\n"
2253 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2254 printf("With %d pull groups, expect %s%d columns (including the time column)\n",
2256 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2260 if (ny < nColExpect ||
2261 (header->npullcrds == header->npullcrds_tot && ny > nColExpect))
2263 gmx_fatal(FARGS, "Using %d pull coodinates from %s,\n but found %d data columns in %s (expected %s%d)\n"
2264 "\nMaybe you confused options -ix and -if ?\n",
2265 header->npullcrds, fntpr, ny-1, fn,
2266 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2272 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2282 window->dt = y[0][1]-y[0][0];
2284 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2286 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2289 /* Need to alocate memory and set up structure */
2293 /* Use only groups selected with option -is file */
2294 if (header->npullcrds != groupsel->n)
2296 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2297 header->npullcrds, groupsel->n);
2299 window->nPull = groupsel->nUse;
2303 window->nPull = header->npullcrds;
2306 window->nBin = bins;
2307 snew(window->Histo, window->nPull);
2308 snew(window->z, window->nPull);
2309 snew(window->k, window->nPull);
2310 snew(window->pos, window->nPull);
2311 snew(window->N, window->nPull);
2312 snew(window->Ntot, window->nPull);
2313 snew(window->g, window->nPull);
2314 snew(window->bsWeight, window->nPull);
2315 window->bContrib = 0;
2317 if (opt->bCalcTauInt)
2319 snew(window->ztime, window->nPull);
2323 window->ztime = NULL;
2325 snew(lennow, window->nPull);
2327 for (g = 0; g < window->nPull; ++g)
2330 window->bsWeight[g] = 1.;
2331 snew(window->Histo[g], bins);
2333 window->Ntot[g] = 0;
2335 if (opt->bCalcTauInt)
2337 window->ztime[g] = NULL;
2341 /* Copying umbrella center and force const is more involved since not
2342 all pull groups from header (tpr file) may be used in window variable */
2343 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2345 if (groupsel && (groupsel->bUse[g] == FALSE))
2349 window->k[gUsed] = header->k[g];
2350 window->pos[gUsed] = header->umbInitDist[g];
2355 { /* only determine min and max */
2358 min = max = bins = 0; /* Get rid of warnings */
2362 for (i = 0; i < nt; i++)
2364 /* Do you want that time frame? */
2365 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2367 /* get time step of pdo file and get dstep from opt->dt */
2377 dstep = static_cast<int>(opt->dt/dt+0.5);
2385 window->dt = dt*dstep;
2389 dt_ok = (i%dstep == 0);
2390 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2392 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2393 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2396 /* Note: if groupsel == NULL:
2397 * all groups in pullf/x file are stored in this window, and gUsed == g
2398 * if groupsel != NULL:
2399 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2402 for (g = 0; g < header->npullcrds; ++g)
2404 /* was this group selected for application in WHAM? */
2405 if (groupsel && (groupsel->bUse[g] == FALSE))
2414 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2416 pos = -force/header->k[g] + header->umbInitDist[g];
2420 /* pick the right column from:
2421 * time ref1[ndim] coord1[1(ndim)] ref2[ndim] coord2[1(ndim)] ...
2423 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2427 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2441 if (gUsed >= window->nPull)
2443 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2444 gUsed, window->nPull);
2447 if (opt->bCalcTauInt && !bGetMinMax)
2449 /* save time series for autocorrelation analysis */
2450 ntot = window->Ntot[gUsed];
2451 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2452 if (ntot >= lennow[gUsed])
2454 lennow[gUsed] += blocklen;
2455 srenew(window->ztime[gUsed], lennow[gUsed]);
2457 window->ztime[gUsed][ntot] = pos;
2460 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2465 while ( (ibin += bins) < 0)
2470 else if (ibin >= bins)
2472 while ( (ibin -= bins) >= bins)
2478 if (ibin >= 0 && ibin < bins)
2480 window->Histo[gUsed][ibin] += 1.;
2483 window->Ntot[gUsed]++;
2487 else if (t > opt->tmax)
2491 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2503 for (i = 0; i < ny; i++)
2509 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2510 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2511 t_UmbrellaHeader* header,
2512 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2515 real mintmp, maxtmp;
2517 printf("Reading %d tpr and pullf files\n", nfiles/2);
2519 /* min and max not given? */
2522 printf("Automatic determination of boundaries...\n");
2525 for (i = 0; i < nfiles; i++)
2527 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2529 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2531 read_tpr_header(fnTprs[i], header, opt);
2532 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2534 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2536 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2537 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2538 if (maxtmp > opt->max)
2542 if (mintmp < opt->min)
2547 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2548 if (opt->bBoundsOnly)
2550 printf("Found option -boundsonly, now exiting.\n");
2554 /* store stepsize in profile */
2555 opt->dz = (opt->max-opt->min)/opt->bins;
2557 for (i = 0; i < nfiles; i++)
2559 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2561 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2563 read_tpr_header(fnTprs[i], header, opt);
2564 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2566 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2568 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2569 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2570 if (window[i].Ntot[0] == 0)
2572 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2576 for (i = 0; i < nfiles; i++)
2585 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2587 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2588 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2590 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2592 int nlines, ny, i, ig;
2595 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2596 nlines = read_xvg(fn, &iact, &ny);
2597 if (nlines != nwins)
2599 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2602 for (i = 0; i < nlines; i++)
2604 if (window[i].nPull != ny)
2606 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2607 "number of pull groups is different in different simulations. That is not\n"
2608 "supported yet. Sorry.\n");
2610 for (ig = 0; ig < window[i].nPull; ig++)
2612 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2613 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2615 if (iact[ig][i] <= 0.0)
2617 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2624 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2627 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2628 * that -in case of limited sampling- the ACT may be severely underestimated.
2629 * Note: the g=1+2tau are overwritten.
2630 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2633 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2636 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2638 /* only evaluate within +- 3sigma of the Gausian */
2639 siglim = 3.0*opt->sigSmoothIact;
2640 siglim2 = dsqr(siglim);
2641 /* pre-factor of Gaussian */
2642 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2643 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2645 for (i = 0; i < nwins; i++)
2647 snew(window[i].tausmooth, window[i].nPull);
2648 for (ig = 0; ig < window[i].nPull; ig++)
2652 pos = window[i].pos[ig];
2653 for (j = 0; j < nwins; j++)
2655 for (jg = 0; jg < window[j].nPull; jg++)
2657 dpos2 = dsqr(window[j].pos[jg]-pos);
2658 if (dpos2 < siglim2)
2660 w = gaufact*exp(-dpos2*invtwosig2);
2662 tausm += w*window[j].tau[jg];
2663 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2664 w,dpos2,pos,gaufact,invtwosig2); */
2669 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2671 window[i].tausmooth[ig] = tausm;
2675 window[i].tausmooth[ig] = window[i].tau[ig];
2677 window[i].g[ig] = 1+2*tausm/window[i].dt;
2682 //! Stop integrating autoccorelation function when ACF drops under this value
2683 #define WHAM_AC_ZERO_LIMIT 0.05
2685 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2687 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2688 t_UmbrellaOptions *opt, const char *fn)
2690 int i, ig, ncorr, ntot, j, k, *count, restart;
2691 real *corr, c0, dt, tmp;
2692 real *ztime, av, tausteps;
2693 FILE *fp, *fpcorr = 0;
2697 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2698 "time [ps]", "autocorrelation function", opt->oenv);
2702 for (i = 0; i < nwins; i++)
2704 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2706 ntot = window[i].Ntot[0];
2708 /* using half the maximum time as length of autocorrelation function */
2712 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2713 " points. Provide more pull data!", ntot);
2716 /* snew(corrSq,ncorr); */
2719 snew(window[i].tau, window[i].nPull);
2720 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2726 for (ig = 0; ig < window[i].nPull; ig++)
2728 if (ntot != window[i].Ntot[ig])
2730 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2731 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2733 ztime = window[i].ztime[ig];
2735 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2736 for (j = 0, av = 0; (j < ntot); j++)
2741 for (k = 0; (k < ncorr); k++)
2746 for (j = 0; (j < ntot); j += restart)
2748 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2750 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2752 /* corrSq[k] += tmp*tmp; */
2756 /* divide by nr of frames for each time displacement */
2757 for (k = 0; (k < ncorr); k++)
2759 /* count probably = (ncorr-k+(restart-1))/restart; */
2760 corr[k] = corr[k]/count[k];
2761 /* variance of autocorrelation function */
2762 /* corrSq[k]=corrSq[k]/count[k]; */
2764 /* normalize such that corr[0] == 0 */
2766 for (k = 0; (k < ncorr); k++)
2769 /* corrSq[k]*=c0*c0; */
2772 /* write ACFs in verbose mode */
2775 for (k = 0; (k < ncorr); k++)
2777 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2779 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2782 /* esimate integrated correlation time, fitting is too unstable */
2783 tausteps = 0.5*corr[0];
2784 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2785 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2787 tausteps += corr[j];
2790 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2791 Kumar et al, eq. 28 ff. */
2792 window[i].tau[ig] = tausteps*dt;
2793 window[i].g[ig] = 1+2*tausteps;
2794 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2805 /* plot IACT along reaction coordinate */
2806 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2807 if (output_env_get_print_xvgr_codes(opt->oenv))
2809 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2810 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2811 for (i = 0; i < nwins; i++)
2813 fprintf(fp, "# %3d ", i);
2814 for (ig = 0; ig < window[i].nPull; ig++)
2816 fprintf(fp, " %11g", window[i].tau[ig]);
2821 for (i = 0; i < nwins; i++)
2823 for (ig = 0; ig < window[i].nPull; ig++)
2825 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2828 if (opt->sigSmoothIact > 0.0)
2830 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2831 opt->sigSmoothIact);
2832 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2833 smoothIact(window, nwins, opt);
2834 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2835 if (output_env_get_print_xvgr_codes(opt->oenv))
2837 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2838 fprintf(fp, "@ s1 symbol color 2\n");
2840 for (i = 0; i < nwins; i++)
2842 for (ig = 0; ig < window[i].nPull; ig++)
2844 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2849 printf("Wrote %s\n", fn);
2853 * compute average and sigma of each umbrella histogram
2855 void averageSigma(t_UmbrellaWindow *window, int nwins)
2858 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2860 for (i = 0; i < nwins; i++)
2862 snew(window[i].aver, window[i].nPull);
2863 snew(window[i].sigma, window[i].nPull);
2865 ntot = window[i].Ntot[0];
2866 for (ig = 0; ig < window[i].nPull; ig++)
2868 ztime = window[i].ztime[ig];
2869 for (k = 0, av = 0.; k < ntot; k++)
2874 for (k = 0, sum2 = 0.; k < ntot; k++)
2879 sig = sqrt(sum2/ntot);
2880 window[i].aver[ig] = av;
2882 /* Note: This estimate for sigma is biased from the limited sampling.
2883 Correct sigma by n/(n-1) where n = number of independent
2884 samples. Only possible if IACT is known.
2888 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2889 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2893 window[i].sigma[ig] = sig;
2895 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2902 * Use histograms to compute average force on pull group.
2904 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2906 int i, j, bins = opt->bins, k;
2907 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2910 dz = (max-min)/bins;
2911 ztot = opt->max-min;
2914 /* Compute average displacement from histograms */
2915 for (j = 0; j < nWindows; ++j)
2917 snew(window[j].forceAv, window[j].nPull);
2918 for (k = 0; k < window[j].nPull; ++k)
2922 for (i = 0; i < opt->bins; ++i)
2924 temp = (1.0*i+0.5)*dz+min;
2925 distance = temp - window[j].pos[k];
2927 { /* in cyclic wham: */
2928 if (distance > ztot_half) /* |distance| < ztot_half */
2932 else if (distance < -ztot_half)
2937 w = window[j].Histo[k][i]/window[j].g[k];
2938 displAv += w*distance;
2940 /* Are we near min or max? We are getting wrong forces from the histgrams since
2941 the histograms are zero outside [min,max). Therefore, assume that the position
2942 on the other side of the histomgram center is equally likely. */
2945 posmirrored = window[j].pos[k]-distance;
2946 if (posmirrored >= max || posmirrored < min)
2948 displAv += -w*distance;
2955 /* average force from average displacement */
2956 window[j].forceAv[k] = displAv*window[j].k[k];
2957 /* sigma from average square displacement */
2958 /* window[j].sigma [k] = sqrt(displAv2); */
2959 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2965 * Check if the complete reaction coordinate is covered by the histograms
2967 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2968 t_UmbrellaOptions *opt)
2970 int i, ig, j, bins = opt->bins, bBoundary;
2971 real avcount = 0, z, relcount, *count;
2972 snew(count, opt->bins);
2974 for (j = 0; j < opt->bins; ++j)
2976 for (i = 0; i < nwins; i++)
2978 for (ig = 0; ig < window[i].nPull; ig++)
2980 count[j] += window[i].Histo[ig][j];
2983 avcount += 1.0*count[j];
2986 for (j = 0; j < bins; ++j)
2988 relcount = count[j]/avcount;
2989 z = (j+0.5)*opt->dz+opt->min;
2990 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2991 /* check for bins with no data */
2994 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2995 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2997 /* and check for poor sampling */
2998 else if (relcount < 0.005 && !bBoundary)
3000 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3006 /*! \brief Compute initial potential by integrating the average force
3008 * This speeds up the convergence by roughly a factor of 2
3010 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
3012 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3013 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3016 dz = (opt->max-min)/bins;
3018 printf("Getting initial potential by integration.\n");
3020 /* Compute average displacement from histograms */
3021 computeAverageForce(window, nWindows, opt);
3023 /* Get force for each bin from all histograms in this bin, or, alternatively,
3024 if no histograms are inside this bin, from the closest histogram */
3027 for (j = 0; j < opt->bins; ++j)
3029 pos = (1.0*j+0.5)*dz+min;
3033 groupmin = winmin = 0;
3034 for (i = 0; i < nWindows; i++)
3036 for (ig = 0; ig < window[i].nPull; ig++)
3038 hispos = window[i].pos[ig];
3039 dist = fabs(hispos-pos);
3040 /* average force within bin */
3044 fAv += window[i].forceAv[ig];
3046 /* at the same time, remember closest histogram */
3055 /* if no histogram found in this bin, use closest histogram */
3062 fAv = window[winmin].forceAv[groupmin];
3066 for (j = 1; j < opt->bins; ++j)
3068 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3071 /* cyclic wham: linearly correct possible offset */
3074 diff = (pot[bins-1]-pot[0])/(bins-1);
3075 for (j = 1; j < opt->bins; ++j)
3082 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3083 for (j = 0; j < opt->bins; ++j)
3085 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3088 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3091 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3092 energy offsets which are usually determined by wham
3093 First: turn pot into probabilities:
3095 for (j = 0; j < opt->bins; ++j)
3097 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
3099 calc_z(pot, window, nWindows, opt, TRUE);
3105 //! Count number of words in an line
3106 static int wordcount(char *ptr)
3111 if (strlen(ptr) == 0)
3115 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3117 for (i = 0; (ptr[i] != '\0'); i++)
3119 is[cur] = isspace(ptr[i]);
3120 if ((i > 0) && (is[cur] && !is[1-cur]))
3129 /*! \brief Read input file for pull group selection (option -is)
3131 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3133 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3136 int i, iline, n, len = STRLEN, temp;
3137 char *ptr = 0, *tmpbuf = 0;
3138 char fmt[1024], fmtign[1024];
3139 int block = 1, sizenow;
3141 fp = gmx_ffopen(opt->fnGroupsel, "r");
3142 opt->groupsel = NULL;
3147 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3152 if (iline >= sizenow)
3155 srenew(opt->groupsel, sizenow);
3157 opt->groupsel[iline].n = n;
3158 opt->groupsel[iline].nUse = 0;
3159 snew(opt->groupsel[iline].bUse, n);
3162 for (i = 0; i < n; i++)
3164 strcpy(fmt, fmtign);
3166 if (sscanf(ptr, fmt, &temp))
3168 opt->groupsel[iline].bUse[i] = (temp > 0);
3169 if (opt->groupsel[iline].bUse[i])
3171 opt->groupsel[iline].nUse++;
3174 strcat(fmtign, "%*s");
3178 opt->nGroupsel = iline;
3179 if (nTpr != opt->nGroupsel)
3181 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3185 printf("\nUse only these pull groups:\n");
3186 for (iline = 0; iline < nTpr; iline++)
3188 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3189 for (i = 0; i < opt->groupsel[iline].n; i++)
3191 if (opt->groupsel[iline].bUse[i])
3204 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3206 //! Number of elements in fnm (used for command line parsing)
3207 #define NFILE asize(fnm)
3209 //! The main g_wham routine
3210 int gmx_wham(int argc, char *argv[])
3212 const char *desc[] = {
3213 "[THISMODULE] is an analysis program that implements the Weighted",
3214 "Histogram Analysis Method (WHAM). It is intended to analyze",
3215 "output files generated by umbrella sampling simulations to ",
3216 "compute a potential of mean force (PMF).[PAR]",
3218 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3219 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3220 "and, if multiple coordinates need to be analyzed, all used the same",
3221 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3223 "At present, three input modes are supported.",
3225 "* With option [TT]-it[tt], the user provides a file which contains the",
3226 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3227 " AND, with option [TT]-ix[tt], a file which contains file names of",
3228 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3229 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3230 " first pullx, etc.",
3231 "* Same as the previous input mode, except that the the user",
3232 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3233 " From the pull force the position in the umbrella potential is",
3234 " computed. This does not work with tabulated umbrella potentials.",
3235 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3236 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3237 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3238 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3239 " must be similar to the following::",
3242 " # Component selection: 0 0 1",
3244 " # Ref. Group 'TestAtom'",
3245 " # Nr. of pull groups 2",
3246 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3247 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3250 " The number of pull groups, umbrella positions, force constants, and names ",
3251 " may (of course) differ. Following the header, a time column and ",
3252 " a data column for each pull group follows (i.e. the displacement",
3253 " with respect to the umbrella center). Up to four pull groups are possible ",
3254 " per [REF].pdo[ref] file at present.",
3256 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3257 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3258 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3259 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3260 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3261 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3262 "used, groupsel.dat looks like this::",
3268 "By default, the output files are",
3270 "* [TT]-o[tt] PMF output file",
3271 "* [TT]-hist[tt] Histograms output file",
3273 "Always check whether the histograms sufficiently overlap.[PAR]",
3274 "The umbrella potential is assumed to be harmonic and the force constants are ",
3275 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3276 "a tabulated potential can be provided with [TT]-tab[tt].",
3281 "* [TT]-bins[tt] Number of bins used in analysis",
3282 "* [TT]-temp[tt] Temperature in the simulations",
3283 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3284 "* [TT]-auto[tt] Automatic determination of boundaries",
3285 "* [TT]-min,-max[tt] Boundaries of the profile",
3287 "The data points that are used to compute the profile",
3288 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3289 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3290 "umbrella window.[PAR]",
3291 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3292 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3293 "With energy output, the energy in the first bin is defined to be zero. ",
3294 "If you want the free energy at a different ",
3295 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3296 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3297 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3298 "[THISMODULE] will make use of the",
3299 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3300 "reaction coordinate will assumed be be neighbors.[PAR]",
3301 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3302 "which may be useful for, e.g. membranes.",
3307 "If available, the number of OpenMP threads used by g_wham is controlled with [TT]-nt[tt].",
3312 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3313 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3314 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3315 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3316 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3317 "Because the IACTs can be severely underestimated in case of limited ",
3318 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3319 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3320 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3321 "integration of the ACFs while the ACFs are larger 0.05.",
3322 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3323 "less robust) method such as fitting to a double exponential, you can ",
3324 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3325 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3326 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull group in the respective file.",
3331 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3332 "otherwise the statistical error may be substantially underestimated. ",
3333 "More background and examples for the bootstrap technique can be found in ",
3334 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3335 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3336 "Four bootstrapping methods are supported and ",
3337 "selected with [TT]-bs-method[tt].",
3339 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3340 " data points, and the bootstrap is carried out by assigning random weights to the ",
3341 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3342 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3343 " statistical error is underestimated.",
3344 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3345 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3346 " (allowing duplication, i.e. sampling with replacement).",
3347 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3348 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3349 " divided into blocks and only histograms within each block are mixed. Note that ",
3350 " the histograms within each block must be representative for all possible histograms, ",
3351 " otherwise the statistical error is underestimated.",
3352 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3353 " such that the generated data points are distributed according the given histograms ",
3354 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3355 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3356 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3357 " Note that this method may severely underestimate the error in case of limited sampling, ",
3358 " that is if individual histograms do not represent the complete phase space at ",
3359 " the respective positions.",
3360 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3361 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3362 " and width of the umbrella histograms. That method yields similar error estimates ",
3363 " like method [TT]traj[tt].",
3365 "Bootstrapping output:",
3367 "* [TT]-bsres[tt] Average profile and standard deviations",
3368 "* [TT]-bsprof[tt] All bootstrapping profiles",
3370 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3371 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3375 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3376 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3377 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3378 static t_UmbrellaOptions opt;
3379 static int nthreads = -1;
3383 { "-nt", FALSE, etINT, {&nthreads},
3384 "Number of threads used by g_wham (if -1, all threads will be used or what is specified by the environment variable OMP_NUM_THREADS)"},
3386 { "-min", FALSE, etREAL, {&opt.min},
3387 "Minimum coordinate in profile"},
3388 { "-max", FALSE, etREAL, {&opt.max},
3389 "Maximum coordinate in profile"},
3390 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3391 "Determine min and max automatically"},
3392 { "-bins", FALSE, etINT, {&opt.bins},
3393 "Number of bins in profile"},
3394 { "-temp", FALSE, etREAL, {&opt.Temperature},
3396 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3398 { "-v", FALSE, etBOOL, {&opt.verbose},
3400 { "-b", FALSE, etREAL, {&opt.tmin},
3401 "First time to analyse (ps)"},
3402 { "-e", FALSE, etREAL, {&opt.tmax},
3403 "Last time to analyse (ps)"},
3404 { "-dt", FALSE, etREAL, {&opt.dt},
3405 "Analyse only every dt ps"},
3406 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3407 "Write histograms and exit"},
3408 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3409 "Determine min and max and exit (with [TT]-auto[tt])"},
3410 { "-log", FALSE, etBOOL, {&opt.bLog},
3411 "Calculate the log of the profile before printing"},
3412 { "-unit", FALSE, etENUM, {en_unit},
3413 "Energy unit in case of log output" },
3414 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3415 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3416 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3417 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3418 { "-sym", FALSE, etBOOL, {&opt.bSym},
3419 "Symmetrize profile around z=0"},
3420 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3421 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3422 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3423 "Calculate integrated autocorrelation times and use in wham"},
3424 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3425 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3426 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3427 "When computing autocorrelation functions, restart computing every .. (ps)"},
3428 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3429 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3430 "during smoothing"},
3431 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3432 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3433 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3434 "Bootstrap method" },
3435 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3436 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3437 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3438 "Seed for bootstrapping. (-1 = use time)"},
3439 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3440 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3441 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3442 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3443 { "-stepout", FALSE, etINT, {&opt.stepchange},
3444 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3445 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3446 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3450 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3451 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3452 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3453 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3454 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3455 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3456 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3457 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3458 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3459 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3460 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3461 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3464 int i, j, l, nfiles, nwins, nfiles2;
3465 t_UmbrellaHeader header;
3466 t_UmbrellaWindow * window = NULL;
3467 double *profile, maxchange = 1e20;
3468 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3469 char **fninTpr, **fninPull, **fninPdo;
3471 FILE *histout, *profout;
3472 char ylabel[256], title[256];
3475 opt.verbose = FALSE;
3476 opt.bHistOnly = FALSE;
3486 /* bootstrapping stuff */
3488 opt.bsMethod = bsMethod_hist;
3489 opt.tauBootStrap = 0.0;
3491 opt.histBootStrapBlockLength = 8;
3492 opt.bs_verbose = FALSE;
3497 opt.Temperature = 298;
3498 opt.Tolerance = 1e-6;
3499 opt.bBoundsOnly = FALSE;
3501 opt.bCalcTauInt = FALSE;
3502 opt.sigSmoothIact = 0.0;
3503 opt.bAllowReduceIact = TRUE;
3504 opt.bInitPotByIntegration = TRUE;
3505 opt.acTrestart = 1.0;
3506 opt.stepchange = 100;
3507 opt.stepUpdateContrib = 100;
3509 if (!parse_common_args(&argc, argv, 0,
3510 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3515 opt.unit = nenum(en_unit);
3516 opt.bsMethod = nenum(en_bsMethod);
3518 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3520 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3521 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3522 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3523 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3524 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3525 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3526 if (opt.bTab && opt.bPullf)
3528 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3529 "Provide pullx.xvg or pdo files!");
3532 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3534 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3536 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3538 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3539 "\n\n Check g_wham -h !");
3542 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3543 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3544 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3545 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3546 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3548 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3549 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3550 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3551 if ( (bMinSet || bMaxSet) && bAutoSet)
3553 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3556 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3558 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3561 if (bMinSet && opt.bAuto)
3563 printf("Note: min and max given, switching off -auto.\n");
3567 if (opt.bTauIntGiven && opt.bCalcTauInt)
3569 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3570 "the autocorrelation times. Not both.");
3573 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3575 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3576 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3578 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3580 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3581 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3584 /* Set # of OpenMP threads */
3587 gmx_omp_set_num_threads(nthreads);
3591 nthreads = gmx_omp_get_max_threads();
3595 printf("\nNote: Will use %d OpenMP threads.\n\n", nthreads);
3598 /* Reading gmx4 pull output and tpr files */
3599 if (opt.bTpr || opt.bPullf || opt.bPullx)
3601 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3603 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3604 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3605 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3606 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3607 if (nfiles != nfiles2)
3609 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3610 opt.fnTpr, nfiles2, fnPull);
3613 /* Read file that selects the pull group to be used */
3614 if (opt.fnGroupsel != NULL)
3616 readPullGroupSelection(&opt, fninTpr, nfiles);
3619 window = initUmbrellaWindows(nfiles);
3620 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3623 { /* reading pdo files */
3624 if (opt.fnGroupsel != NULL)
3626 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3627 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3629 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3630 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3631 window = initUmbrellaWindows(nfiles);
3632 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3636 /* enforce equal weight for all histograms? */
3639 enforceEqualWeights(window, nwins);
3642 /* write histograms */
3643 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3644 xlabel, "count", opt.oenv);
3645 for (l = 0; l < opt.bins; ++l)
3647 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3648 for (i = 0; i < nwins; ++i)
3650 for (j = 0; j < window[i].nPull; ++j)
3652 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3655 fprintf(histout, "\n");
3658 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3661 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3665 /* Using tabulated umbrella potential */
3668 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3671 /* Integrated autocorrelation times provided ? */
3672 if (opt.bTauIntGiven)
3674 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3677 /* Compute integrated autocorrelation times */
3678 if (opt.bCalcTauInt)
3680 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3683 /* calc average and sigma for each histogram
3684 (maybe required for bootstrapping. If not, this is fast anyhow) */
3685 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3687 averageSigma(window, nwins);
3690 /* Get initial potential by simple integration */
3691 if (opt.bInitPotByIntegration)
3693 guessPotByIntegration(window, nwins, &opt);
3696 /* Check if complete reaction coordinate is covered */
3697 checkReactionCoordinateCovered(window, nwins, &opt);
3699 /* Calculate profile */
3700 snew(profile, opt.bins);
3708 if ( (i%opt.stepUpdateContrib) == 0)
3710 setup_acc_wham(profile, window, nwins, &opt);
3712 if (maxchange < opt.Tolerance)
3715 /* if (opt.verbose) */
3716 printf("Switched to exact iteration in iteration %d\n", i);
3718 calc_profile(profile, window, nwins, &opt, bExact);
3719 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3721 printf("\t%4d) Maximum change %e\n", i, maxchange);
3725 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3726 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3728 /* calc error from Kumar's formula */
3729 /* Unclear how the error propagates along reaction coordinate, therefore
3731 /* calc_error_kumar(profile,window, nwins,&opt); */
3733 /* Write profile in energy units? */
3736 prof_normalization_and_unit(profile, &opt);
3737 strcpy(ylabel, en_unit_label[opt.unit]);
3738 strcpy(title, "Umbrella potential");
3742 strcpy(ylabel, "Density of states");
3743 strcpy(title, "Density of states");
3746 /* symmetrize profile around z=0? */
3749 symmetrizeProfile(profile, &opt);
3752 /* write profile or density of states */
3753 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3754 for (i = 0; i < opt.bins; ++i)
3756 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3759 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3761 /* Bootstrap Method */
3764 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3765 opt2fn("-hist", NFILE, fnm),
3766 ylabel, profile, window, nwins, &opt);
3770 freeUmbrellaWindows(window, nfiles);
3772 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3773 please_cite(stdout, "Hub2010");