2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
55 #include "gromacs/commandline/pargs.h"
56 #include "gromacs/fileio/tpxio.h"
57 #include "gromacs/fileio/xvgr.h"
58 #include "gromacs/gmxana/gmx_ana.h"
59 #include "gromacs/legacyheaders/copyrite.h"
60 #include "gromacs/legacyheaders/macros.h"
61 #include "gromacs/legacyheaders/names.h"
62 #include "gromacs/legacyheaders/typedefs.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/random/random.h"
65 #include "gromacs/utility/cstringutil.h"
66 #include "gromacs/utility/fatalerror.h"
67 #include "gromacs/utility/futil.h"
68 #include "gromacs/utility/gmxomp.h"
69 #include "gromacs/utility/smalloc.h"
71 //! longest file names allowed in input files
72 #define WHAM_MAXFILELEN 2048
75 * x-axis legend for output files
77 static const char *xlabel = "\\xx\\f{} (nm)";
80 * enum for energy units
83 enSel, en_kJ, en_kCal, en_kT, enNr
86 * enum for type of input files (pdos, tpr, or pullf)
89 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
92 /*! \brief enum for bootstrapping method
94 * These bootstrap methods are supported:
95 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
96 * (bsMethod_BayesianHist)
97 * - bootstrap complete histograms (bsMethod_hist)
98 * - bootstrap trajectories from given umbrella histograms. This generates new
99 * "synthetic" histograms (bsMethod_traj)
100 * - bootstrap trajectories from Gaussian with mu/sigma computed from
101 * the respective histogram (bsMethod_trajGauss). This gives very similar
102 * results compared to bsMethod_traj.
104 * ********************************************************************
105 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
106 * JS Hub, BL de Groot, D van der Spoel
107 * g_wham - A free weighted histogram analysis implementation including
108 * robust error and autocorrelation estimates,
109 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
110 * ********************************************************************
113 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
114 bsMethod_traj, bsMethod_trajGauss
118 //! Parameters of the umbrella potentials
122 * \name Using umbrella pull code since gromacs 4.x
125 int npullcrds_tot; //!< nr of pull coordinates in tpr file
126 int npullcrds; //!< nr of umbrella pull coordinates for reading
127 int pull_geometry; //!< such as distance, direction
128 ivec pull_dim; //!< pull dimension with geometry distance
129 int pull_ndim; //!< nr of pull_dim != 0
130 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
131 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
132 real *k; //!< force constants in tpr file
133 real *init_dist; //!< reference displacements
134 real *umbInitDist; //!< reference displacement in umbrella direction
137 * \name Using PDO files common until gromacs 3.x
145 char PullName[4][256];
147 double UmbCons[4][3];
151 //! Data in the umbrella histograms
154 int nPull; //!< nr of pull groups in this pdo or pullf/x file
155 double **Histo; //!< nPull histograms
156 double **cum; //!< nPull cumulative distribution functions
157 int nBin; //!< nr of bins. identical to opt->bins
158 double *k; //!< force constants for the nPull groups
159 double *pos; //!< umbrella positions for the nPull groups
160 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
161 int *N; //!< nr of data points in nPull histograms.
162 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
164 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
166 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
167 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
170 double *tau; //!< intetrated autocorrelation time (IACT)
171 double *tausmooth; //!< smoothed IACT
173 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
175 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
177 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
179 /*! \brief average force estimated from average displacement, fAv=dzAv*k
181 * Used for integration to guess the potential.
184 real *aver; //!< average of histograms
185 real *sigma; //!< stddev of histograms
186 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
189 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
192 int n; //!< total nr of pull groups in this tpr file
193 int nUse; //!< nr of pull groups used
194 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
197 //! Parameters of WHAM
204 const char *fnTpr, *fnPullf, *fnGroupsel;
205 const char *fnPdo, *fnPullx; //!< file names of input
206 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
207 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
209 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
210 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
211 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
212 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
215 * \name Basic WHAM options
218 int bins; //!< nr of bins, min, max, and dz of profile
220 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
221 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
224 * \name Output control
227 gmx_bool bLog; //!< energy output (instead of probability) for profile
228 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
229 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
230 /*! \brief after wham, set prof to zero at this z-position.
231 * When bootstrapping, set zProf0 to a "stable" reference position.
234 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
236 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
237 gmx_bool bAuto; //!< determine min and max automatically but do not exit
239 gmx_bool verbose; //!< more noisy wham mode
240 int stepchange; //!< print maximum change in prof after how many interations
241 output_env_t oenv; //!< xvgr options
244 * \name Autocorrelation stuff
247 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
248 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
249 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
250 real acTrestart; //!< when computing ACT, time between restarting points
252 /* \brief Enforce the same weight for each umbella window, that is
253 * calculate with the same number of data points for
254 * each window. That can be reasonable, if the histograms
255 * have different length, but due to autocorrelation,
256 * a longer simulation should not have larger weightin wham.
262 * \name Bootstrapping stuff
265 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
267 /* \brief bootstrap method
269 * if == bsMethod_hist, consider complete histograms as independent
270 * data points and, hence, only mix complete histograms.
271 * if == bsMethod_BayesianHist, consider complete histograms
272 * as independent data points, but assign random weights
273 * to the histograms during the bootstrapping ("Bayesian bootstrap")
274 * In case of long correlations (e.g., inside a channel), these
275 * will yield a more realistic error.
276 * if == bsMethod_traj(Gauss), generate synthetic histograms
278 * histogram by generating an autocorrelated random sequence
279 * that is distributed according to the respective given
280 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
281 * (instead of from the umbrella histogram) to generate a new
286 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
289 /* \brief when mixing histograms, mix only histograms withing blocks
290 long the reaction coordinate xi. Avoids gaps along xi. */
291 int histBootStrapBlockLength;
293 int bsSeed; //!< random seed for bootstrapping
295 /* \brief Write cumulative distribution functions (CDFs) of histograms
296 and write the generated histograms for each bootstrap */
300 * \name tabulated umbrella potential stuff
304 double *tabX, *tabY, tabMin, tabMax, tabDz;
307 gmx_rng_t rng; //!< gromacs random number generator
310 //! Make an umbrella window (may contain several histograms)
311 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
313 t_UmbrellaWindow *win;
316 for (i = 0; i < nwin; i++)
318 win[i].Histo = win[i].cum = 0;
319 win[i].k = win[i].pos = win[i].z = 0;
320 win[i].N = win[i].Ntot = 0;
321 win[i].g = win[i].tau = win[i].tausmooth = 0;
325 win[i].aver = win[i].sigma = 0;
331 //! Delete an umbrella window (may contain several histograms)
332 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
335 for (i = 0; i < nwin; i++)
339 for (j = 0; j < win[i].nPull; j++)
341 sfree(win[i].Histo[j]);
346 for (j = 0; j < win[i].nPull; j++)
348 sfree(win[i].cum[j]);
353 for (j = 0; j < win[i].nPull; j++)
355 sfree(win[i].bContrib[j]);
367 sfree(win[i].tausmooth);
368 sfree(win[i].bContrib);
370 sfree(win[i].forceAv);
373 sfree(win[i].bsWeight);
379 * Read and setup tabulated umbrella potential
381 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
386 printf("Setting up tabulated potential from file %s\n", fn);
387 nl = read_xvg(fn, &y, &ny);
391 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
393 opt->tabMin = y[0][0];
394 opt->tabMax = y[0][nl-1];
395 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
398 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
399 "ascending z-direction", fn);
401 for (i = 0; i < nl-1; i++)
403 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
405 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
410 for (i = 0; i < nl; i++)
412 opt->tabX[i] = y[0][i];
413 opt->tabY[i] = y[1][i];
415 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
416 opt->tabMin, opt->tabMax, opt->tabDz);
419 //! Read the header of an PDO file (position, force const, nr of groups)
420 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
423 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
425 std::istringstream ist;
428 if (fgets(line, 2048, file) == NULL)
430 gmx_fatal(FARGS, "Error reading header from pdo file\n");
433 ist >> Buffer0 >> Buffer1 >> Buffer2;
434 if (strcmp(Buffer1, "UMBRELLA"))
436 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
437 "(Found in first line: `%s')\n",
438 Buffer1, "UMBRELLA", line);
440 if (strcmp(Buffer2, "3.0"))
442 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
446 if (fgets(line, 2048, file) == NULL)
448 gmx_fatal(FARGS, "Error reading header from pdo file\n");
451 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
452 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
454 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
455 if (header->nDim != 1)
457 gmx_fatal(FARGS, "Currently only supports one dimension");
461 if (fgets(line, 2048, file) == NULL)
463 gmx_fatal(FARGS, "Error reading header from pdo file\n");
466 ist >> Buffer0 >> Buffer1 >> header->nSkip;
469 if (fgets(line, 2048, file) == NULL)
471 gmx_fatal(FARGS, "Error reading header from pdo file\n");
474 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
477 if (fgets(line, 2048, file) == NULL)
479 gmx_fatal(FARGS, "Error reading header from pdo file\n");
482 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
486 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
490 for (i = 0; i < header->nPull; ++i)
492 if (fgets(line, 2048, file) == NULL)
494 gmx_fatal(FARGS, "Error reading header from pdo file\n");
497 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
498 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
499 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
503 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
504 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
508 if (fgets(line, 2048, file) == NULL)
510 gmx_fatal(FARGS, "Cannot read from file\n");
514 if (strcmp(Buffer3, "#####") != 0)
516 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
521 static char *fgets3(FILE *fp, char ptr[], int *len)
526 if (fgets(ptr, *len-1, fp) == NULL)
531 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
533 /* This line is longer than len characters, let's increase len! */
537 if (fgets(p-1, STRLEN, fp) == NULL)
543 if (ptr[slen-1] == '\n')
551 /*! \brief Read the data columns of and PDO file.
553 * TO DO: Get rid of the scanf function to avoid the clang warning.
554 * At the moment, this warning is avoided by hiding the format string
555 * the variable fmtlf.
557 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
558 int fileno, t_UmbrellaWindow * win,
559 t_UmbrellaOptions *opt,
560 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
562 int i, inttemp, bins, count, ntot;
563 real min, max, minfound = 1e20, maxfound = -1e20;
564 double temp, time, time0 = 0, dt;
566 t_UmbrellaWindow * window = 0;
567 gmx_bool timeok, dt_ok = 1;
568 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
569 int len = STRLEN, dstep = 1;
570 const int blocklen = 4096;
580 /* Need to alocate memory and set up structure */
581 window->nPull = header->nPull;
584 snew(window->Histo, window->nPull);
585 snew(window->z, window->nPull);
586 snew(window->k, window->nPull);
587 snew(window->pos, window->nPull);
588 snew(window->N, window->nPull);
589 snew(window->Ntot, window->nPull);
590 snew(window->g, window->nPull);
591 snew(window->bsWeight, window->nPull);
593 window->bContrib = 0;
595 if (opt->bCalcTauInt)
597 snew(window->ztime, window->nPull);
603 snew(lennow, window->nPull);
605 for (i = 0; i < window->nPull; ++i)
608 window->bsWeight[i] = 1.;
609 snew(window->Histo[i], bins);
610 window->k[i] = header->UmbCons[i][0];
611 window->pos[i] = header->UmbPos[i][0];
615 if (opt->bCalcTauInt)
617 window->ztime[i] = 0;
621 /* Done with setup */
627 min = max = bins = 0; /* Get rid of warnings */
632 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
636 if (ptr[0] == '#' || strlen(ptr) < 2)
641 /* Initiate format string */
643 strcat(fmtign, "%*s");
645 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
646 /* Round time to fs */
647 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
649 /* get time step of pdo file */
659 dstep = static_cast<int>(opt->dt/dt+0.5);
667 window->dt = dt*dstep;
672 dt_ok = ((count-1)%dstep == 0);
673 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
675 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
676 time,opt->tmin, opt->tmax, dt_ok,timeok); */
680 for (i = 0; i < header->nPull; ++i)
683 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
684 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
685 if (sscanf(ptr, fmt, &temp))
687 temp += header->UmbPos[i][0];
701 if (opt->bCalcTauInt)
703 /* save time series for autocorrelation analysis */
704 ntot = window->Ntot[i];
705 if (ntot >= lennow[i])
707 lennow[i] += blocklen;
708 srenew(window->ztime[i], lennow[i]);
710 window->ztime[i][ntot] = temp;
718 inttemp = static_cast<int> (temp);
725 else if (inttemp >= bins)
731 if (inttemp >= 0 && inttemp < bins)
733 window->Histo[i][inttemp] += 1.;
741 if (time > opt->tmax)
745 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
761 /*! \brief Set identical weights for all histograms
763 * Normally, the weight is given by the number data points in each
764 * histogram, together with the autocorrelation time. This can be overwritten
765 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
766 * an appropriate autocorrelation times instead of using this function.
768 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
770 int i, k, j, NEnforced;
773 NEnforced = window[0].Ntot[0];
774 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
775 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
776 /* enforce all histograms to have the same weight as the very first histogram */
778 for (j = 0; j < nWindows; ++j)
780 for (k = 0; k < window[j].nPull; ++k)
782 ratio = 1.0*NEnforced/window[j].Ntot[k];
783 for (i = 0; i < window[0].nBin; ++i)
785 window[j].Histo[k][i] *= ratio;
787 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
792 /*! \brief Simple linear interpolation between two given tabulated points
794 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
797 double pl, pu, dz, dp;
799 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
801 if (jl < 0 || ju >= opt->tabNbins)
803 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
804 "Provide an extended table.", dist, jl, ju);
808 dz = dist-opt->tabX[jl];
809 dp = (pu-pl)*dz/opt->tabDz;
815 * Check which bins substiantially contribute (accelerates WHAM)
817 * Don't worry, that routine does not mean we compute the PMF in limited precision.
818 * After rapid convergence (using only substiantal contributions), we always switch to
821 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
822 t_UmbrellaOptions *opt)
824 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
825 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
826 gmx_bool bAnyContrib;
827 static int bFirst = 1;
828 static double wham_contrib_lim;
832 for (i = 0; i < nWindows; ++i)
834 nGrptot += window[i].nPull;
836 wham_contrib_lim = opt->Tolerance/nGrptot;
839 ztot = opt->max-opt->min;
842 for (i = 0; i < nWindows; ++i)
844 if (!window[i].bContrib)
846 snew(window[i].bContrib, window[i].nPull);
848 for (j = 0; j < window[i].nPull; ++j)
850 if (!window[i].bContrib[j])
852 snew(window[i].bContrib[j], opt->bins);
855 for (k = 0; k < opt->bins; ++k)
857 temp = (1.0*k+0.5)*dz+min;
858 distance = temp - window[i].pos[j]; /* distance to umbrella center */
860 { /* in cyclic wham: */
861 if (distance > ztot_half) /* |distance| < ztot_half */
865 else if (distance < -ztot_half)
870 /* Note: there are two contributions to bin k in the wham equations:
871 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
872 ii) exp(- U/(8.314e-3*opt->Temperature))
873 where U is the umbrella potential
874 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
879 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
883 U = tabulated_pot(distance, opt); /* Use tabulated potential */
885 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
886 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
887 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
888 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
889 if (window[i].bContrib[j][k])
895 /* If this histo is far outside min and max all bContrib may be FALSE,
896 causing a floating point exception later on. To avoid that, switch
900 for (k = 0; k < opt->bins; ++k)
902 window[i].bContrib[j][k] = TRUE;
909 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
910 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
915 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
921 //! Compute the PMF (one of the two main WHAM routines)
922 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
923 t_UmbrellaOptions *opt, gmx_bool bExact)
925 double ztot_half, ztot, min = opt->min, dz = opt->dz;
927 ztot = opt->max-opt->min;
932 int nthreads = gmx_omp_get_max_threads();
933 int thread_id = gmx_omp_get_thread_num();
935 int i0 = thread_id*opt->bins/nthreads;
936 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
938 for (i = i0; i < i1; ++i)
941 double num, denom, invg, temp = 0, distance, U = 0;
943 for (j = 0; j < nWindows; ++j)
945 for (k = 0; k < window[j].nPull; ++k)
947 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
948 temp = (1.0*i+0.5)*dz+min;
949 num += invg*window[j].Histo[k][i];
951 if (!(bExact || window[j].bContrib[k][i]))
955 distance = temp - window[j].pos[k]; /* distance to umbrella center */
957 { /* in cyclic wham: */
958 if (distance > ztot_half) /* |distance| < ztot_half */
962 else if (distance < -ztot_half)
970 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
974 U = tabulated_pot(distance, opt); /* Use tabulated potential */
976 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
979 profile[i] = num/denom;
984 //! Compute the free energy offsets z (one of the two main WHAM routines)
985 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
986 t_UmbrellaOptions *opt, gmx_bool bExact)
988 double min = opt->min, dz = opt->dz, ztot_half, ztot;
989 double maxglob = -1e20;
991 ztot = opt->max-opt->min;
996 int nthreads = gmx_omp_get_max_threads();
997 int thread_id = gmx_omp_get_thread_num();
999 int i0 = thread_id*nWindows/nthreads;
1000 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1001 double maxloc = -1e20;
1003 for (i = i0; i < i1; ++i)
1005 double total = 0, temp, distance, U = 0;
1008 for (j = 0; j < window[i].nPull; ++j)
1011 for (k = 0; k < window[i].nBin; ++k)
1013 if (!(bExact || window[i].bContrib[j][k]))
1017 temp = (1.0*k+0.5)*dz+min;
1018 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1020 { /* in cyclic wham: */
1021 if (distance > ztot_half) /* |distance| < ztot_half */
1025 else if (distance < -ztot_half)
1033 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1037 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1039 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1041 /* Avoid floating point exception if window is far outside min and max */
1044 total = -log(total);
1050 temp = fabs(total - window[i].z[j]);
1055 window[i].z[j] = total;
1058 /* Now get maximum maxloc from the threads and put in maxglob */
1059 if (maxloc > maxglob)
1061 #pragma omp critical
1063 if (maxloc > maxglob)
1074 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1075 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1077 int i, j, bins = opt->bins;
1078 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1081 if (min > 0. || max < 0.)
1083 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1084 opt->min, opt->max);
1089 for (i = 0; i < bins; i++)
1093 /* bin left of zsym */
1094 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1095 if (j >= 0 && (j+1) < bins)
1097 /* interpolate profile linearly between bins j and j+1 */
1098 z1 = min+(j+0.5)*dz;
1100 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1101 /* average between left and right */
1102 prof2[i] = 0.5*(profsym+profile[i]);
1106 prof2[i] = profile[i];
1110 memcpy(profile, prof2, bins*sizeof(double));
1114 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1115 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1118 double unit_factor = 1., R_MolarGasConst, diff;
1120 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1123 /* Not log? Nothing to do! */
1129 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1130 if (opt->unit == en_kT)
1134 else if (opt->unit == en_kJ)
1136 unit_factor = R_MolarGasConst*opt->Temperature;
1138 else if (opt->unit == en_kCal)
1140 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1144 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1147 for (i = 0; i < bins; i++)
1149 if (profile[i] > 0.0)
1151 profile[i] = -log(profile[i])*unit_factor;
1155 /* shift to zero at z=opt->zProf0 */
1156 if (!opt->bProf0Set)
1162 /* Get bin with shortest distance to opt->zProf0
1163 (-0.5 from bin position and +0.5 from rounding cancel) */
1164 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1169 else if (imin >= bins)
1173 diff = profile[imin];
1177 for (i = 0; i < bins; i++)
1183 //! Make an array of random integers (used for bootstrapping)
1184 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1186 int ipull, blockBase, nr, ipullRandom;
1188 if (blockLength == 0)
1190 blockLength = nPull;
1193 for (ipull = 0; ipull < nPull; ipull++)
1195 blockBase = (ipull/blockLength)*blockLength;
1197 { /* make sure nothing bad happens in the last block */
1198 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1199 ipullRandom = blockBase + nr;
1201 while (ipullRandom >= nPull);
1202 if (ipullRandom < 0 || ipullRandom >= nPull)
1204 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1205 "blockLength = %d, blockBase = %d\n",
1206 ipullRandom, nPull, nr, blockLength, blockBase);
1208 randomArray[ipull] = ipullRandom;
1210 /*for (ipull=0; ipull<nPull; ipull++)
1211 printf("%d ",randomArray[ipull]); printf("\n"); */
1214 /*! \brief Set pull group information of a synthetic histogram
1216 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1217 * but it is not required if we bootstrap complete histograms.
1219 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1220 t_UmbrellaWindow *thisWindow, int pullid)
1222 synthWindow->N [0] = thisWindow->N [pullid];
1223 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1224 synthWindow->pos [0] = thisWindow->pos [pullid];
1225 synthWindow->z [0] = thisWindow->z [pullid];
1226 synthWindow->k [0] = thisWindow->k [pullid];
1227 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1228 synthWindow->g [0] = thisWindow->g [pullid];
1229 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1232 /*! \brief Calculate cumulative distribution function of of all histograms.
1234 * This allow to create random number sequences
1235 * which are distributed according to the histograms. Required to generate
1236 * the "synthetic" histograms for the Bootstrap method
1238 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1239 t_UmbrellaOptions *opt, const char *fnhist)
1243 char *fn = 0, *buf = 0;
1246 if (opt->bs_verbose)
1248 snew(fn, strlen(fnhist)+10);
1249 snew(buf, strlen(fnhist)+10);
1250 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1251 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1255 for (i = 0; i < nWindows; i++)
1257 snew(window[i].cum, window[i].nPull);
1258 for (j = 0; j < window[i].nPull; j++)
1260 snew(window[i].cum[j], nbin+1);
1261 window[i].cum[j][0] = 0.;
1262 for (k = 1; k <= nbin; k++)
1264 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1267 /* normalize CDFs. Ensure cum[nbin]==1 */
1268 last = window[i].cum[j][nbin];
1269 for (k = 0; k <= nbin; k++)
1271 window[i].cum[j][k] /= last;
1276 printf("Cumulative distriubtion functions of all histograms created.\n");
1277 if (opt->bs_verbose)
1279 for (k = 0; k <= nbin; k++)
1281 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1282 for (i = 0; i < nWindows; i++)
1284 for (j = 0; j < window[i].nPull; j++)
1286 fprintf(fp, "%g\t", window[i].cum[j][k]);
1291 printf("Wrote cumulative distribution functions to %s\n", fn);
1299 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1301 * This is used to generate a random sequence distributed according to a histogram
1303 void searchCumulative(double xx[], int n, double x, int *j)
1325 else if (x == xx[n-1])
1335 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1336 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1337 int pullid, t_UmbrellaOptions *opt)
1339 int N, i, nbins, r_index, ibin;
1340 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1343 N = thisWindow->N[pullid];
1344 dt = thisWindow->dt;
1345 nbins = thisWindow->nBin;
1347 /* tau = autocorrelation time */
1348 if (opt->tauBootStrap > 0.0)
1350 tausteps = opt->tauBootStrap/dt;
1352 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1354 /* calc tausteps from g=1+2tausteps */
1355 g = thisWindow->g[pullid];
1361 "When generating hypothetical trajctories from given umbrella histograms,\n"
1362 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1363 "cannot be predicted. You have 3 options:\n"
1364 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1365 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1367 " with option -iiact for all umbrella windows.\n"
1368 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1369 " Use option (3) only if you are sure what you're doing, you may severely\n"
1370 " underestimate the error if a too small ACT is given.\n");
1371 gmx_fatal(FARGS, errstr);
1374 synthWindow->N [0] = N;
1375 synthWindow->pos [0] = thisWindow->pos[pullid];
1376 synthWindow->z [0] = thisWindow->z[pullid];
1377 synthWindow->k [0] = thisWindow->k[pullid];
1378 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1379 synthWindow->g [0] = thisWindow->g [pullid];
1380 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1382 for (i = 0; i < nbins; i++)
1384 synthWindow->Histo[0][i] = 0.;
1387 if (opt->bsMethod == bsMethod_trajGauss)
1389 sig = thisWindow->sigma [pullid];
1390 mu = thisWindow->aver [pullid];
1393 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1395 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1397 z = a*x + sqrt(1-a^2)*y
1398 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1399 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1401 C(t) = exp(-t/tau) with tau=-1/ln(a)
1403 Then, use error function to turn the Gaussian random variable into a uniformly
1404 distributed one in [0,1]. Eventually, use cumulative distribution function of
1405 histogram to get random variables distributed according to histogram.
1406 Note: The ACT of the flat distribution and of the generated histogram is not
1407 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1409 a = exp(-1.0/tausteps);
1411 invsqrt2 = 1./sqrt(2.0);
1413 /* init random sequence */
1414 x = gmx_rng_gaussian_table(opt->rng);
1416 if (opt->bsMethod == bsMethod_traj)
1418 /* bootstrap points from the umbrella histograms */
1419 for (i = 0; i < N; i++)
1421 y = gmx_rng_gaussian_table(opt->rng);
1423 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1424 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1426 r = 0.5*(1+gmx_erf(x*invsqrt2));
1427 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1428 synthWindow->Histo[0][r_index] += 1.;
1431 else if (opt->bsMethod == bsMethod_trajGauss)
1433 /* bootstrap points from a Gaussian with the same average and sigma
1434 as the respective umbrella histogram. The idea was, that -given
1435 limited sampling- the bootstrapped histograms are otherwise biased
1436 from the limited sampling of the US histos. However, bootstrapping from
1437 the Gaussian seems to yield a similar estimate. */
1441 y = gmx_rng_gaussian_table(opt->rng);
1444 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1449 while ( (ibin += nbins) < 0)
1454 else if (ibin >= nbins)
1456 while ( (ibin -= nbins) >= nbins)
1463 if (ibin >= 0 && ibin < nbins)
1465 synthWindow->Histo[0][ibin] += 1.;
1472 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1476 /*! \brief Write all histograms to a file
1478 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1479 * sets of bootstrapped histograms.
1481 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1482 int bs_index, t_UmbrellaOptions *opt)
1484 char *fn = 0, *buf = 0, title[256];
1490 snew(fn, strlen(fnhist)+10);
1491 snew(buf, strlen(fnhist)+1);
1492 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1493 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1497 fn = gmx_strdup(fnhist);
1498 strcpy(title, "Umbrella histograms");
1501 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1504 /* Write histograms */
1505 for (l = 0; l < bins; ++l)
1507 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1508 for (i = 0; i < nWindows; ++i)
1510 for (j = 0; j < window[i].nPull; ++j)
1512 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1519 printf("Wrote %s\n", fn);
1527 //! Used for qsort to sort random numbers
1528 int func_wham_is_larger(const void *a, const void *b)
1547 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1548 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1555 /* generate ordered random numbers between 0 and nAllPull */
1556 for (i = 0; i < nAllPull-1; i++)
1558 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1560 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1561 r[nAllPull-1] = 1.0*nAllPull;
1563 synthwin[0].bsWeight[0] = r[0];
1564 for (i = 1; i < nAllPull; i++)
1566 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1569 /* avoid to have zero weight by adding a tiny value */
1570 for (i = 0; i < nAllPull; i++)
1572 if (synthwin[i].bsWeight[0] < 1e-5)
1574 synthwin[i].bsWeight[0] = 1e-5;
1581 //! The main bootstrapping routine
1582 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1583 char* ylabel, double *profile,
1584 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1586 t_UmbrellaWindow * synthWindow;
1587 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1588 int i, j, *randomArray = 0, winid, pullid, ib;
1589 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1591 gmx_bool bExact = FALSE;
1593 /* init random generator */
1594 if (opt->bsSeed == -1)
1596 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1600 opt->rng = gmx_rng_init(opt->bsSeed);
1603 snew(bsProfile, opt->bins);
1604 snew(bsProfiles_av, opt->bins);
1605 snew(bsProfiles_av2, opt->bins);
1607 /* Create array of all pull groups. Note that different windows
1608 may have different nr of pull groups
1609 First: Get total nr of pull groups */
1611 for (i = 0; i < nWindows; i++)
1613 nAllPull += window[i].nPull;
1615 snew(allPull_winId, nAllPull);
1616 snew(allPull_pullId, nAllPull);
1618 /* Setup one array of all pull groups */
1619 for (i = 0; i < nWindows; i++)
1621 for (j = 0; j < window[i].nPull; j++)
1623 allPull_winId[iAllPull] = i;
1624 allPull_pullId[iAllPull] = j;
1629 /* setup stuff for synthetic windows */
1630 snew(synthWindow, nAllPull);
1631 for (i = 0; i < nAllPull; i++)
1633 synthWindow[i].nPull = 1;
1634 synthWindow[i].nBin = opt->bins;
1635 snew(synthWindow[i].Histo, 1);
1636 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1638 snew(synthWindow[i].Histo[0], opt->bins);
1640 snew(synthWindow[i].N, 1);
1641 snew(synthWindow[i].pos, 1);
1642 snew(synthWindow[i].z, 1);
1643 snew(synthWindow[i].k, 1);
1644 snew(synthWindow[i].bContrib, 1);
1645 snew(synthWindow[i].g, 1);
1646 snew(synthWindow[i].bsWeight, 1);
1649 switch (opt->bsMethod)
1652 snew(randomArray, nAllPull);
1653 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1654 please_cite(stdout, "Hub2006");
1656 case bsMethod_BayesianHist:
1657 /* just copy all histogams into synthWindow array */
1658 for (i = 0; i < nAllPull; i++)
1660 winid = allPull_winId [i];
1661 pullid = allPull_pullId[i];
1662 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1666 case bsMethod_trajGauss:
1667 calc_cumulatives(window, nWindows, opt, fnhist);
1670 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1673 /* do bootstrapping */
1674 fp = xvgropen(fnprof, "Boot strap profiles", xlabel, ylabel, opt->oenv);
1675 for (ib = 0; ib < opt->nBootStrap; ib++)
1677 printf(" *******************************************\n"
1678 " ******** Start bootstrap nr %d ************\n"
1679 " *******************************************\n", ib+1);
1681 switch (opt->bsMethod)
1684 /* bootstrap complete histograms from given histograms */
1685 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1686 for (i = 0; i < nAllPull; i++)
1688 winid = allPull_winId [randomArray[i]];
1689 pullid = allPull_pullId[randomArray[i]];
1690 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1693 case bsMethod_BayesianHist:
1694 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1695 setRandomBsWeights(synthWindow, nAllPull, opt);
1698 case bsMethod_trajGauss:
1699 /* create new histos from given histos, that is generate new hypothetical
1701 for (i = 0; i < nAllPull; i++)
1703 winid = allPull_winId[i];
1704 pullid = allPull_pullId[i];
1705 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1710 /* write histos in case of verbose output */
1711 if (opt->bs_verbose)
1713 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1720 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1723 if ( (i%opt->stepUpdateContrib) == 0)
1725 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1727 if (maxchange < opt->Tolerance)
1731 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1733 printf("\t%4d) Maximum change %e\n", i, maxchange);
1735 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1738 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1739 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1743 prof_normalization_and_unit(bsProfile, opt);
1746 /* symmetrize profile around z=0 */
1749 symmetrizeProfile(bsProfile, opt);
1752 /* save stuff to get average and stddev */
1753 for (i = 0; i < opt->bins; i++)
1756 bsProfiles_av[i] += tmp;
1757 bsProfiles_av2[i] += tmp*tmp;
1758 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1760 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1764 /* write average and stddev */
1765 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1766 if (output_env_get_print_xvgr_codes(opt->oenv))
1768 fprintf(fp, "@TYPE xydy\n");
1770 for (i = 0; i < opt->bins; i++)
1772 bsProfiles_av [i] /= opt->nBootStrap;
1773 bsProfiles_av2[i] /= opt->nBootStrap;
1774 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1775 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1776 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1779 printf("Wrote boot strap result to %s\n", fnres);
1782 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1783 int whaminFileType(char *fn)
1787 if (strcmp(fn+len-3, "tpr") == 0)
1791 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1793 return whamin_pullxf;
1795 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1801 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1803 return whamin_unknown;
1806 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1807 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1808 t_UmbrellaOptions *opt)
1810 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1811 int nread, sizenow, i, block = 1;
1814 fp = gmx_ffopen(fn, "r");
1817 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1819 if (strlen(tmp) >= WHAM_MAXFILELEN)
1821 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1823 if (nread >= sizenow)
1826 srenew(filename, sizenow);
1827 for (i = sizenow-block; i < sizenow; i++)
1829 snew(filename[i], WHAM_MAXFILELEN);
1832 /* remove newline if there is one */
1833 if (tmp[strlen(tmp)-1] == '\n')
1835 tmp[strlen(tmp)-1] = '\0';
1837 strcpy(filename[nread], tmp);
1840 printf("Found file %s in %s\n", filename[nread], fn);
1844 *filenamesRet = filename;
1848 //! Open a file or a pipe to a gzipped file
1849 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1851 char Buffer[1024], gunzip[1024], *Path = 0;
1853 static gmx_bool bFirst = 1;
1855 /* gzipped pdo file? */
1856 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1858 /* search gunzip executable */
1859 if (!(Path = getenv("GMX_PATH_GZIP")))
1861 if (gmx_fexist("/bin/gunzip"))
1863 sprintf(gunzip, "%s", "/bin/gunzip");
1865 else if (gmx_fexist("/usr/bin/gunzip"))
1867 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1871 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1872 "You may want to define the path to gunzip "
1873 "with the environment variable GMX_PATH_GZIP.", gunzip);
1878 sprintf(gunzip, "%s/gunzip", Path);
1879 if (!gmx_fexist(gunzip))
1881 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1882 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1887 printf("Using gunzig executable %s\n", gunzip);
1890 if (!gmx_fexist(fn))
1892 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1894 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1897 printf("Executing command '%s'\n", Buffer);
1900 if ((pipe = popen(Buffer, "r")) == NULL)
1902 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1905 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1911 pipe = gmx_ffopen(fn, "r");
1918 //! Close file or pipe
1919 void pdo_close_file(FILE *fp)
1928 //! Reading all pdo files
1929 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1930 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1933 real mintmp, maxtmp, done = 0.;
1936 /* char Buffer0[1000]; */
1940 gmx_fatal(FARGS, "No files found. Hick.");
1943 /* if min and max are not given, get min and max from the input files */
1946 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1949 for (i = 0; i < nfiles; ++i)
1951 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1952 /*fgets(Buffer0,999,file);
1953 fprintf(stderr,"First line '%s'\n",Buffer0); */
1954 done = 100.0*(i+1)/nfiles;
1955 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1960 read_pdo_header(file, header, opt);
1961 /* here only determine min and max of this window */
1962 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1963 if (maxtmp > opt->max)
1967 if (mintmp < opt->min)
1973 pdo_close_file(file);
1981 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1982 if (opt->bBoundsOnly)
1984 printf("Found option -boundsonly, now exiting.\n");
1988 /* store stepsize in profile */
1989 opt->dz = (opt->max-opt->min)/opt->bins;
1991 /* Having min and max, we read in all files */
1992 /* Loop over all files */
1993 for (i = 0; i < nfiles; ++i)
1995 done = 100.0*(i+1)/nfiles;
1996 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2001 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2002 read_pdo_header(file, header, opt);
2003 /* load data into window */
2004 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2005 if ((window+i)->Ntot[0] == 0)
2007 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2011 pdo_close_file(file);
2019 for (i = 0; i < nfiles; ++i)
2026 //! translate 0/1 to N/Y to write pull dimensions
2027 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2029 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2030 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2035 static int first = 1;
2037 /* printf("Reading %s \n",fn); */
2038 read_tpx_state(fn, &ir, &state, NULL, NULL);
2042 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2045 /* nr of pull groups */
2047 for (i = 0; i < ir.pull->ncoord; i++)
2049 if (ir.pull->coord[i].eType == epullUMBRELLA)
2055 header->pull_geometry = ir.pull->coord[i].eGeom;
2056 copy_ivec(ir.pull->coord[i].dim, header->pull_dim);
2061 /* TODO: remove this restriction */
2062 gmx_fatal(FARGS, "Currently tpr files where a non-umbrella pull coordinate is followed by an umbrella pull coordinate are not supported");
2065 for (m = 0; m < DIM; m++)
2067 if (ir.pull->coord[i].eGeom != header->pull_geometry)
2069 /* TODO: remove the restriction */
2070 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should use the same geometry");
2073 if (ir.pull->coord[i].dim[m] != header->pull_dim[m])
2075 /* TODO: remove the restriction */
2076 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should operate on the same dimensions");
2086 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d umbrella pull coordinates\n", ncrd);
2089 header->npullcrds_tot = ir.pull->ncoord;
2090 header->npullcrds = ncrd;
2091 header->bPrintRef = ir.pull->bPrintCOM1;
2092 if (ir.pull->bPrintCOM2)
2094 gmx_fatal(FARGS, "Reading pull output with printing of the COM of group 2 is currently not supported");
2096 if (ir.pull->bPrintRefValue)
2098 gmx_fatal(FARGS, "Reading pull output with printing of the reference value of the coordinates is currently not supported");
2100 header->bPrintComp = ir.pull->bPrintComp;
2101 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2102 /* We should add a struct for each pull coord with all pull coord data
2103 instead of allocating many arrays for each property */
2104 snew(header->k, ncrd);
2105 snew(header->init_dist, ncrd);
2106 snew(header->umbInitDist, ncrd);
2108 /* only z-direction with epullgCYL? */
2109 if (header->pull_geometry == epullgCYL)
2111 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2113 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2114 "However, found dimensions [%s %s %s]\n",
2115 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2116 int2YN(header->pull_dim[ZZ]));
2120 for (i = 0; i < ncrd; i++)
2122 header->k[i] = ir.pull->coord[i].k;
2123 if (header->k[i] == 0.0)
2125 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2126 "That doesn't seem to be an Umbrella tpr.\n",
2129 header->init_dist[i] = ir.pull->coord[i].init;
2131 /* initial distance to reference */
2132 switch (header->pull_geometry)
2135 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2139 header->umbInitDist[i] = header->init_dist[i];
2142 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2146 if (opt->verbose || first)
2148 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2149 "\tPull group coordinates%s expected in pullx files.\n",
2150 fn, header->npullcrds, epullg_names[header->pull_geometry],
2151 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2152 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2153 for (i = 0; i < ncrd; i++)
2155 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2158 if (!opt->verbose && first)
2160 printf("\tUse option -v to see this output for all input tpr files\n\n");
2166 //! 2-norm in a ndim-dimensional space
2167 double dist_ndim(double **dx, int ndim, int line)
2171 for (i = 0; i < ndim; i++)
2173 r2 += sqr(dx[i][line]);
2178 //! Read pullx.xvg or pullf.xvg
2179 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2180 t_UmbrellaWindow * window,
2181 t_UmbrellaOptions *opt,
2182 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2183 t_groupselection *groupsel)
2185 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2186 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2187 real min, max, minfound = 1e20, maxfound = -1e20;
2188 gmx_bool dt_ok, timeok, bHaveForce;
2189 const char *quantity;
2190 const int blocklen = 4096;
2192 static gmx_bool bFirst = TRUE;
2195 * Data columns in pull output:
2196 * - in force output pullf.xvg:
2197 * No reference columns, one column per pull coordinate
2199 * - in position output pullx.xvg
2200 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2201 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2205 if (opt->bPullx && header->bPrintComp)
2207 nColPerCrd += header->pull_ndim;
2209 quantity = opt->bPullx ? "position" : "force";
2211 if (opt->bPullx && header->bPrintRef)
2213 nColRefEachCrd = header->pull_ndim;
2220 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2221 bHaveForce = opt->bPullf;
2223 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2224 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2225 Sorry for the laziness, this is a To-do. */
2226 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2229 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2230 "reading \n(option -if) is supported at present, "
2231 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2232 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2233 epullg_names[header->pull_geometry]);
2236 nt = read_xvg(fn, &y, &ny);
2238 /* Check consistency */
2241 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2245 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2246 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2248 /* Since this tool code has not updated yet to allow difference pull
2249 * dimensions per pull coordinate, we can't easily check the exact
2250 * number of expected columns, so we only check what we expect for
2251 * the pull coordinates selected for the WHAM analysis.
2253 printf("Expecting these columns in pull file:\n"
2254 "\t%d reference columns for each individual pull coordinate\n"
2255 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2256 printf("With %d pull groups, expect %s%d columns (including the time column)\n",
2258 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2262 if (ny < nColExpect ||
2263 (header->npullcrds == header->npullcrds_tot && ny > nColExpect))
2265 gmx_fatal(FARGS, "Using %d pull coodinates from %s,\n but found %d data columns in %s (expected %s%d)\n"
2266 "\nMaybe you confused options -ix and -if ?\n",
2267 header->npullcrds, fntpr, ny-1, fn,
2268 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2274 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2284 window->dt = y[0][1]-y[0][0];
2286 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2288 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2291 /* Need to alocate memory and set up structure */
2295 /* Use only groups selected with option -is file */
2296 if (header->npullcrds != groupsel->n)
2298 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2299 header->npullcrds, groupsel->n);
2301 window->nPull = groupsel->nUse;
2305 window->nPull = header->npullcrds;
2308 window->nBin = bins;
2309 snew(window->Histo, window->nPull);
2310 snew(window->z, window->nPull);
2311 snew(window->k, window->nPull);
2312 snew(window->pos, window->nPull);
2313 snew(window->N, window->nPull);
2314 snew(window->Ntot, window->nPull);
2315 snew(window->g, window->nPull);
2316 snew(window->bsWeight, window->nPull);
2317 window->bContrib = 0;
2319 if (opt->bCalcTauInt)
2321 snew(window->ztime, window->nPull);
2325 window->ztime = NULL;
2327 snew(lennow, window->nPull);
2329 for (g = 0; g < window->nPull; ++g)
2332 window->bsWeight[g] = 1.;
2333 snew(window->Histo[g], bins);
2335 window->Ntot[g] = 0;
2337 if (opt->bCalcTauInt)
2339 window->ztime[g] = NULL;
2343 /* Copying umbrella center and force const is more involved since not
2344 all pull groups from header (tpr file) may be used in window variable */
2345 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2347 if (groupsel && (groupsel->bUse[g] == FALSE))
2351 window->k[gUsed] = header->k[g];
2352 window->pos[gUsed] = header->umbInitDist[g];
2357 { /* only determine min and max */
2360 min = max = bins = 0; /* Get rid of warnings */
2364 for (i = 0; i < nt; i++)
2366 /* Do you want that time frame? */
2367 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2369 /* get time step of pdo file and get dstep from opt->dt */
2379 dstep = static_cast<int>(opt->dt/dt+0.5);
2387 window->dt = dt*dstep;
2391 dt_ok = (i%dstep == 0);
2392 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2394 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2395 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2398 /* Note: if groupsel == NULL:
2399 * all groups in pullf/x file are stored in this window, and gUsed == g
2400 * if groupsel != NULL:
2401 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2404 for (g = 0; g < header->npullcrds; ++g)
2406 /* was this group selected for application in WHAM? */
2407 if (groupsel && (groupsel->bUse[g] == FALSE))
2416 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2418 pos = -force/header->k[g] + header->umbInitDist[g];
2422 /* pick the right column from:
2423 * time ref1[ndim] coord1[1(ndim)] ref2[ndim] coord2[1(ndim)] ...
2425 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2429 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2443 if (gUsed >= window->nPull)
2445 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2446 gUsed, window->nPull);
2449 if (opt->bCalcTauInt && !bGetMinMax)
2451 /* save time series for autocorrelation analysis */
2452 ntot = window->Ntot[gUsed];
2453 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2454 if (ntot >= lennow[gUsed])
2456 lennow[gUsed] += blocklen;
2457 srenew(window->ztime[gUsed], lennow[gUsed]);
2459 window->ztime[gUsed][ntot] = pos;
2462 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2467 while ( (ibin += bins) < 0)
2472 else if (ibin >= bins)
2474 while ( (ibin -= bins) >= bins)
2480 if (ibin >= 0 && ibin < bins)
2482 window->Histo[gUsed][ibin] += 1.;
2485 window->Ntot[gUsed]++;
2489 else if (t > opt->tmax)
2493 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2505 for (i = 0; i < ny; i++)
2511 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2512 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2513 t_UmbrellaHeader* header,
2514 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2517 real mintmp, maxtmp;
2519 printf("Reading %d tpr and pullf files\n", nfiles/2);
2521 /* min and max not given? */
2524 printf("Automatic determination of boundaries...\n");
2527 for (i = 0; i < nfiles; i++)
2529 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2531 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2533 read_tpr_header(fnTprs[i], header, opt);
2534 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2536 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2538 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2539 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2540 if (maxtmp > opt->max)
2544 if (mintmp < opt->min)
2549 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2550 if (opt->bBoundsOnly)
2552 printf("Found option -boundsonly, now exiting.\n");
2556 /* store stepsize in profile */
2557 opt->dz = (opt->max-opt->min)/opt->bins;
2559 for (i = 0; i < nfiles; i++)
2561 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2563 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2565 read_tpr_header(fnTprs[i], header, opt);
2566 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2568 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2570 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2571 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2572 if (window[i].Ntot[0] == 0)
2574 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2578 for (i = 0; i < nfiles; i++)
2587 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2589 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2590 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2592 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2594 int nlines, ny, i, ig;
2597 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2598 nlines = read_xvg(fn, &iact, &ny);
2599 if (nlines != nwins)
2601 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2604 for (i = 0; i < nlines; i++)
2606 if (window[i].nPull != ny)
2608 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2609 "number of pull groups is different in different simulations. That is not\n"
2610 "supported yet. Sorry.\n");
2612 for (ig = 0; ig < window[i].nPull; ig++)
2614 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2615 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2617 if (iact[ig][i] <= 0.0)
2619 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2626 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2629 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2630 * that -in case of limited sampling- the ACT may be severely underestimated.
2631 * Note: the g=1+2tau are overwritten.
2632 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2635 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2638 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2640 /* only evaluate within +- 3sigma of the Gausian */
2641 siglim = 3.0*opt->sigSmoothIact;
2642 siglim2 = dsqr(siglim);
2643 /* pre-factor of Gaussian */
2644 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2645 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2647 for (i = 0; i < nwins; i++)
2649 snew(window[i].tausmooth, window[i].nPull);
2650 for (ig = 0; ig < window[i].nPull; ig++)
2654 pos = window[i].pos[ig];
2655 for (j = 0; j < nwins; j++)
2657 for (jg = 0; jg < window[j].nPull; jg++)
2659 dpos2 = dsqr(window[j].pos[jg]-pos);
2660 if (dpos2 < siglim2)
2662 w = gaufact*exp(-dpos2*invtwosig2);
2664 tausm += w*window[j].tau[jg];
2665 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2666 w,dpos2,pos,gaufact,invtwosig2); */
2671 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2673 window[i].tausmooth[ig] = tausm;
2677 window[i].tausmooth[ig] = window[i].tau[ig];
2679 window[i].g[ig] = 1+2*tausm/window[i].dt;
2684 //! Stop integrating autoccorelation function when ACF drops under this value
2685 #define WHAM_AC_ZERO_LIMIT 0.05
2687 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2689 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2690 t_UmbrellaOptions *opt, const char *fn)
2692 int i, ig, ncorr, ntot, j, k, *count, restart;
2693 real *corr, c0, dt, tmp;
2694 real *ztime, av, tausteps;
2695 FILE *fp, *fpcorr = 0;
2699 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2700 "time [ps]", "autocorrelation function", opt->oenv);
2704 for (i = 0; i < nwins; i++)
2706 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2708 ntot = window[i].Ntot[0];
2710 /* using half the maximum time as length of autocorrelation function */
2714 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2715 " points. Provide more pull data!", ntot);
2718 /* snew(corrSq,ncorr); */
2721 snew(window[i].tau, window[i].nPull);
2722 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2728 for (ig = 0; ig < window[i].nPull; ig++)
2730 if (ntot != window[i].Ntot[ig])
2732 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2733 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2735 ztime = window[i].ztime[ig];
2737 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2738 for (j = 0, av = 0; (j < ntot); j++)
2743 for (k = 0; (k < ncorr); k++)
2748 for (j = 0; (j < ntot); j += restart)
2750 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2752 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2754 /* corrSq[k] += tmp*tmp; */
2758 /* divide by nr of frames for each time displacement */
2759 for (k = 0; (k < ncorr); k++)
2761 /* count probably = (ncorr-k+(restart-1))/restart; */
2762 corr[k] = corr[k]/count[k];
2763 /* variance of autocorrelation function */
2764 /* corrSq[k]=corrSq[k]/count[k]; */
2766 /* normalize such that corr[0] == 0 */
2768 for (k = 0; (k < ncorr); k++)
2771 /* corrSq[k]*=c0*c0; */
2774 /* write ACFs in verbose mode */
2777 for (k = 0; (k < ncorr); k++)
2779 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2781 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2784 /* esimate integrated correlation time, fitting is too unstable */
2785 tausteps = 0.5*corr[0];
2786 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2787 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2789 tausteps += corr[j];
2792 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2793 Kumar et al, eq. 28 ff. */
2794 window[i].tau[ig] = tausteps*dt;
2795 window[i].g[ig] = 1+2*tausteps;
2796 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2807 /* plot IACT along reaction coordinate */
2808 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2809 if (output_env_get_print_xvgr_codes(opt->oenv))
2811 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2812 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2813 for (i = 0; i < nwins; i++)
2815 fprintf(fp, "# %3d ", i);
2816 for (ig = 0; ig < window[i].nPull; ig++)
2818 fprintf(fp, " %11g", window[i].tau[ig]);
2823 for (i = 0; i < nwins; i++)
2825 for (ig = 0; ig < window[i].nPull; ig++)
2827 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2830 if (opt->sigSmoothIact > 0.0)
2832 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2833 opt->sigSmoothIact);
2834 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2835 smoothIact(window, nwins, opt);
2836 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2837 if (output_env_get_print_xvgr_codes(opt->oenv))
2839 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2840 fprintf(fp, "@ s1 symbol color 2\n");
2842 for (i = 0; i < nwins; i++)
2844 for (ig = 0; ig < window[i].nPull; ig++)
2846 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2851 printf("Wrote %s\n", fn);
2855 * compute average and sigma of each umbrella histogram
2857 void averageSigma(t_UmbrellaWindow *window, int nwins)
2860 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2862 for (i = 0; i < nwins; i++)
2864 snew(window[i].aver, window[i].nPull);
2865 snew(window[i].sigma, window[i].nPull);
2867 ntot = window[i].Ntot[0];
2868 for (ig = 0; ig < window[i].nPull; ig++)
2870 ztime = window[i].ztime[ig];
2871 for (k = 0, av = 0.; k < ntot; k++)
2876 for (k = 0, sum2 = 0.; k < ntot; k++)
2881 sig = sqrt(sum2/ntot);
2882 window[i].aver[ig] = av;
2884 /* Note: This estimate for sigma is biased from the limited sampling.
2885 Correct sigma by n/(n-1) where n = number of independent
2886 samples. Only possible if IACT is known.
2890 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2891 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2895 window[i].sigma[ig] = sig;
2897 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2904 * Use histograms to compute average force on pull group.
2906 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2908 int i, j, bins = opt->bins, k;
2909 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2912 dz = (max-min)/bins;
2913 ztot = opt->max-min;
2916 /* Compute average displacement from histograms */
2917 for (j = 0; j < nWindows; ++j)
2919 snew(window[j].forceAv, window[j].nPull);
2920 for (k = 0; k < window[j].nPull; ++k)
2924 for (i = 0; i < opt->bins; ++i)
2926 temp = (1.0*i+0.5)*dz+min;
2927 distance = temp - window[j].pos[k];
2929 { /* in cyclic wham: */
2930 if (distance > ztot_half) /* |distance| < ztot_half */
2934 else if (distance < -ztot_half)
2939 w = window[j].Histo[k][i]/window[j].g[k];
2940 displAv += w*distance;
2942 /* Are we near min or max? We are getting wrong forces from the histgrams since
2943 the histograms are zero outside [min,max). Therefore, assume that the position
2944 on the other side of the histomgram center is equally likely. */
2947 posmirrored = window[j].pos[k]-distance;
2948 if (posmirrored >= max || posmirrored < min)
2950 displAv += -w*distance;
2957 /* average force from average displacement */
2958 window[j].forceAv[k] = displAv*window[j].k[k];
2959 /* sigma from average square displacement */
2960 /* window[j].sigma [k] = sqrt(displAv2); */
2961 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2967 * Check if the complete reaction coordinate is covered by the histograms
2969 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2970 t_UmbrellaOptions *opt)
2972 int i, ig, j, bins = opt->bins, bBoundary;
2973 real avcount = 0, z, relcount, *count;
2974 snew(count, opt->bins);
2976 for (j = 0; j < opt->bins; ++j)
2978 for (i = 0; i < nwins; i++)
2980 for (ig = 0; ig < window[i].nPull; ig++)
2982 count[j] += window[i].Histo[ig][j];
2985 avcount += 1.0*count[j];
2988 for (j = 0; j < bins; ++j)
2990 relcount = count[j]/avcount;
2991 z = (j+0.5)*opt->dz+opt->min;
2992 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2993 /* check for bins with no data */
2996 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2997 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2999 /* and check for poor sampling */
3000 else if (relcount < 0.005 && !bBoundary)
3002 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3008 /*! \brief Compute initial potential by integrating the average force
3010 * This speeds up the convergence by roughly a factor of 2
3012 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
3014 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3015 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3018 dz = (opt->max-min)/bins;
3020 printf("Getting initial potential by integration.\n");
3022 /* Compute average displacement from histograms */
3023 computeAverageForce(window, nWindows, opt);
3025 /* Get force for each bin from all histograms in this bin, or, alternatively,
3026 if no histograms are inside this bin, from the closest histogram */
3029 for (j = 0; j < opt->bins; ++j)
3031 pos = (1.0*j+0.5)*dz+min;
3035 groupmin = winmin = 0;
3036 for (i = 0; i < nWindows; i++)
3038 for (ig = 0; ig < window[i].nPull; ig++)
3040 hispos = window[i].pos[ig];
3041 dist = fabs(hispos-pos);
3042 /* average force within bin */
3046 fAv += window[i].forceAv[ig];
3048 /* at the same time, remember closest histogram */
3057 /* if no histogram found in this bin, use closest histogram */
3064 fAv = window[winmin].forceAv[groupmin];
3068 for (j = 1; j < opt->bins; ++j)
3070 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3073 /* cyclic wham: linearly correct possible offset */
3076 diff = (pot[bins-1]-pot[0])/(bins-1);
3077 for (j = 1; j < opt->bins; ++j)
3084 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3085 for (j = 0; j < opt->bins; ++j)
3087 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3090 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3093 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3094 energy offsets which are usually determined by wham
3095 First: turn pot into probabilities:
3097 for (j = 0; j < opt->bins; ++j)
3099 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
3101 calc_z(pot, window, nWindows, opt, TRUE);
3107 //! Count number of words in an line
3108 static int wordcount(char *ptr)
3113 if (strlen(ptr) == 0)
3117 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3119 for (i = 0; (ptr[i] != '\0'); i++)
3121 is[cur] = isspace(ptr[i]);
3122 if ((i > 0) && (is[cur] && !is[1-cur]))
3131 /*! \brief Read input file for pull group selection (option -is)
3133 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3135 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3138 int i, iline, n, len = STRLEN, temp;
3139 char *ptr = 0, *tmpbuf = 0;
3140 char fmt[1024], fmtign[1024];
3141 int block = 1, sizenow;
3143 fp = gmx_ffopen(opt->fnGroupsel, "r");
3144 opt->groupsel = NULL;
3149 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3154 if (iline >= sizenow)
3157 srenew(opt->groupsel, sizenow);
3159 opt->groupsel[iline].n = n;
3160 opt->groupsel[iline].nUse = 0;
3161 snew(opt->groupsel[iline].bUse, n);
3164 for (i = 0; i < n; i++)
3166 strcpy(fmt, fmtign);
3168 if (sscanf(ptr, fmt, &temp))
3170 opt->groupsel[iline].bUse[i] = (temp > 0);
3171 if (opt->groupsel[iline].bUse[i])
3173 opt->groupsel[iline].nUse++;
3176 strcat(fmtign, "%*s");
3180 opt->nGroupsel = iline;
3181 if (nTpr != opt->nGroupsel)
3183 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3187 printf("\nUse only these pull groups:\n");
3188 for (iline = 0; iline < nTpr; iline++)
3190 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3191 for (i = 0; i < opt->groupsel[iline].n; i++)
3193 if (opt->groupsel[iline].bUse[i])
3206 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3208 //! Number of elements in fnm (used for command line parsing)
3209 #define NFILE asize(fnm)
3211 //! The main g_wham routine
3212 int gmx_wham(int argc, char *argv[])
3214 const char *desc[] = {
3215 "[THISMODULE] is an analysis program that implements the Weighted",
3216 "Histogram Analysis Method (WHAM). It is intended to analyze",
3217 "output files generated by umbrella sampling simulations to ",
3218 "compute a potential of mean force (PMF).[PAR]",
3220 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3221 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3222 "and, if multiple coordinates need to be analyzed, all used the same",
3223 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3225 "At present, three input modes are supported.",
3227 "* With option [TT]-it[tt], the user provides a file which contains the",
3228 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3229 " AND, with option [TT]-ix[tt], a file which contains file names of",
3230 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3231 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3232 " first pullx, etc.",
3233 "* Same as the previous input mode, except that the the user",
3234 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3235 " From the pull force the position in the umbrella potential is",
3236 " computed. This does not work with tabulated umbrella potentials.",
3237 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3238 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3239 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3240 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3241 " must be similar to the following::",
3244 " # Component selection: 0 0 1",
3246 " # Ref. Group 'TestAtom'",
3247 " # Nr. of pull groups 2",
3248 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3249 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3252 " The number of pull groups, umbrella positions, force constants, and names ",
3253 " may (of course) differ. Following the header, a time column and ",
3254 " a data column for each pull group follows (i.e. the displacement",
3255 " with respect to the umbrella center). Up to four pull groups are possible ",
3256 " per [REF].pdo[ref] file at present.",
3258 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3259 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3260 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3261 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3262 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3263 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3264 "used, groupsel.dat looks like this::",
3270 "By default, the output files are",
3272 "* [TT]-o[tt] PMF output file",
3273 "* [TT]-hist[tt] Histograms output file",
3275 "Always check whether the histograms sufficiently overlap.[PAR]",
3276 "The umbrella potential is assumed to be harmonic and the force constants are ",
3277 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3278 "a tabulated potential can be provided with [TT]-tab[tt].",
3283 "* [TT]-bins[tt] Number of bins used in analysis",
3284 "* [TT]-temp[tt] Temperature in the simulations",
3285 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3286 "* [TT]-auto[tt] Automatic determination of boundaries",
3287 "* [TT]-min,-max[tt] Boundaries of the profile",
3289 "The data points that are used to compute the profile",
3290 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3291 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3292 "umbrella window.[PAR]",
3293 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3294 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3295 "With energy output, the energy in the first bin is defined to be zero. ",
3296 "If you want the free energy at a different ",
3297 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3298 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3299 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3300 "[THISMODULE] will make use of the",
3301 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3302 "reaction coordinate will assumed be be neighbors.[PAR]",
3303 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3304 "which may be useful for, e.g. membranes.",
3309 "If available, the number of OpenMP threads used by g_wham is controlled with [TT]-nt[tt].",
3314 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3315 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3316 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3317 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3318 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3319 "Because the IACTs can be severely underestimated in case of limited ",
3320 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3321 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3322 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3323 "integration of the ACFs while the ACFs are larger 0.05.",
3324 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3325 "less robust) method such as fitting to a double exponential, you can ",
3326 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3327 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3328 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull group in the respective file.",
3333 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3334 "otherwise the statistical error may be substantially underestimated. ",
3335 "More background and examples for the bootstrap technique can be found in ",
3336 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3337 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3338 "Four bootstrapping methods are supported and ",
3339 "selected with [TT]-bs-method[tt].",
3341 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3342 " data points, and the bootstrap is carried out by assigning random weights to the ",
3343 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3344 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3345 " statistical error is underestimated.",
3346 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3347 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3348 " (allowing duplication, i.e. sampling with replacement).",
3349 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3350 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3351 " divided into blocks and only histograms within each block are mixed. Note that ",
3352 " the histograms within each block must be representative for all possible histograms, ",
3353 " otherwise the statistical error is underestimated.",
3354 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3355 " such that the generated data points are distributed according the given histograms ",
3356 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3357 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3358 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3359 " Note that this method may severely underestimate the error in case of limited sampling, ",
3360 " that is if individual histograms do not represent the complete phase space at ",
3361 " the respective positions.",
3362 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3363 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3364 " and width of the umbrella histograms. That method yields similar error estimates ",
3365 " like method [TT]traj[tt].",
3367 "Bootstrapping output:",
3369 "* [TT]-bsres[tt] Average profile and standard deviations",
3370 "* [TT]-bsprof[tt] All bootstrapping profiles",
3372 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3373 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3377 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3378 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3379 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3380 static t_UmbrellaOptions opt;
3381 static int nthreads = -1;
3385 { "-nt", FALSE, etINT, {&nthreads},
3386 "Number of threads used by g_wham (if -1, all threads will be used or what is specified by the environment variable OMP_NUM_THREADS)"},
3388 { "-min", FALSE, etREAL, {&opt.min},
3389 "Minimum coordinate in profile"},
3390 { "-max", FALSE, etREAL, {&opt.max},
3391 "Maximum coordinate in profile"},
3392 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3393 "Determine min and max automatically"},
3394 { "-bins", FALSE, etINT, {&opt.bins},
3395 "Number of bins in profile"},
3396 { "-temp", FALSE, etREAL, {&opt.Temperature},
3398 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3400 { "-v", FALSE, etBOOL, {&opt.verbose},
3402 { "-b", FALSE, etREAL, {&opt.tmin},
3403 "First time to analyse (ps)"},
3404 { "-e", FALSE, etREAL, {&opt.tmax},
3405 "Last time to analyse (ps)"},
3406 { "-dt", FALSE, etREAL, {&opt.dt},
3407 "Analyse only every dt ps"},
3408 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3409 "Write histograms and exit"},
3410 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3411 "Determine min and max and exit (with [TT]-auto[tt])"},
3412 { "-log", FALSE, etBOOL, {&opt.bLog},
3413 "Calculate the log of the profile before printing"},
3414 { "-unit", FALSE, etENUM, {en_unit},
3415 "Energy unit in case of log output" },
3416 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3417 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3418 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3419 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3420 { "-sym", FALSE, etBOOL, {&opt.bSym},
3421 "Symmetrize profile around z=0"},
3422 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3423 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3424 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3425 "Calculate integrated autocorrelation times and use in wham"},
3426 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3427 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3428 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3429 "When computing autocorrelation functions, restart computing every .. (ps)"},
3430 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3431 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3432 "during smoothing"},
3433 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3434 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3435 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3436 "Bootstrap method" },
3437 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3438 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3439 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3440 "Seed for bootstrapping. (-1 = use time)"},
3441 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3442 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3443 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3444 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3445 { "-stepout", FALSE, etINT, {&opt.stepchange},
3446 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3447 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3448 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3452 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3453 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3454 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3455 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3456 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3457 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3458 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3459 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3460 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3461 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3462 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3463 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3466 int i, j, l, nfiles, nwins, nfiles2;
3467 t_UmbrellaHeader header;
3468 t_UmbrellaWindow * window = NULL;
3469 double *profile, maxchange = 1e20;
3470 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3471 char **fninTpr, **fninPull, **fninPdo;
3473 FILE *histout, *profout;
3474 char ylabel[256], title[256];
3477 opt.verbose = FALSE;
3478 opt.bHistOnly = FALSE;
3488 /* bootstrapping stuff */
3490 opt.bsMethod = bsMethod_hist;
3491 opt.tauBootStrap = 0.0;
3493 opt.histBootStrapBlockLength = 8;
3494 opt.bs_verbose = FALSE;
3499 opt.Temperature = 298;
3500 opt.Tolerance = 1e-6;
3501 opt.bBoundsOnly = FALSE;
3503 opt.bCalcTauInt = FALSE;
3504 opt.sigSmoothIact = 0.0;
3505 opt.bAllowReduceIact = TRUE;
3506 opt.bInitPotByIntegration = TRUE;
3507 opt.acTrestart = 1.0;
3508 opt.stepchange = 100;
3509 opt.stepUpdateContrib = 100;
3511 if (!parse_common_args(&argc, argv, 0,
3512 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3517 opt.unit = nenum(en_unit);
3518 opt.bsMethod = nenum(en_bsMethod);
3520 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3522 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3523 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3524 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3525 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3526 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3527 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3528 if (opt.bTab && opt.bPullf)
3530 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3531 "Provide pullx.xvg or pdo files!");
3534 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3536 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3538 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3540 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3541 "\n\n Check g_wham -h !");
3544 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3545 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3546 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3547 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3548 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3550 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3551 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3552 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3553 if ( (bMinSet || bMaxSet) && bAutoSet)
3555 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3558 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3560 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3563 if (bMinSet && opt.bAuto)
3565 printf("Note: min and max given, switching off -auto.\n");
3569 if (opt.bTauIntGiven && opt.bCalcTauInt)
3571 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3572 "the autocorrelation times. Not both.");
3575 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3577 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3578 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3580 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3582 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3583 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3586 /* Set # of OpenMP threads */
3589 gmx_omp_set_num_threads(nthreads);
3593 nthreads = gmx_omp_get_max_threads();
3597 printf("\nNote: Will use %d OpenMP threads.\n\n", nthreads);
3600 /* Reading gmx4 pull output and tpr files */
3601 if (opt.bTpr || opt.bPullf || opt.bPullx)
3603 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3605 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3606 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3607 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3608 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3609 if (nfiles != nfiles2)
3611 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3612 opt.fnTpr, nfiles2, fnPull);
3615 /* Read file that selects the pull group to be used */
3616 if (opt.fnGroupsel != NULL)
3618 readPullGroupSelection(&opt, fninTpr, nfiles);
3621 window = initUmbrellaWindows(nfiles);
3622 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3625 { /* reading pdo files */
3626 if (opt.fnGroupsel != NULL)
3628 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3629 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3631 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3632 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3633 window = initUmbrellaWindows(nfiles);
3634 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3638 /* enforce equal weight for all histograms? */
3641 enforceEqualWeights(window, nwins);
3644 /* write histograms */
3645 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3646 xlabel, "count", opt.oenv);
3647 for (l = 0; l < opt.bins; ++l)
3649 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3650 for (i = 0; i < nwins; ++i)
3652 for (j = 0; j < window[i].nPull; ++j)
3654 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3657 fprintf(histout, "\n");
3660 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3663 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3667 /* Using tabulated umbrella potential */
3670 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3673 /* Integrated autocorrelation times provided ? */
3674 if (opt.bTauIntGiven)
3676 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3679 /* Compute integrated autocorrelation times */
3680 if (opt.bCalcTauInt)
3682 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3685 /* calc average and sigma for each histogram
3686 (maybe required for bootstrapping. If not, this is fast anyhow) */
3687 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3689 averageSigma(window, nwins);
3692 /* Get initial potential by simple integration */
3693 if (opt.bInitPotByIntegration)
3695 guessPotByIntegration(window, nwins, &opt);
3698 /* Check if complete reaction coordinate is covered */
3699 checkReactionCoordinateCovered(window, nwins, &opt);
3701 /* Calculate profile */
3702 snew(profile, opt.bins);
3710 if ( (i%opt.stepUpdateContrib) == 0)
3712 setup_acc_wham(profile, window, nwins, &opt);
3714 if (maxchange < opt.Tolerance)
3717 /* if (opt.verbose) */
3718 printf("Switched to exact iteration in iteration %d\n", i);
3720 calc_profile(profile, window, nwins, &opt, bExact);
3721 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3723 printf("\t%4d) Maximum change %e\n", i, maxchange);
3727 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3728 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3730 /* calc error from Kumar's formula */
3731 /* Unclear how the error propagates along reaction coordinate, therefore
3733 /* calc_error_kumar(profile,window, nwins,&opt); */
3735 /* Write profile in energy units? */
3738 prof_normalization_and_unit(profile, &opt);
3739 strcpy(ylabel, en_unit_label[opt.unit]);
3740 strcpy(title, "Umbrella potential");
3744 strcpy(ylabel, "Density of states");
3745 strcpy(title, "Density of states");
3748 /* symmetrize profile around z=0? */
3751 symmetrizeProfile(profile, &opt);
3754 /* write profile or density of states */
3755 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3756 for (i = 0; i < opt.bins; ++i)
3758 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3761 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3763 /* Bootstrap Method */
3766 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3767 opt2fn("-hist", NFILE, fnm),
3768 ylabel, profile, window, nwins, &opt);
3772 freeUmbrellaWindows(window, nfiles);
3774 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3775 please_cite(stdout, "Hub2010");