2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
56 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/fileio/copyrite.h"
58 #include "gromacs/fileio/tpxio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxana/gmx_ana.h"
61 #include "gromacs/math/units.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/mdtypes/md_enums.h"
64 #include "gromacs/random/random.h"
65 #include "gromacs/utility/arraysize.h"
66 #include "gromacs/utility/cstringutil.h"
67 #include "gromacs/utility/exceptions.h"
68 #include "gromacs/utility/fatalerror.h"
69 #include "gromacs/utility/futil.h"
70 #include "gromacs/utility/gmxomp.h"
71 #include "gromacs/utility/smalloc.h"
73 //! longest file names allowed in input files
74 #define WHAM_MAXFILELEN 2048
77 * x-axis legend for output files
79 static const char *xlabel = "\\xx\\f{} (nm)";
82 * enum for energy units
85 enSel, en_kJ, en_kCal, en_kT, enNr
88 * enum for type of input files (pdos, tpr, or pullf)
91 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
94 /*! \brief enum for bootstrapping method
96 * These bootstrap methods are supported:
97 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
98 * (bsMethod_BayesianHist)
99 * - bootstrap complete histograms (bsMethod_hist)
100 * - bootstrap trajectories from given umbrella histograms. This generates new
101 * "synthetic" histograms (bsMethod_traj)
102 * - bootstrap trajectories from Gaussian with mu/sigma computed from
103 * the respective histogram (bsMethod_trajGauss). This gives very similar
104 * results compared to bsMethod_traj.
106 * ********************************************************************
107 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
108 * JS Hub, BL de Groot, D van der Spoel
109 * g_wham - A free weighted histogram analysis implementation including
110 * robust error and autocorrelation estimates,
111 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
112 * ********************************************************************
115 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
116 bsMethod_traj, bsMethod_trajGauss
120 //! Parameters of the umbrella potentials
124 * \name Using umbrella pull code since gromacs 4.x
127 int npullcrds_tot; //!< nr of pull coordinates in tpr file
128 int npullcrds; //!< nr of umbrella pull coordinates for reading
129 int pull_geometry; //!< such as distance, direction
130 ivec pull_dim; //!< pull dimension with geometry distance
131 int pull_ndim; //!< nr of pull_dim != 0
132 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
133 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
134 real *k; //!< force constants in tpr file
135 real *init_dist; //!< reference displacements
136 real *umbInitDist; //!< reference displacement in umbrella direction
139 * \name Using PDO files common until gromacs 3.x
147 char PullName[4][256];
149 double UmbCons[4][3];
153 //! Data in the umbrella histograms
156 int nPull; //!< nr of pull groups in this pdo or pullf/x file
157 double **Histo; //!< nPull histograms
158 double **cum; //!< nPull cumulative distribution functions
159 int nBin; //!< nr of bins. identical to opt->bins
160 double *k; //!< force constants for the nPull groups
161 double *pos; //!< umbrella positions for the nPull groups
162 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
163 int *N; //!< nr of data points in nPull histograms.
164 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
166 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
168 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
169 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
172 double *tau; //!< intetrated autocorrelation time (IACT)
173 double *tausmooth; //!< smoothed IACT
175 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
177 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
179 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
181 /*! \brief average force estimated from average displacement, fAv=dzAv*k
183 * Used for integration to guess the potential.
186 real *aver; //!< average of histograms
187 real *sigma; //!< stddev of histograms
188 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
191 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
194 int n; //!< total nr of pull groups in this tpr file
195 int nUse; //!< nr of pull groups used
196 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
199 //! Parameters of WHAM
206 const char *fnTpr, *fnPullf, *fnGroupsel;
207 const char *fnPdo, *fnPullx; //!< file names of input
208 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
209 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
211 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
212 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
213 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
214 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
217 * \name Basic WHAM options
220 int bins; //!< nr of bins, min, max, and dz of profile
222 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
223 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
226 * \name Output control
229 gmx_bool bLog; //!< energy output (instead of probability) for profile
230 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
231 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
232 /*! \brief after wham, set prof to zero at this z-position.
233 * When bootstrapping, set zProf0 to a "stable" reference position.
236 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
238 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
239 gmx_bool bAuto; //!< determine min and max automatically but do not exit
241 gmx_bool verbose; //!< more noisy wham mode
242 int stepchange; //!< print maximum change in prof after how many interations
243 gmx_output_env_t *oenv; //!< xvgr options
246 * \name Autocorrelation stuff
249 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
250 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
251 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
252 real acTrestart; //!< when computing ACT, time between restarting points
254 /* \brief Enforce the same weight for each umbella window, that is
255 * calculate with the same number of data points for
256 * each window. That can be reasonable, if the histograms
257 * have different length, but due to autocorrelation,
258 * a longer simulation should not have larger weightin wham.
264 * \name Bootstrapping stuff
267 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
269 /* \brief bootstrap method
271 * if == bsMethod_hist, consider complete histograms as independent
272 * data points and, hence, only mix complete histograms.
273 * if == bsMethod_BayesianHist, consider complete histograms
274 * as independent data points, but assign random weights
275 * to the histograms during the bootstrapping ("Bayesian bootstrap")
276 * In case of long correlations (e.g., inside a channel), these
277 * will yield a more realistic error.
278 * if == bsMethod_traj(Gauss), generate synthetic histograms
280 * histogram by generating an autocorrelated random sequence
281 * that is distributed according to the respective given
282 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
283 * (instead of from the umbrella histogram) to generate a new
288 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
291 /* \brief when mixing histograms, mix only histograms withing blocks
292 long the reaction coordinate xi. Avoids gaps along xi. */
293 int histBootStrapBlockLength;
295 int bsSeed; //!< random seed for bootstrapping
297 /* \brief Write cumulative distribution functions (CDFs) of histograms
298 and write the generated histograms for each bootstrap */
302 * \name tabulated umbrella potential stuff
306 double *tabX, *tabY, tabMin, tabMax, tabDz;
309 gmx_rng_t rng; //!< gromacs random number generator
312 //! Make an umbrella window (may contain several histograms)
313 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
315 t_UmbrellaWindow *win;
318 for (i = 0; i < nwin; i++)
320 win[i].Histo = win[i].cum = 0;
321 win[i].k = win[i].pos = win[i].z = 0;
322 win[i].N = win[i].Ntot = 0;
323 win[i].g = win[i].tau = win[i].tausmooth = 0;
327 win[i].aver = win[i].sigma = 0;
333 //! Delete an umbrella window (may contain several histograms)
334 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
337 for (i = 0; i < nwin; i++)
341 for (j = 0; j < win[i].nPull; j++)
343 sfree(win[i].Histo[j]);
348 for (j = 0; j < win[i].nPull; j++)
350 sfree(win[i].cum[j]);
355 for (j = 0; j < win[i].nPull; j++)
357 sfree(win[i].bContrib[j]);
369 sfree(win[i].tausmooth);
370 sfree(win[i].bContrib);
372 sfree(win[i].forceAv);
375 sfree(win[i].bsWeight);
381 * Read and setup tabulated umbrella potential
383 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
388 printf("Setting up tabulated potential from file %s\n", fn);
389 nl = read_xvg(fn, &y, &ny);
393 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
395 opt->tabMin = y[0][0];
396 opt->tabMax = y[0][nl-1];
397 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
400 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
401 "ascending z-direction", fn);
403 for (i = 0; i < nl-1; i++)
405 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
407 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
412 for (i = 0; i < nl; i++)
414 opt->tabX[i] = y[0][i];
415 opt->tabY[i] = y[1][i];
417 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
418 opt->tabMin, opt->tabMax, opt->tabDz);
421 //! Read the header of an PDO file (position, force const, nr of groups)
422 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
425 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
427 std::istringstream ist;
430 if (fgets(line, 2048, file) == NULL)
432 gmx_fatal(FARGS, "Error reading header from pdo file\n");
435 ist >> Buffer0 >> Buffer1 >> Buffer2;
436 if (std::strcmp(Buffer1, "UMBRELLA"))
438 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
439 "(Found in first line: `%s')\n",
440 Buffer1, "UMBRELLA", line);
442 if (std::strcmp(Buffer2, "3.0"))
444 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
448 if (fgets(line, 2048, file) == NULL)
450 gmx_fatal(FARGS, "Error reading header from pdo file\n");
453 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
454 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
456 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
457 if (header->nDim != 1)
459 gmx_fatal(FARGS, "Currently only supports one dimension");
463 if (fgets(line, 2048, file) == NULL)
465 gmx_fatal(FARGS, "Error reading header from pdo file\n");
468 ist >> Buffer0 >> Buffer1 >> header->nSkip;
471 if (fgets(line, 2048, file) == NULL)
473 gmx_fatal(FARGS, "Error reading header from pdo file\n");
476 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
479 if (fgets(line, 2048, file) == NULL)
481 gmx_fatal(FARGS, "Error reading header from pdo file\n");
484 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
488 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
492 for (i = 0; i < header->nPull; ++i)
494 if (fgets(line, 2048, file) == NULL)
496 gmx_fatal(FARGS, "Error reading header from pdo file\n");
499 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
500 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
501 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
505 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
506 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
510 if (fgets(line, 2048, file) == NULL)
512 gmx_fatal(FARGS, "Cannot read from file\n");
516 if (std::strcmp(Buffer3, "#####") != 0)
518 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
523 static char *fgets3(FILE *fp, char ptr[], int *len)
528 if (fgets(ptr, *len-1, fp) == NULL)
533 while ((std::strchr(ptr, '\n') == NULL) && (!feof(fp)))
535 /* This line is longer than len characters, let's increase len! */
539 if (fgets(p-1, STRLEN, fp) == NULL)
544 slen = std::strlen(ptr);
545 if (ptr[slen-1] == '\n')
553 /*! \brief Read the data columns of and PDO file.
555 * TO DO: Get rid of the scanf function to avoid the clang warning.
556 * At the moment, this warning is avoided by hiding the format string
557 * the variable fmtlf.
559 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
560 int fileno, t_UmbrellaWindow * win,
561 t_UmbrellaOptions *opt,
562 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
564 int i, inttemp, bins, count, ntot;
565 real minval, maxval, minfound = 1e20, maxfound = -1e20;
566 double temp, time, time0 = 0, dt;
568 t_UmbrellaWindow * window = 0;
569 gmx_bool timeok, dt_ok = 1;
570 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
571 int len = STRLEN, dstep = 1;
572 const int blocklen = 4096;
582 /* Need to alocate memory and set up structure */
583 window->nPull = header->nPull;
586 snew(window->Histo, window->nPull);
587 snew(window->z, window->nPull);
588 snew(window->k, window->nPull);
589 snew(window->pos, window->nPull);
590 snew(window->N, window->nPull);
591 snew(window->Ntot, window->nPull);
592 snew(window->g, window->nPull);
593 snew(window->bsWeight, window->nPull);
595 window->bContrib = 0;
597 if (opt->bCalcTauInt)
599 snew(window->ztime, window->nPull);
605 snew(lennow, window->nPull);
607 for (i = 0; i < window->nPull; ++i)
610 window->bsWeight[i] = 1.;
611 snew(window->Histo[i], bins);
612 window->k[i] = header->UmbCons[i][0];
613 window->pos[i] = header->UmbPos[i][0];
617 if (opt->bCalcTauInt)
619 window->ztime[i] = 0;
623 /* Done with setup */
629 minval = maxval = bins = 0; /* Get rid of warnings */
634 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
638 if (ptr[0] == '#' || std::strlen(ptr) < 2)
643 /* Initiate format string */
645 std::strcat(fmtign, "%*s");
647 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
648 /* Round time to fs */
649 time = 1.0/1000*( static_cast<gmx_int64_t> (time*1000+0.5) );
651 /* get time step of pdo file */
661 dstep = static_cast<int>(opt->dt/dt+0.5);
669 window->dt = dt*dstep;
674 dt_ok = ((count-1)%dstep == 0);
675 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
677 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
678 time,opt->tmin, opt->tmax, dt_ok,timeok); */
682 for (i = 0; i < header->nPull; ++i)
684 std::strcpy(fmt, fmtign);
685 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
686 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
687 if (sscanf(ptr, fmt, &temp))
689 temp += header->UmbPos[i][0];
703 if (opt->bCalcTauInt)
705 /* save time series for autocorrelation analysis */
706 ntot = window->Ntot[i];
707 if (ntot >= lennow[i])
709 lennow[i] += blocklen;
710 srenew(window->ztime[i], lennow[i]);
712 window->ztime[i][ntot] = temp;
716 temp /= (maxval-minval);
718 temp = std::floor(temp);
720 inttemp = static_cast<int> (temp);
727 else if (inttemp >= bins)
733 if (inttemp >= 0 && inttemp < bins)
735 window->Histo[i][inttemp] += 1.;
743 if (time > opt->tmax)
747 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
763 /*! \brief Set identical weights for all histograms
765 * Normally, the weight is given by the number data points in each
766 * histogram, together with the autocorrelation time. This can be overwritten
767 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
768 * an appropriate autocorrelation times instead of using this function.
770 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
772 int i, k, j, NEnforced;
775 NEnforced = window[0].Ntot[0];
776 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
777 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
778 /* enforce all histograms to have the same weight as the very first histogram */
780 for (j = 0; j < nWindows; ++j)
782 for (k = 0; k < window[j].nPull; ++k)
784 ratio = 1.0*NEnforced/window[j].Ntot[k];
785 for (i = 0; i < window[0].nBin; ++i)
787 window[j].Histo[k][i] *= ratio;
789 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
794 /*! \brief Simple linear interpolation between two given tabulated points
796 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
799 double pl, pu, dz, dp;
801 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
803 if (jl < 0 || ju >= opt->tabNbins)
805 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
806 "Provide an extended table.", dist, jl, ju);
810 dz = dist-opt->tabX[jl];
811 dp = (pu-pl)*dz/opt->tabDz;
817 * Check which bins substiantially contribute (accelerates WHAM)
819 * Don't worry, that routine does not mean we compute the PMF in limited precision.
820 * After rapid convergence (using only substiantal contributions), we always switch to
823 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
824 t_UmbrellaOptions *opt)
826 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
827 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
828 gmx_bool bAnyContrib;
829 static int bFirst = 1;
830 static double wham_contrib_lim;
834 for (i = 0; i < nWindows; ++i)
836 nGrptot += window[i].nPull;
838 wham_contrib_lim = opt->Tolerance/nGrptot;
841 ztot = opt->max-opt->min;
844 for (i = 0; i < nWindows; ++i)
846 if (!window[i].bContrib)
848 snew(window[i].bContrib, window[i].nPull);
850 for (j = 0; j < window[i].nPull; ++j)
852 if (!window[i].bContrib[j])
854 snew(window[i].bContrib[j], opt->bins);
857 for (k = 0; k < opt->bins; ++k)
859 temp = (1.0*k+0.5)*dz+min;
860 distance = temp - window[i].pos[j]; /* distance to umbrella center */
862 { /* in cyclic wham: */
863 if (distance > ztot_half) /* |distance| < ztot_half */
867 else if (distance < -ztot_half)
872 /* Note: there are two contributions to bin k in the wham equations:
873 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
874 ii) exp(- U/(BOLTZ*opt->Temperature))
875 where U is the umbrella potential
876 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
881 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
885 U = tabulated_pot(distance, opt); /* Use tabulated potential */
887 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
888 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
889 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
890 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
891 if (window[i].bContrib[j][k])
897 /* If this histo is far outside min and max all bContrib may be FALSE,
898 causing a floating point exception later on. To avoid that, switch
902 for (k = 0; k < opt->bins; ++k)
904 window[i].bContrib[j][k] = TRUE;
911 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
912 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
917 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
923 //! Compute the PMF (one of the two main WHAM routines)
924 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
925 t_UmbrellaOptions *opt, gmx_bool bExact)
927 double ztot_half, ztot, min = opt->min, dz = opt->dz;
929 ztot = opt->max-opt->min;
936 int nthreads = gmx_omp_get_max_threads();
937 int thread_id = gmx_omp_get_thread_num();
939 int i0 = thread_id*opt->bins/nthreads;
940 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
942 for (i = i0; i < i1; ++i)
945 double num, denom, invg, temp = 0, distance, U = 0;
947 for (j = 0; j < nWindows; ++j)
949 for (k = 0; k < window[j].nPull; ++k)
951 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
952 temp = (1.0*i+0.5)*dz+min;
953 num += invg*window[j].Histo[k][i];
955 if (!(bExact || window[j].bContrib[k][i]))
959 distance = temp - window[j].pos[k]; /* distance to umbrella center */
961 { /* in cyclic wham: */
962 if (distance > ztot_half) /* |distance| < ztot_half */
966 else if (distance < -ztot_half)
974 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
978 U = tabulated_pot(distance, opt); /* Use tabulated potential */
980 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
983 profile[i] = num/denom;
986 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
990 //! Compute the free energy offsets z (one of the two main WHAM routines)
991 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
992 t_UmbrellaOptions *opt, gmx_bool bExact)
994 double min = opt->min, dz = opt->dz, ztot_half, ztot;
995 double maxglob = -1e20;
997 ztot = opt->max-opt->min;
1000 #pragma omp parallel
1004 int nthreads = gmx_omp_get_max_threads();
1005 int thread_id = gmx_omp_get_thread_num();
1007 int i0 = thread_id*nWindows/nthreads;
1008 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1009 double maxloc = -1e20;
1011 for (i = i0; i < i1; ++i)
1013 double total = 0, temp, distance, U = 0;
1016 for (j = 0; j < window[i].nPull; ++j)
1019 for (k = 0; k < window[i].nBin; ++k)
1021 if (!(bExact || window[i].bContrib[j][k]))
1025 temp = (1.0*k+0.5)*dz+min;
1026 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1028 { /* in cyclic wham: */
1029 if (distance > ztot_half) /* |distance| < ztot_half */
1033 else if (distance < -ztot_half)
1041 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1045 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1047 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1049 /* Avoid floating point exception if window is far outside min and max */
1052 total = -std::log(total);
1058 temp = std::abs(total - window[i].z[j]);
1063 window[i].z[j] = total;
1066 /* Now get maximum maxloc from the threads and put in maxglob */
1067 if (maxloc > maxglob)
1069 #pragma omp critical
1071 if (maxloc > maxglob)
1078 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1084 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1085 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1087 int i, j, bins = opt->bins;
1088 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1091 if (min > 0. || max < 0.)
1093 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1094 opt->min, opt->max);
1099 for (i = 0; i < bins; i++)
1103 /* bin left of zsym */
1104 j = static_cast<int> (std::floor((zsym-min)/dz-0.5));
1105 if (j >= 0 && (j+1) < bins)
1107 /* interpolate profile linearly between bins j and j+1 */
1108 z1 = min+(j+0.5)*dz;
1110 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1111 /* average between left and right */
1112 prof2[i] = 0.5*(profsym+profile[i]);
1116 prof2[i] = profile[i];
1120 std::memcpy(profile, prof2, bins*sizeof(double));
1124 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1125 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1128 double unit_factor = 1., diff;
1132 /* Not log? Nothing to do! */
1138 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1139 if (opt->unit == en_kT)
1143 else if (opt->unit == en_kJ)
1145 unit_factor = BOLTZ*opt->Temperature;
1147 else if (opt->unit == en_kCal)
1149 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1153 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1156 for (i = 0; i < bins; i++)
1158 if (profile[i] > 0.0)
1160 profile[i] = -std::log(profile[i])*unit_factor;
1164 /* shift to zero at z=opt->zProf0 */
1165 if (!opt->bProf0Set)
1171 /* Get bin with shortest distance to opt->zProf0
1172 (-0.5 from bin position and +0.5 from rounding cancel) */
1173 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1178 else if (imin >= bins)
1182 diff = profile[imin];
1186 for (i = 0; i < bins; i++)
1192 //! Make an array of random integers (used for bootstrapping)
1193 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1195 int ipull, blockBase, nr, ipullRandom;
1197 if (blockLength == 0)
1199 blockLength = nPull;
1202 for (ipull = 0; ipull < nPull; ipull++)
1204 blockBase = (ipull/blockLength)*blockLength;
1206 { /* make sure nothing bad happens in the last block */
1207 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1208 ipullRandom = blockBase + nr;
1210 while (ipullRandom >= nPull);
1211 if (ipullRandom < 0 || ipullRandom >= nPull)
1213 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1214 "blockLength = %d, blockBase = %d\n",
1215 ipullRandom, nPull, nr, blockLength, blockBase);
1217 randomArray[ipull] = ipullRandom;
1219 /*for (ipull=0; ipull<nPull; ipull++)
1220 printf("%d ",randomArray[ipull]); printf("\n"); */
1223 /*! \brief Set pull group information of a synthetic histogram
1225 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1226 * but it is not required if we bootstrap complete histograms.
1228 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1229 t_UmbrellaWindow *thisWindow, int pullid)
1231 synthWindow->N [0] = thisWindow->N [pullid];
1232 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1233 synthWindow->pos [0] = thisWindow->pos [pullid];
1234 synthWindow->z [0] = thisWindow->z [pullid];
1235 synthWindow->k [0] = thisWindow->k [pullid];
1236 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1237 synthWindow->g [0] = thisWindow->g [pullid];
1238 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1241 /*! \brief Calculate cumulative distribution function of of all histograms.
1243 * This allow to create random number sequences
1244 * which are distributed according to the histograms. Required to generate
1245 * the "synthetic" histograms for the Bootstrap method
1247 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1248 t_UmbrellaOptions *opt, const char *fnhist)
1252 char *fn = 0, *buf = 0;
1255 if (opt->bs_verbose)
1257 snew(fn, std::strlen(fnhist)+10);
1258 snew(buf, std::strlen(fnhist)+10);
1259 sprintf(fn, "%s_cumul.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4));
1260 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1264 for (i = 0; i < nWindows; i++)
1266 snew(window[i].cum, window[i].nPull);
1267 for (j = 0; j < window[i].nPull; j++)
1269 snew(window[i].cum[j], nbin+1);
1270 window[i].cum[j][0] = 0.;
1271 for (k = 1; k <= nbin; k++)
1273 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1276 /* normalize CDFs. Ensure cum[nbin]==1 */
1277 last = window[i].cum[j][nbin];
1278 for (k = 0; k <= nbin; k++)
1280 window[i].cum[j][k] /= last;
1285 printf("Cumulative distriubtion functions of all histograms created.\n");
1286 if (opt->bs_verbose)
1288 for (k = 0; k <= nbin; k++)
1290 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1291 for (i = 0; i < nWindows; i++)
1293 for (j = 0; j < window[i].nPull; j++)
1295 fprintf(fp, "%g\t", window[i].cum[j][k]);
1300 printf("Wrote cumulative distribution functions to %s\n", fn);
1308 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1310 * This is used to generate a random sequence distributed according to a histogram
1312 void searchCumulative(double xx[], int n, double x, int *j)
1334 else if (x == xx[n-1])
1344 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1345 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1346 int pullid, t_UmbrellaOptions *opt)
1348 int N, i, nbins, r_index, ibin;
1349 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1352 N = thisWindow->N[pullid];
1353 dt = thisWindow->dt;
1354 nbins = thisWindow->nBin;
1356 /* tau = autocorrelation time */
1357 if (opt->tauBootStrap > 0.0)
1359 tausteps = opt->tauBootStrap/dt;
1361 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1363 /* calc tausteps from g=1+2tausteps */
1364 g = thisWindow->g[pullid];
1370 "When generating hypothetical trajctories from given umbrella histograms,\n"
1371 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1372 "cannot be predicted. You have 3 options:\n"
1373 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1374 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1376 " with option -iiact for all umbrella windows.\n"
1377 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1378 " Use option (3) only if you are sure what you're doing, you may severely\n"
1379 " underestimate the error if a too small ACT is given.\n");
1380 gmx_fatal(FARGS, errstr);
1383 synthWindow->N [0] = N;
1384 synthWindow->pos [0] = thisWindow->pos[pullid];
1385 synthWindow->z [0] = thisWindow->z[pullid];
1386 synthWindow->k [0] = thisWindow->k[pullid];
1387 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1388 synthWindow->g [0] = thisWindow->g [pullid];
1389 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1391 for (i = 0; i < nbins; i++)
1393 synthWindow->Histo[0][i] = 0.;
1396 if (opt->bsMethod == bsMethod_trajGauss)
1398 sig = thisWindow->sigma [pullid];
1399 mu = thisWindow->aver [pullid];
1402 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1404 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1406 z = a*x + sqrt(1-a^2)*y
1407 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1408 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1410 C(t) = exp(-t/tau) with tau=-1/ln(a)
1412 Then, use error function to turn the Gaussian random variable into a uniformly
1413 distributed one in [0,1]. Eventually, use cumulative distribution function of
1414 histogram to get random variables distributed according to histogram.
1415 Note: The ACT of the flat distribution and of the generated histogram is not
1416 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1418 a = std::exp(-1.0/tausteps);
1419 ap = std::sqrt(1.0-a*a);
1420 invsqrt2 = 1.0/std::sqrt(2.0);
1422 /* init random sequence */
1423 x = gmx_rng_gaussian_table(opt->rng);
1425 if (opt->bsMethod == bsMethod_traj)
1427 /* bootstrap points from the umbrella histograms */
1428 for (i = 0; i < N; i++)
1430 y = gmx_rng_gaussian_table(opt->rng);
1432 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1433 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1435 r = 0.5*(1+std::erf(x*invsqrt2));
1436 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1437 synthWindow->Histo[0][r_index] += 1.;
1440 else if (opt->bsMethod == bsMethod_trajGauss)
1442 /* bootstrap points from a Gaussian with the same average and sigma
1443 as the respective umbrella histogram. The idea was, that -given
1444 limited sampling- the bootstrapped histograms are otherwise biased
1445 from the limited sampling of the US histos. However, bootstrapping from
1446 the Gaussian seems to yield a similar estimate. */
1450 y = gmx_rng_gaussian_table(opt->rng);
1453 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1458 while ( (ibin += nbins) < 0)
1463 else if (ibin >= nbins)
1465 while ( (ibin -= nbins) >= nbins)
1472 if (ibin >= 0 && ibin < nbins)
1474 synthWindow->Histo[0][ibin] += 1.;
1481 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1485 /*! \brief Write all histograms to a file
1487 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1488 * sets of bootstrapped histograms.
1490 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1491 int bs_index, t_UmbrellaOptions *opt)
1493 char *fn = 0, *buf = 0, title[256];
1499 snew(fn, std::strlen(fnhist)+10);
1500 snew(buf, std::strlen(fnhist)+1);
1501 sprintf(fn, "%s_bs%d.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4), bs_index);
1502 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1506 fn = gmx_strdup(fnhist);
1507 std::strcpy(title, "Umbrella histograms");
1510 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1513 /* Write histograms */
1514 for (l = 0; l < bins; ++l)
1516 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1517 for (i = 0; i < nWindows; ++i)
1519 for (j = 0; j < window[i].nPull; ++j)
1521 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1528 printf("Wrote %s\n", fn);
1536 //! Used for qsort to sort random numbers
1537 int func_wham_is_larger(const void *a, const void *b)
1556 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1557 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1564 /* generate ordered random numbers between 0 and nAllPull */
1565 for (i = 0; i < nAllPull-1; i++)
1567 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1569 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1570 r[nAllPull-1] = 1.0*nAllPull;
1572 synthwin[0].bsWeight[0] = r[0];
1573 for (i = 1; i < nAllPull; i++)
1575 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1578 /* avoid to have zero weight by adding a tiny value */
1579 for (i = 0; i < nAllPull; i++)
1581 if (synthwin[i].bsWeight[0] < 1e-5)
1583 synthwin[i].bsWeight[0] = 1e-5;
1590 //! The main bootstrapping routine
1591 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1592 char* ylabel, double *profile,
1593 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1595 t_UmbrellaWindow * synthWindow;
1596 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1597 int i, j, *randomArray = 0, winid, pullid, ib;
1598 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1600 gmx_bool bExact = FALSE;
1602 /* init random generator */
1603 if (opt->bsSeed == -1)
1605 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1609 opt->rng = gmx_rng_init(opt->bsSeed);
1612 snew(bsProfile, opt->bins);
1613 snew(bsProfiles_av, opt->bins);
1614 snew(bsProfiles_av2, opt->bins);
1616 /* Create array of all pull groups. Note that different windows
1617 may have different nr of pull groups
1618 First: Get total nr of pull groups */
1620 for (i = 0; i < nWindows; i++)
1622 nAllPull += window[i].nPull;
1624 snew(allPull_winId, nAllPull);
1625 snew(allPull_pullId, nAllPull);
1627 /* Setup one array of all pull groups */
1628 for (i = 0; i < nWindows; i++)
1630 for (j = 0; j < window[i].nPull; j++)
1632 allPull_winId[iAllPull] = i;
1633 allPull_pullId[iAllPull] = j;
1638 /* setup stuff for synthetic windows */
1639 snew(synthWindow, nAllPull);
1640 for (i = 0; i < nAllPull; i++)
1642 synthWindow[i].nPull = 1;
1643 synthWindow[i].nBin = opt->bins;
1644 snew(synthWindow[i].Histo, 1);
1645 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1647 snew(synthWindow[i].Histo[0], opt->bins);
1649 snew(synthWindow[i].N, 1);
1650 snew(synthWindow[i].pos, 1);
1651 snew(synthWindow[i].z, 1);
1652 snew(synthWindow[i].k, 1);
1653 snew(synthWindow[i].bContrib, 1);
1654 snew(synthWindow[i].g, 1);
1655 snew(synthWindow[i].bsWeight, 1);
1658 switch (opt->bsMethod)
1661 snew(randomArray, nAllPull);
1662 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1663 please_cite(stdout, "Hub2006");
1665 case bsMethod_BayesianHist:
1666 /* just copy all histogams into synthWindow array */
1667 for (i = 0; i < nAllPull; i++)
1669 winid = allPull_winId [i];
1670 pullid = allPull_pullId[i];
1671 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1675 case bsMethod_trajGauss:
1676 calc_cumulatives(window, nWindows, opt, fnhist);
1679 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1682 /* do bootstrapping */
1683 fp = xvgropen(fnprof, "Boot strap profiles", xlabel, ylabel, opt->oenv);
1684 for (ib = 0; ib < opt->nBootStrap; ib++)
1686 printf(" *******************************************\n"
1687 " ******** Start bootstrap nr %d ************\n"
1688 " *******************************************\n", ib+1);
1690 switch (opt->bsMethod)
1693 /* bootstrap complete histograms from given histograms */
1694 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1695 for (i = 0; i < nAllPull; i++)
1697 winid = allPull_winId [randomArray[i]];
1698 pullid = allPull_pullId[randomArray[i]];
1699 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1702 case bsMethod_BayesianHist:
1703 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1704 setRandomBsWeights(synthWindow, nAllPull, opt);
1707 case bsMethod_trajGauss:
1708 /* create new histos from given histos, that is generate new hypothetical
1710 for (i = 0; i < nAllPull; i++)
1712 winid = allPull_winId[i];
1713 pullid = allPull_pullId[i];
1714 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1719 /* write histos in case of verbose output */
1720 if (opt->bs_verbose)
1722 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1729 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1732 if ( (i%opt->stepUpdateContrib) == 0)
1734 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1736 if (maxchange < opt->Tolerance)
1740 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1742 printf("\t%4d) Maximum change %e\n", i, maxchange);
1744 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1747 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1748 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1752 prof_normalization_and_unit(bsProfile, opt);
1755 /* symmetrize profile around z=0 */
1758 symmetrizeProfile(bsProfile, opt);
1761 /* save stuff to get average and stddev */
1762 for (i = 0; i < opt->bins; i++)
1765 bsProfiles_av[i] += tmp;
1766 bsProfiles_av2[i] += tmp*tmp;
1767 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1769 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1773 /* write average and stddev */
1774 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1775 if (output_env_get_print_xvgr_codes(opt->oenv))
1777 fprintf(fp, "@TYPE xydy\n");
1779 for (i = 0; i < opt->bins; i++)
1781 bsProfiles_av [i] /= opt->nBootStrap;
1782 bsProfiles_av2[i] /= opt->nBootStrap;
1783 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1784 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1785 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1788 printf("Wrote boot strap result to %s\n", fnres);
1791 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1792 int whaminFileType(char *fn)
1795 len = std::strlen(fn);
1796 if (std::strcmp(fn+len-3, "tpr") == 0)
1800 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1802 return whamin_pullxf;
1804 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1810 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1812 return whamin_unknown;
1815 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1816 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1817 t_UmbrellaOptions *opt)
1819 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1820 int nread, sizenow, i, block = 1;
1823 fp = gmx_ffopen(fn, "r");
1826 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1828 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1830 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1832 if (nread >= sizenow)
1835 srenew(filename, sizenow);
1836 for (i = sizenow-block; i < sizenow; i++)
1838 snew(filename[i], WHAM_MAXFILELEN);
1841 /* remove newline if there is one */
1842 if (tmp[std::strlen(tmp)-1] == '\n')
1844 tmp[std::strlen(tmp)-1] = '\0';
1846 std::strcpy(filename[nread], tmp);
1849 printf("Found file %s in %s\n", filename[nread], fn);
1853 *filenamesRet = filename;
1857 //! Open a file or a pipe to a gzipped file
1858 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1860 char Buffer[1024], gunzip[1024], *Path = 0;
1862 static gmx_bool bFirst = 1;
1864 /* gzipped pdo file? */
1865 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1867 /* search gunzip executable */
1868 if (!(Path = getenv("GMX_PATH_GZIP")))
1870 if (gmx_fexist("/bin/gunzip"))
1872 sprintf(gunzip, "%s", "/bin/gunzip");
1874 else if (gmx_fexist("/usr/bin/gunzip"))
1876 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1880 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1881 "You may want to define the path to gunzip "
1882 "with the environment variable GMX_PATH_GZIP.", gunzip);
1887 sprintf(gunzip, "%s/gunzip", Path);
1888 if (!gmx_fexist(gunzip))
1890 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1891 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1896 printf("Using gunzig executable %s\n", gunzip);
1899 if (!gmx_fexist(fn))
1901 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1903 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1906 printf("Executing command '%s'\n", Buffer);
1909 if ((pipe = popen(Buffer, "r")) == NULL)
1911 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1914 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1920 pipe = gmx_ffopen(fn, "r");
1927 //! Close file or pipe
1928 void pdo_close_file(FILE *fp)
1937 //! Reading all pdo files
1938 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1939 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1942 real mintmp, maxtmp, done = 0.;
1945 /* char Buffer0[1000]; */
1949 gmx_fatal(FARGS, "No files found. Hick.");
1952 /* if min and max are not given, get min and max from the input files */
1955 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1958 for (i = 0; i < nfiles; ++i)
1960 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1961 /*fgets(Buffer0,999,file);
1962 fprintf(stderr,"First line '%s'\n",Buffer0); */
1963 done = 100.0*(i+1)/nfiles;
1964 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1969 read_pdo_header(file, header, opt);
1970 /* here only determine min and max of this window */
1971 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1972 if (maxtmp > opt->max)
1976 if (mintmp < opt->min)
1982 pdo_close_file(file);
1990 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1991 if (opt->bBoundsOnly)
1993 printf("Found option -boundsonly, now exiting.\n");
1997 /* store stepsize in profile */
1998 opt->dz = (opt->max-opt->min)/opt->bins;
2000 /* Having min and max, we read in all files */
2001 /* Loop over all files */
2002 for (i = 0; i < nfiles; ++i)
2004 done = 100.0*(i+1)/nfiles;
2005 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2010 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2011 read_pdo_header(file, header, opt);
2012 /* load data into window */
2013 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2014 if ((window+i)->Ntot[0] == 0)
2016 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2020 pdo_close_file(file);
2028 for (i = 0; i < nfiles; ++i)
2035 //! translate 0/1 to N/Y to write pull dimensions
2036 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2038 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2039 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2044 static int first = 1;
2046 /* printf("Reading %s \n",fn); */
2047 read_tpx_state(fn, &ir, &state, NULL);
2051 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2054 /* nr of pull groups */
2056 for (i = 0; i < ir.pull->ncoord; i++)
2058 if (ir.pull->coord[i].eType == epullUMBRELLA)
2064 header->pull_geometry = ir.pull->coord[i].eGeom;
2065 copy_ivec(ir.pull->coord[i].dim, header->pull_dim);
2070 /* TODO: remove this restriction */
2071 gmx_fatal(FARGS, "Currently tpr files where a non-umbrella pull coordinate is followed by an umbrella pull coordinate are not supported");
2074 for (m = 0; m < DIM; m++)
2076 if (ir.pull->coord[i].eGeom != header->pull_geometry)
2078 /* TODO: remove the restriction */
2079 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should use the same geometry");
2082 if (ir.pull->coord[i].dim[m] != header->pull_dim[m])
2084 /* TODO: remove the restriction */
2085 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should operate on the same dimensions");
2095 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d umbrella pull coordinates\n", ncrd);
2098 header->npullcrds_tot = ir.pull->ncoord;
2099 header->npullcrds = ncrd;
2100 header->bPrintRef = ir.pull->bPrintCOM1;
2101 if (ir.pull->bPrintCOM2)
2103 gmx_fatal(FARGS, "Reading pull output with printing of the COM of group 2 is currently not supported");
2105 if (ir.pull->bPrintRefValue)
2107 gmx_fatal(FARGS, "Reading pull output with printing of the reference value of the coordinates is currently not supported");
2109 header->bPrintComp = ir.pull->bPrintComp;
2110 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2111 /* We should add a struct for each pull coord with all pull coord data
2112 instead of allocating many arrays for each property */
2113 snew(header->k, ncrd);
2114 snew(header->init_dist, ncrd);
2115 snew(header->umbInitDist, ncrd);
2117 /* only z-direction with epullgCYL? */
2118 if (header->pull_geometry == epullgCYL)
2120 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2122 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2123 "However, found dimensions [%s %s %s]\n",
2124 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2125 int2YN(header->pull_dim[ZZ]));
2129 for (i = 0; i < ncrd; i++)
2131 header->k[i] = ir.pull->coord[i].k;
2132 if (header->k[i] == 0.0)
2134 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2135 "That doesn't seem to be an Umbrella tpr.\n",
2138 header->init_dist[i] = ir.pull->coord[i].init;
2140 /* initial distance to reference */
2141 switch (header->pull_geometry)
2144 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2148 header->umbInitDist[i] = header->init_dist[i];
2151 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2155 if (opt->verbose || first)
2157 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2158 "\tPull group coordinates%s expected in pullx files.\n",
2159 fn, header->npullcrds, epullg_names[header->pull_geometry],
2160 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2161 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2162 for (i = 0; i < ncrd; i++)
2164 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2167 if (!opt->verbose && first)
2169 printf("\tUse option -v to see this output for all input tpr files\n\n");
2175 //! 2-norm in a ndim-dimensional space
2176 double dist_ndim(double **dx, int ndim, int line)
2180 for (i = 0; i < ndim; i++)
2182 r2 += sqr(dx[i][line]);
2184 return std::sqrt(r2);
2187 //! Read pullx.xvg or pullf.xvg
2188 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2189 t_UmbrellaWindow * window,
2190 t_UmbrellaOptions *opt,
2191 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2192 t_groupselection *groupsel)
2194 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2195 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2196 real min, max, minfound = 1e20, maxfound = -1e20;
2197 gmx_bool dt_ok, timeok, bHaveForce;
2198 const char *quantity;
2199 const int blocklen = 4096;
2201 static gmx_bool bFirst = TRUE;
2204 * Data columns in pull output:
2205 * - in force output pullf.xvg:
2206 * No reference columns, one column per pull coordinate
2208 * - in position output pullx.xvg
2209 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2210 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2214 if (opt->bPullx && header->bPrintComp)
2216 nColPerCrd += header->pull_ndim;
2218 quantity = opt->bPullx ? "position" : "force";
2220 if (opt->bPullx && header->bPrintRef)
2222 nColRefEachCrd = header->pull_ndim;
2229 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2230 bHaveForce = opt->bPullf;
2232 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2233 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2234 Sorry for the laziness, this is a To-do. */
2235 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2238 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2239 "reading \n(option -if) is supported at present, "
2240 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2241 "forces (pullf.xvg files)\nand provide them to gmx wham with option -if.",
2242 epullg_names[header->pull_geometry]);
2245 nt = read_xvg(fn, &y, &ny);
2247 /* Check consistency */
2250 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2254 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2255 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2257 /* Since this tool code has not updated yet to allow difference pull
2258 * dimensions per pull coordinate, we can't easily check the exact
2259 * number of expected columns, so we only check what we expect for
2260 * the pull coordinates selected for the WHAM analysis.
2262 printf("Expecting these columns in pull file:\n"
2263 "\t%d reference columns for each individual pull coordinate\n"
2264 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2265 printf("With %d pull groups, expect %s%d columns (including the time column)\n",
2267 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2271 if (ny < nColExpect ||
2272 (header->npullcrds == header->npullcrds_tot && ny > nColExpect))
2274 gmx_fatal(FARGS, "Using %d pull coodinates from %s,\n but found %d data columns in %s (expected %s%d)\n"
2275 "\nMaybe you confused options -ix and -if ?\n",
2276 header->npullcrds, fntpr, ny-1, fn,
2277 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2283 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2293 window->dt = y[0][1]-y[0][0];
2295 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2297 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2300 /* Need to alocate memory and set up structure */
2304 /* Use only groups selected with option -is file */
2305 if (header->npullcrds != groupsel->n)
2307 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2308 header->npullcrds, groupsel->n);
2310 window->nPull = groupsel->nUse;
2314 window->nPull = header->npullcrds;
2317 window->nBin = bins;
2318 snew(window->Histo, window->nPull);
2319 snew(window->z, window->nPull);
2320 snew(window->k, window->nPull);
2321 snew(window->pos, window->nPull);
2322 snew(window->N, window->nPull);
2323 snew(window->Ntot, window->nPull);
2324 snew(window->g, window->nPull);
2325 snew(window->bsWeight, window->nPull);
2326 window->bContrib = 0;
2328 if (opt->bCalcTauInt)
2330 snew(window->ztime, window->nPull);
2334 window->ztime = NULL;
2336 snew(lennow, window->nPull);
2338 for (g = 0; g < window->nPull; ++g)
2341 window->bsWeight[g] = 1.;
2342 snew(window->Histo[g], bins);
2344 window->Ntot[g] = 0;
2346 if (opt->bCalcTauInt)
2348 window->ztime[g] = NULL;
2352 /* Copying umbrella center and force const is more involved since not
2353 all pull groups from header (tpr file) may be used in window variable */
2354 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2356 if (groupsel && (groupsel->bUse[g] == FALSE))
2360 window->k[gUsed] = header->k[g];
2361 window->pos[gUsed] = header->umbInitDist[g];
2366 { /* only determine min and max */
2369 min = max = bins = 0; /* Get rid of warnings */
2373 for (i = 0; i < nt; i++)
2375 /* Do you want that time frame? */
2376 t = 1.0/1000*( static_cast<gmx_int64_t> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2378 /* get time step of pdo file and get dstep from opt->dt */
2388 dstep = static_cast<int>(opt->dt/dt+0.5);
2396 window->dt = dt*dstep;
2400 dt_ok = (i%dstep == 0);
2401 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2403 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2404 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2407 /* Note: if groupsel == NULL:
2408 * all groups in pullf/x file are stored in this window, and gUsed == g
2409 * if groupsel != NULL:
2410 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2413 for (g = 0; g < header->npullcrds; ++g)
2415 /* was this group selected for application in WHAM? */
2416 if (groupsel && (groupsel->bUse[g] == FALSE))
2425 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2427 pos = -force/header->k[g] + header->umbInitDist[g];
2431 /* pick the right column from:
2432 * time ref1[ndim] coord1[1(ndim)] ref2[ndim] coord2[1(ndim)] ...
2434 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2438 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2452 if (gUsed >= window->nPull)
2454 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2455 gUsed, window->nPull);
2458 if (opt->bCalcTauInt && !bGetMinMax)
2460 /* save time series for autocorrelation analysis */
2461 ntot = window->Ntot[gUsed];
2462 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2463 if (ntot >= lennow[gUsed])
2465 lennow[gUsed] += blocklen;
2466 srenew(window->ztime[gUsed], lennow[gUsed]);
2468 window->ztime[gUsed][ntot] = pos;
2471 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2476 while ( (ibin += bins) < 0)
2481 else if (ibin >= bins)
2483 while ( (ibin -= bins) >= bins)
2489 if (ibin >= 0 && ibin < bins)
2491 window->Histo[gUsed][ibin] += 1.;
2494 window->Ntot[gUsed]++;
2498 else if (t > opt->tmax)
2502 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2514 for (i = 0; i < ny; i++)
2520 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2521 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2522 t_UmbrellaHeader* header,
2523 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2526 real mintmp, maxtmp;
2528 printf("Reading %d tpr and pullf files\n", nfiles/2);
2530 /* min and max not given? */
2533 printf("Automatic determination of boundaries...\n");
2536 for (i = 0; i < nfiles; i++)
2538 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2540 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2542 read_tpr_header(fnTprs[i], header, opt);
2543 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2545 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2547 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2548 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2549 if (maxtmp > opt->max)
2553 if (mintmp < opt->min)
2558 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2559 if (opt->bBoundsOnly)
2561 printf("Found option -boundsonly, now exiting.\n");
2565 /* store stepsize in profile */
2566 opt->dz = (opt->max-opt->min)/opt->bins;
2568 for (i = 0; i < nfiles; i++)
2570 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2572 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2574 read_tpr_header(fnTprs[i], header, opt);
2575 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2577 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2579 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2580 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2581 if (window[i].Ntot[0] == 0)
2583 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2587 for (i = 0; i < nfiles; i++)
2596 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2598 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2599 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2601 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2603 int nlines, ny, i, ig;
2606 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2607 nlines = read_xvg(fn, &iact, &ny);
2608 if (nlines != nwins)
2610 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2613 for (i = 0; i < nlines; i++)
2615 if (window[i].nPull != ny)
2617 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2618 "number of pull groups is different in different simulations. That is not\n"
2619 "supported yet. Sorry.\n");
2621 for (ig = 0; ig < window[i].nPull; ig++)
2623 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2624 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2626 if (iact[ig][i] <= 0.0)
2628 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2635 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2638 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2639 * that -in case of limited sampling- the ACT may be severely underestimated.
2640 * Note: the g=1+2tau are overwritten.
2641 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2644 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2647 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2649 /* only evaluate within +- 3sigma of the Gausian */
2650 siglim = 3.0*opt->sigSmoothIact;
2651 siglim2 = dsqr(siglim);
2652 /* pre-factor of Gaussian */
2653 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2654 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2656 for (i = 0; i < nwins; i++)
2658 snew(window[i].tausmooth, window[i].nPull);
2659 for (ig = 0; ig < window[i].nPull; ig++)
2663 pos = window[i].pos[ig];
2664 for (j = 0; j < nwins; j++)
2666 for (jg = 0; jg < window[j].nPull; jg++)
2668 dpos2 = dsqr(window[j].pos[jg]-pos);
2669 if (dpos2 < siglim2)
2671 w = gaufact*std::exp(-dpos2*invtwosig2);
2673 tausm += w*window[j].tau[jg];
2674 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2675 w,dpos2,pos,gaufact,invtwosig2); */
2680 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2682 window[i].tausmooth[ig] = tausm;
2686 window[i].tausmooth[ig] = window[i].tau[ig];
2688 window[i].g[ig] = 1+2*tausm/window[i].dt;
2693 //! Stop integrating autoccorelation function when ACF drops under this value
2694 #define WHAM_AC_ZERO_LIMIT 0.05
2696 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2698 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2699 t_UmbrellaOptions *opt, const char *fn)
2701 int i, ig, ncorr, ntot, j, k, *count, restart;
2702 real *corr, c0, dt, tmp;
2703 real *ztime, av, tausteps;
2704 FILE *fp, *fpcorr = 0;
2708 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2709 "time [ps]", "autocorrelation function", opt->oenv);
2713 for (i = 0; i < nwins; i++)
2715 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2717 ntot = window[i].Ntot[0];
2719 /* using half the maximum time as length of autocorrelation function */
2723 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2724 " points. Provide more pull data!", ntot);
2727 /* snew(corrSq,ncorr); */
2730 snew(window[i].tau, window[i].nPull);
2731 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2737 for (ig = 0; ig < window[i].nPull; ig++)
2739 if (ntot != window[i].Ntot[ig])
2741 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2742 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2744 ztime = window[i].ztime[ig];
2746 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2747 for (j = 0, av = 0; (j < ntot); j++)
2752 for (k = 0; (k < ncorr); k++)
2757 for (j = 0; (j < ntot); j += restart)
2759 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2761 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2763 /* corrSq[k] += tmp*tmp; */
2767 /* divide by nr of frames for each time displacement */
2768 for (k = 0; (k < ncorr); k++)
2770 /* count probably = (ncorr-k+(restart-1))/restart; */
2771 corr[k] = corr[k]/count[k];
2772 /* variance of autocorrelation function */
2773 /* corrSq[k]=corrSq[k]/count[k]; */
2775 /* normalize such that corr[0] == 0 */
2777 for (k = 0; (k < ncorr); k++)
2780 /* corrSq[k]*=c0*c0; */
2783 /* write ACFs in verbose mode */
2786 for (k = 0; (k < ncorr); k++)
2788 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2790 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2793 /* esimate integrated correlation time, fitting is too unstable */
2794 tausteps = 0.5*corr[0];
2795 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2796 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2798 tausteps += corr[j];
2801 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2802 Kumar et al, eq. 28 ff. */
2803 window[i].tau[ig] = tausteps*dt;
2804 window[i].g[ig] = 1+2*tausteps;
2805 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2816 /* plot IACT along reaction coordinate */
2817 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2818 if (output_env_get_print_xvgr_codes(opt->oenv))
2820 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2821 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2822 for (i = 0; i < nwins; i++)
2824 fprintf(fp, "# %3d ", i);
2825 for (ig = 0; ig < window[i].nPull; ig++)
2827 fprintf(fp, " %11g", window[i].tau[ig]);
2832 for (i = 0; i < nwins; i++)
2834 for (ig = 0; ig < window[i].nPull; ig++)
2836 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2839 if (opt->sigSmoothIact > 0.0)
2841 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2842 opt->sigSmoothIact);
2843 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2844 smoothIact(window, nwins, opt);
2845 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2846 if (output_env_get_print_xvgr_codes(opt->oenv))
2848 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2849 fprintf(fp, "@ s1 symbol color 2\n");
2851 for (i = 0; i < nwins; i++)
2853 for (ig = 0; ig < window[i].nPull; ig++)
2855 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2860 printf("Wrote %s\n", fn);
2864 * compute average and sigma of each umbrella histogram
2866 void averageSigma(t_UmbrellaWindow *window, int nwins)
2869 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2871 for (i = 0; i < nwins; i++)
2873 snew(window[i].aver, window[i].nPull);
2874 snew(window[i].sigma, window[i].nPull);
2876 ntot = window[i].Ntot[0];
2877 for (ig = 0; ig < window[i].nPull; ig++)
2879 ztime = window[i].ztime[ig];
2880 for (k = 0, av = 0.; k < ntot; k++)
2885 for (k = 0, sum2 = 0.; k < ntot; k++)
2890 sig = std::sqrt(sum2/ntot);
2891 window[i].aver[ig] = av;
2893 /* Note: This estimate for sigma is biased from the limited sampling.
2894 Correct sigma by n/(n-1) where n = number of independent
2895 samples. Only possible if IACT is known.
2899 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2900 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2904 window[i].sigma[ig] = sig;
2906 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2913 * Use histograms to compute average force on pull group.
2915 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2917 int i, j, bins = opt->bins, k;
2918 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2921 dz = (max-min)/bins;
2922 ztot = opt->max-min;
2925 /* Compute average displacement from histograms */
2926 for (j = 0; j < nWindows; ++j)
2928 snew(window[j].forceAv, window[j].nPull);
2929 for (k = 0; k < window[j].nPull; ++k)
2933 for (i = 0; i < opt->bins; ++i)
2935 temp = (1.0*i+0.5)*dz+min;
2936 distance = temp - window[j].pos[k];
2938 { /* in cyclic wham: */
2939 if (distance > ztot_half) /* |distance| < ztot_half */
2943 else if (distance < -ztot_half)
2948 w = window[j].Histo[k][i]/window[j].g[k];
2949 displAv += w*distance;
2951 /* Are we near min or max? We are getting wrong forces from the histgrams since
2952 the histograms are zero outside [min,max). Therefore, assume that the position
2953 on the other side of the histomgram center is equally likely. */
2956 posmirrored = window[j].pos[k]-distance;
2957 if (posmirrored >= max || posmirrored < min)
2959 displAv += -w*distance;
2966 /* average force from average displacement */
2967 window[j].forceAv[k] = displAv*window[j].k[k];
2968 /* sigma from average square displacement */
2969 /* window[j].sigma [k] = sqrt(displAv2); */
2970 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2976 * Check if the complete reaction coordinate is covered by the histograms
2978 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2979 t_UmbrellaOptions *opt)
2981 int i, ig, j, bins = opt->bins, bBoundary;
2982 real avcount = 0, z, relcount, *count;
2983 snew(count, opt->bins);
2985 for (j = 0; j < opt->bins; ++j)
2987 for (i = 0; i < nwins; i++)
2989 for (ig = 0; ig < window[i].nPull; ig++)
2991 count[j] += window[i].Histo[ig][j];
2994 avcount += 1.0*count[j];
2997 for (j = 0; j < bins; ++j)
2999 relcount = count[j]/avcount;
3000 z = (j+0.5)*opt->dz+opt->min;
3001 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
3002 /* check for bins with no data */
3005 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
3006 "You may not get a reasonable profile. Check your histograms!\n", j, z);
3008 /* and check for poor sampling */
3009 else if (relcount < 0.005 && !bBoundary)
3011 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3017 /*! \brief Compute initial potential by integrating the average force
3019 * This speeds up the convergence by roughly a factor of 2
3021 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
3023 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3024 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3027 dz = (opt->max-min)/bins;
3029 printf("Getting initial potential by integration.\n");
3031 /* Compute average displacement from histograms */
3032 computeAverageForce(window, nWindows, opt);
3034 /* Get force for each bin from all histograms in this bin, or, alternatively,
3035 if no histograms are inside this bin, from the closest histogram */
3038 for (j = 0; j < opt->bins; ++j)
3040 pos = (1.0*j+0.5)*dz+min;
3044 groupmin = winmin = 0;
3045 for (i = 0; i < nWindows; i++)
3047 for (ig = 0; ig < window[i].nPull; ig++)
3049 hispos = window[i].pos[ig];
3050 dist = std::abs(hispos-pos);
3051 /* average force within bin */
3055 fAv += window[i].forceAv[ig];
3057 /* at the same time, remember closest histogram */
3066 /* if no histogram found in this bin, use closest histogram */
3073 fAv = window[winmin].forceAv[groupmin];
3077 for (j = 1; j < opt->bins; ++j)
3079 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3082 /* cyclic wham: linearly correct possible offset */
3085 diff = (pot[bins-1]-pot[0])/(bins-1);
3086 for (j = 1; j < opt->bins; ++j)
3093 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3094 for (j = 0; j < opt->bins; ++j)
3096 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3099 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3102 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3103 energy offsets which are usually determined by wham
3104 First: turn pot into probabilities:
3106 for (j = 0; j < opt->bins; ++j)
3108 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3110 calc_z(pot, window, nWindows, opt, TRUE);
3116 //! Count number of words in an line
3117 static int wordcount(char *ptr)
3122 if (std::strlen(ptr) == 0)
3126 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3128 for (i = 0; (ptr[i] != '\0'); i++)
3130 is[cur] = isspace(ptr[i]);
3131 if ((i > 0) && (is[cur] && !is[1-cur]))
3140 /*! \brief Read input file for pull group selection (option -is)
3142 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3144 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3147 int i, iline, n, len = STRLEN, temp;
3148 char *ptr = 0, *tmpbuf = 0;
3149 char fmt[1024], fmtign[1024];
3150 int block = 1, sizenow;
3152 fp = gmx_ffopen(opt->fnGroupsel, "r");
3153 opt->groupsel = NULL;
3158 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3163 if (iline >= sizenow)
3166 srenew(opt->groupsel, sizenow);
3168 opt->groupsel[iline].n = n;
3169 opt->groupsel[iline].nUse = 0;
3170 snew(opt->groupsel[iline].bUse, n);
3173 for (i = 0; i < n; i++)
3175 std::strcpy(fmt, fmtign);
3176 std::strcat(fmt, "%d");
3177 if (sscanf(ptr, fmt, &temp))
3179 opt->groupsel[iline].bUse[i] = (temp > 0);
3180 if (opt->groupsel[iline].bUse[i])
3182 opt->groupsel[iline].nUse++;
3185 std::strcat(fmtign, "%*s");
3189 opt->nGroupsel = iline;
3190 if (nTpr != opt->nGroupsel)
3192 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3196 printf("\nUse only these pull groups:\n");
3197 for (iline = 0; iline < nTpr; iline++)
3199 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3200 for (i = 0; i < opt->groupsel[iline].n; i++)
3202 if (opt->groupsel[iline].bUse[i])
3215 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3217 //! Number of elements in fnm (used for command line parsing)
3218 #define NFILE asize(fnm)
3220 //! The main gmx wham routine
3221 int gmx_wham(int argc, char *argv[])
3223 const char *desc[] = {
3224 "[THISMODULE] is an analysis program that implements the Weighted",
3225 "Histogram Analysis Method (WHAM). It is intended to analyze",
3226 "output files generated by umbrella sampling simulations to ",
3227 "compute a potential of mean force (PMF).[PAR]",
3229 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3230 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3231 "and, if multiple coordinates need to be analyzed, all used the same",
3232 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3234 "At present, three input modes are supported.",
3236 "* With option [TT]-it[tt], the user provides a file which contains the",
3237 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3238 " AND, with option [TT]-ix[tt], a file which contains file names of",
3239 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3240 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3241 " first pullx, etc.",
3242 "* Same as the previous input mode, except that the the user",
3243 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3244 " From the pull force the position in the umbrella potential is",
3245 " computed. This does not work with tabulated umbrella potentials.",
3246 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3247 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3248 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3249 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3250 " must be similar to the following::",
3253 " # Component selection: 0 0 1",
3255 " # Ref. Group 'TestAtom'",
3256 " # Nr. of pull groups 2",
3257 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3258 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3261 " The number of pull groups, umbrella positions, force constants, and names ",
3262 " may (of course) differ. Following the header, a time column and ",
3263 " a data column for each pull group follows (i.e. the displacement",
3264 " with respect to the umbrella center). Up to four pull groups are possible ",
3265 " per [REF].pdo[ref] file at present.",
3267 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3268 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3269 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3270 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3271 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3272 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3273 "used, groupsel.dat looks like this::",
3279 "By default, the output files are",
3281 "* [TT]-o[tt] PMF output file",
3282 "* [TT]-hist[tt] Histograms output file",
3284 "Always check whether the histograms sufficiently overlap.[PAR]",
3285 "The umbrella potential is assumed to be harmonic and the force constants are ",
3286 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3287 "a tabulated potential can be provided with [TT]-tab[tt].",
3292 "* [TT]-bins[tt] Number of bins used in analysis",
3293 "* [TT]-temp[tt] Temperature in the simulations",
3294 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3295 "* [TT]-auto[tt] Automatic determination of boundaries",
3296 "* [TT]-min,-max[tt] Boundaries of the profile",
3298 "The data points that are used to compute the profile",
3299 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3300 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3301 "umbrella window.[PAR]",
3302 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3303 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3304 "With energy output, the energy in the first bin is defined to be zero. ",
3305 "If you want the free energy at a different ",
3306 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3307 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3308 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3309 "[THISMODULE] will make use of the",
3310 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3311 "reaction coordinate will assumed be be neighbors.[PAR]",
3312 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3313 "which may be useful for, e.g. membranes.",
3318 "If available, the number of OpenMP threads used by gmx wham is controlled with [TT]-nt[tt].",
3323 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3324 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3325 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3326 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3327 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3328 "Because the IACTs can be severely underestimated in case of limited ",
3329 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3330 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3331 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3332 "integration of the ACFs while the ACFs are larger 0.05.",
3333 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3334 "less robust) method such as fitting to a double exponential, you can ",
3335 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3336 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3337 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull group in the respective file.",
3342 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3343 "otherwise the statistical error may be substantially underestimated. ",
3344 "More background and examples for the bootstrap technique can be found in ",
3345 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3346 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3347 "Four bootstrapping methods are supported and ",
3348 "selected with [TT]-bs-method[tt].",
3350 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3351 " data points, and the bootstrap is carried out by assigning random weights to the ",
3352 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3353 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3354 " statistical error is underestimated.",
3355 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3356 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3357 " (allowing duplication, i.e. sampling with replacement).",
3358 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3359 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3360 " divided into blocks and only histograms within each block are mixed. Note that ",
3361 " the histograms within each block must be representative for all possible histograms, ",
3362 " otherwise the statistical error is underestimated.",
3363 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3364 " such that the generated data points are distributed according the given histograms ",
3365 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3366 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3367 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3368 " Note that this method may severely underestimate the error in case of limited sampling, ",
3369 " that is if individual histograms do not represent the complete phase space at ",
3370 " the respective positions.",
3371 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3372 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3373 " and width of the umbrella histograms. That method yields similar error estimates ",
3374 " like method [TT]traj[tt].",
3376 "Bootstrapping output:",
3378 "* [TT]-bsres[tt] Average profile and standard deviations",
3379 "* [TT]-bsprof[tt] All bootstrapping profiles",
3381 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3382 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3386 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3387 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3388 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3389 static t_UmbrellaOptions opt;
3390 static int nthreads = -1;
3394 { "-nt", FALSE, etINT, {&nthreads},
3395 "Number of threads used by gmx wham (if -1, all threads will be used or what is specified by the environment variable OMP_NUM_THREADS)"},
3397 { "-min", FALSE, etREAL, {&opt.min},
3398 "Minimum coordinate in profile"},
3399 { "-max", FALSE, etREAL, {&opt.max},
3400 "Maximum coordinate in profile"},
3401 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3402 "Determine min and max automatically"},
3403 { "-bins", FALSE, etINT, {&opt.bins},
3404 "Number of bins in profile"},
3405 { "-temp", FALSE, etREAL, {&opt.Temperature},
3407 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3409 { "-v", FALSE, etBOOL, {&opt.verbose},
3411 { "-b", FALSE, etREAL, {&opt.tmin},
3412 "First time to analyse (ps)"},
3413 { "-e", FALSE, etREAL, {&opt.tmax},
3414 "Last time to analyse (ps)"},
3415 { "-dt", FALSE, etREAL, {&opt.dt},
3416 "Analyse only every dt ps"},
3417 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3418 "Write histograms and exit"},
3419 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3420 "Determine min and max and exit (with [TT]-auto[tt])"},
3421 { "-log", FALSE, etBOOL, {&opt.bLog},
3422 "Calculate the log of the profile before printing"},
3423 { "-unit", FALSE, etENUM, {en_unit},
3424 "Energy unit in case of log output" },
3425 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3426 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3427 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3428 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3429 { "-sym", FALSE, etBOOL, {&opt.bSym},
3430 "Symmetrize profile around z=0"},
3431 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3432 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3433 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3434 "Calculate integrated autocorrelation times and use in wham"},
3435 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3436 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3437 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3438 "When computing autocorrelation functions, restart computing every .. (ps)"},
3439 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3440 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3441 "during smoothing"},
3442 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3443 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3444 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3445 "Bootstrap method" },
3446 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3447 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3448 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3449 "Seed for bootstrapping. (-1 = use time)"},
3450 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3451 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3452 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3453 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3454 { "-stepout", FALSE, etINT, {&opt.stepchange},
3455 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3456 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3457 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3461 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3462 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3463 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3464 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3465 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3466 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3467 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3468 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3469 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3470 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3471 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3472 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3475 int i, j, l, nfiles, nwins, nfiles2;
3476 t_UmbrellaHeader header;
3477 t_UmbrellaWindow * window = NULL;
3478 double *profile, maxchange = 1e20;
3479 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3480 char **fninTpr, **fninPull, **fninPdo;
3482 FILE *histout, *profout;
3483 char ylabel[256], title[256];
3486 opt.verbose = FALSE;
3487 opt.bHistOnly = FALSE;
3497 /* bootstrapping stuff */
3499 opt.bsMethod = bsMethod_hist;
3500 opt.tauBootStrap = 0.0;
3502 opt.histBootStrapBlockLength = 8;
3503 opt.bs_verbose = FALSE;
3508 opt.Temperature = 298;
3509 opt.Tolerance = 1e-6;
3510 opt.bBoundsOnly = FALSE;
3512 opt.bCalcTauInt = FALSE;
3513 opt.sigSmoothIact = 0.0;
3514 opt.bAllowReduceIact = TRUE;
3515 opt.bInitPotByIntegration = TRUE;
3516 opt.acTrestart = 1.0;
3517 opt.stepchange = 100;
3518 opt.stepUpdateContrib = 100;
3520 if (!parse_common_args(&argc, argv, 0,
3521 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3526 opt.unit = nenum(en_unit);
3527 opt.bsMethod = nenum(en_bsMethod);
3529 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3531 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3532 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3533 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3534 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3535 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3536 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3537 if (opt.bTab && opt.bPullf)
3539 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3540 "Provide pullx.xvg or pdo files!");
3543 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3545 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3547 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3549 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3550 "\n\n Check gmx wham -h !");
3553 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3554 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3555 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3556 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3557 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3559 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3560 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3561 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3562 if ( (bMinSet || bMaxSet) && bAutoSet)
3564 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3567 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3569 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3572 if (bMinSet && opt.bAuto)
3574 printf("Note: min and max given, switching off -auto.\n");
3578 if (opt.bTauIntGiven && opt.bCalcTauInt)
3580 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3581 "the autocorrelation times. Not both.");
3584 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3586 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3587 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3589 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3591 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3592 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3595 /* Set # of OpenMP threads */
3598 gmx_omp_set_num_threads(nthreads);
3602 nthreads = gmx_omp_get_max_threads();
3606 printf("\nNote: Will use %d OpenMP threads.\n\n", nthreads);
3609 /* Reading gmx4 pull output and tpr files */
3610 if (opt.bTpr || opt.bPullf || opt.bPullx)
3612 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3614 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3615 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3616 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3617 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3618 if (nfiles != nfiles2)
3620 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3621 opt.fnTpr, nfiles2, fnPull);
3624 /* Read file that selects the pull group to be used */
3625 if (opt.fnGroupsel != NULL)
3627 readPullGroupSelection(&opt, fninTpr, nfiles);
3630 window = initUmbrellaWindows(nfiles);
3631 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3634 { /* reading pdo files */
3635 if (opt.fnGroupsel != NULL)
3637 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3638 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3640 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3641 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3642 window = initUmbrellaWindows(nfiles);
3643 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3647 /* enforce equal weight for all histograms? */
3650 enforceEqualWeights(window, nwins);
3653 /* write histograms */
3654 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3655 xlabel, "count", opt.oenv);
3656 for (l = 0; l < opt.bins; ++l)
3658 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3659 for (i = 0; i < nwins; ++i)
3661 for (j = 0; j < window[i].nPull; ++j)
3663 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3666 fprintf(histout, "\n");
3669 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3672 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3676 /* Using tabulated umbrella potential */
3679 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3682 /* Integrated autocorrelation times provided ? */
3683 if (opt.bTauIntGiven)
3685 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3688 /* Compute integrated autocorrelation times */
3689 if (opt.bCalcTauInt)
3691 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3694 /* calc average and sigma for each histogram
3695 (maybe required for bootstrapping. If not, this is fast anyhow) */
3696 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3698 averageSigma(window, nwins);
3701 /* Get initial potential by simple integration */
3702 if (opt.bInitPotByIntegration)
3704 guessPotByIntegration(window, nwins, &opt);
3707 /* Check if complete reaction coordinate is covered */
3708 checkReactionCoordinateCovered(window, nwins, &opt);
3710 /* Calculate profile */
3711 snew(profile, opt.bins);
3719 if ( (i%opt.stepUpdateContrib) == 0)
3721 setup_acc_wham(profile, window, nwins, &opt);
3723 if (maxchange < opt.Tolerance)
3726 /* if (opt.verbose) */
3727 printf("Switched to exact iteration in iteration %d\n", i);
3729 calc_profile(profile, window, nwins, &opt, bExact);
3730 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3732 printf("\t%4d) Maximum change %e\n", i, maxchange);
3736 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3737 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3739 /* calc error from Kumar's formula */
3740 /* Unclear how the error propagates along reaction coordinate, therefore
3742 /* calc_error_kumar(profile,window, nwins,&opt); */
3744 /* Write profile in energy units? */
3747 prof_normalization_and_unit(profile, &opt);
3748 std::strcpy(ylabel, en_unit_label[opt.unit]);
3749 std::strcpy(title, "Umbrella potential");
3753 std::strcpy(ylabel, "Density of states");
3754 std::strcpy(title, "Density of states");
3757 /* symmetrize profile around z=0? */
3760 symmetrizeProfile(profile, &opt);
3763 /* write profile or density of states */
3764 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3765 for (i = 0; i < opt.bins; ++i)
3767 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3770 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3772 /* Bootstrap Method */
3775 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3776 opt2fn("-hist", NFILE, fnm),
3777 ylabel, profile, window, nwins, &opt);
3781 freeUmbrellaWindows(window, nfiles);
3783 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3784 please_cite(stdout, "Hub2010");