2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
56 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/fileio/tpxio.h"
58 #include "gromacs/fileio/xvgr.h"
59 #include "gromacs/gmxana/gmx_ana.h"
60 #include "gromacs/math/functions.h"
61 #include "gromacs/math/units.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/mdrunutility/mdmodules.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/pull-params.h"
67 #include "gromacs/pulling/pull.h"
68 #include "gromacs/random/tabulatednormaldistribution.h"
69 #include "gromacs/random/threefry.h"
70 #include "gromacs/random/uniformintdistribution.h"
71 #include "gromacs/random/uniformrealdistribution.h"
72 #include "gromacs/utility/arraysize.h"
73 #include "gromacs/utility/cstringutil.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/fatalerror.h"
76 #include "gromacs/utility/futil.h"
77 #include "gromacs/utility/gmxomp.h"
78 #include "gromacs/utility/pleasecite.h"
79 #include "gromacs/utility/smalloc.h"
81 //! longest file names allowed in input files
82 #define WHAM_MAXFILELEN 2048
85 * enum for energy units
88 enSel, en_kJ, en_kCal, en_kT, enNr
91 * enum for type of input files (pdos, tpr, or pullf)
94 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
97 /*! \brief enum for bootstrapping method
99 * These bootstrap methods are supported:
100 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
101 * (bsMethod_BayesianHist)
102 * - bootstrap complete histograms (bsMethod_hist)
103 * - bootstrap trajectories from given umbrella histograms. This generates new
104 * "synthetic" histograms (bsMethod_traj)
105 * - bootstrap trajectories from Gaussian with mu/sigma computed from
106 * the respective histogram (bsMethod_trajGauss). This gives very similar
107 * results compared to bsMethod_traj.
109 * ********************************************************************
110 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
111 * JS Hub, BL de Groot, D van der Spoel
112 * g_wham - A free weighted histogram analysis implementation including
113 * robust error and autocorrelation estimates,
114 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
115 * ********************************************************************
118 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
119 bsMethod_traj, bsMethod_trajGauss
122 //! Parameters of one pull coodinate
125 int pull_type; //!< such as constraint, umbrella, ...
126 int geometry; //!< such as distance, direction, cylinder
127 int ngroup; //!< the number of pull groups involved
128 ivec dim; //!< pull dimension with geometry distance
129 int ndim; //!< nr of pull_dim != 0
130 real k; //!< force constants in tpr file
131 real init_dist; //!< reference displacement
132 char coord_unit[256]; //!< unit of the displacement
135 //! Parameters of the umbrella potentials
139 * \name Using umbrella pull code since gromacs 4.x
142 int npullcrds; //!< nr of umbrella pull coordinates for reading
143 t_pullcoord *pcrd; //!< the pull coordinates
144 gmx_bool bPrintCOM; //!< COMs of pull groups writtn in pullx.xvg
145 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
146 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
150 * \name Using PDO files common until gromacs 3.x
158 char PullName[4][256];
160 double UmbCons[4][3];
164 //! Data in the umbrella histograms
167 int nPull; //!< nr of pull groups in this pdo or pullf/x file
168 double **Histo; //!< nPull histograms
169 double **cum; //!< nPull cumulative distribution functions
170 int nBin; //!< nr of bins. identical to opt->bins
171 double *k; //!< force constants for the nPull coords
172 double *pos; //!< umbrella positions for the nPull coords
173 double *z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
174 int *N; //!< nr of data points in nPull histograms.
175 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
177 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
179 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
180 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
183 double *tau; //!< intetrated autocorrelation time (IACT)
184 double *tausmooth; //!< smoothed IACT
186 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
188 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
190 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
192 /*! \brief average force estimated from average displacement, fAv=dzAv*k
194 * Used for integration to guess the potential.
197 real *aver; //!< average of histograms
198 real *sigma; //!< stddev of histograms
199 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
202 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
205 int n; //!< total nr of pull coords in this tpr file
206 int nUse; //!< nr of pull coords used
207 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
210 //! Parameters of WHAM
217 const char *fnTpr, *fnPullf, *fnCoordSel;
218 const char *fnPdo, *fnPullx; //!< file names of input
219 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
220 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
222 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
223 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
224 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
225 t_coordselection *coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
228 * \name Basic WHAM options
231 int bins; //!< nr of bins, min, max, and dz of profile
233 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
234 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
237 * \name Output control
240 gmx_bool bLog; //!< energy output (instead of probability) for profile
241 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
242 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
243 /*! \brief after wham, set prof to zero at this z-position.
244 * When bootstrapping, set zProf0 to a "stable" reference position.
247 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
249 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
250 gmx_bool bAuto; //!< determine min and max automatically but do not exit
252 gmx_bool verbose; //!< more noisy wham mode
253 int stepchange; //!< print maximum change in prof after how many interations
254 gmx_output_env_t *oenv; //!< xvgr options
257 * \name Autocorrelation stuff
260 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
261 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
262 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
263 real acTrestart; //!< when computing ACT, time between restarting points
265 /* \brief Enforce the same weight for each umbella window, that is
266 * calculate with the same number of data points for
267 * each window. That can be reasonable, if the histograms
268 * have different length, but due to autocorrelation,
269 * a longer simulation should not have larger weightin wham.
275 * \name Bootstrapping stuff
278 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
280 /* \brief bootstrap method
282 * if == bsMethod_hist, consider complete histograms as independent
283 * data points and, hence, only mix complete histograms.
284 * if == bsMethod_BayesianHist, consider complete histograms
285 * as independent data points, but assign random weights
286 * to the histograms during the bootstrapping ("Bayesian bootstrap")
287 * In case of long correlations (e.g., inside a channel), these
288 * will yield a more realistic error.
289 * if == bsMethod_traj(Gauss), generate synthetic histograms
291 * histogram by generating an autocorrelated random sequence
292 * that is distributed according to the respective given
293 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
294 * (instead of from the umbrella histogram) to generate a new
299 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
302 /* \brief when mixing histograms, mix only histograms withing blocks
303 long the reaction coordinate xi. Avoids gaps along xi. */
304 int histBootStrapBlockLength;
306 int bsSeed; //!< random seed for bootstrapping
308 /* \brief Write cumulative distribution functions (CDFs) of histograms
309 and write the generated histograms for each bootstrap */
313 * \name tabulated umbrella potential stuff
317 double *tabX, *tabY, tabMin, tabMax, tabDz;
320 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
321 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
324 //! Make an umbrella window (may contain several histograms)
325 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
327 t_UmbrellaWindow *win;
330 for (i = 0; i < nwin; i++)
332 win[i].Histo = win[i].cum = 0;
333 win[i].k = win[i].pos = win[i].z = 0;
334 win[i].N = win[i].Ntot = 0;
335 win[i].g = win[i].tau = win[i].tausmooth = 0;
339 win[i].aver = win[i].sigma = 0;
345 //! Delete an umbrella window (may contain several histograms)
346 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
349 for (i = 0; i < nwin; i++)
353 for (j = 0; j < win[i].nPull; j++)
355 sfree(win[i].Histo[j]);
360 for (j = 0; j < win[i].nPull; j++)
362 sfree(win[i].cum[j]);
367 for (j = 0; j < win[i].nPull; j++)
369 sfree(win[i].bContrib[j]);
381 sfree(win[i].tausmooth);
382 sfree(win[i].bContrib);
384 sfree(win[i].forceAv);
387 sfree(win[i].bsWeight);
393 * Read and setup tabulated umbrella potential
395 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
400 printf("Setting up tabulated potential from file %s\n", fn);
401 nl = read_xvg(fn, &y, &ny);
405 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
407 opt->tabMin = y[0][0];
408 opt->tabMax = y[0][nl-1];
409 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
412 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
413 "ascending z-direction", fn);
415 for (i = 0; i < nl-1; i++)
417 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
419 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
424 for (i = 0; i < nl; i++)
426 opt->tabX[i] = y[0][i];
427 opt->tabY[i] = y[1][i];
429 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
430 opt->tabMin, opt->tabMax, opt->tabDz);
433 //! Read the header of an PDO file (position, force const, nr of groups)
434 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
437 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
439 std::istringstream ist;
442 if (fgets(line, 2048, file) == NULL)
444 gmx_fatal(FARGS, "Error reading header from pdo file\n");
447 ist >> Buffer0 >> Buffer1 >> Buffer2;
448 if (std::strcmp(Buffer1, "UMBRELLA"))
450 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
451 "(Found in first line: `%s')\n",
452 Buffer1, "UMBRELLA", line);
454 if (std::strcmp(Buffer2, "3.0"))
456 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
460 if (fgets(line, 2048, file) == NULL)
462 gmx_fatal(FARGS, "Error reading header from pdo file\n");
465 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
466 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
468 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
469 if (header->nDim != 1)
471 gmx_fatal(FARGS, "Currently only supports one dimension");
475 if (fgets(line, 2048, file) == NULL)
477 gmx_fatal(FARGS, "Error reading header from pdo file\n");
480 ist >> Buffer0 >> Buffer1 >> header->nSkip;
483 if (fgets(line, 2048, file) == NULL)
485 gmx_fatal(FARGS, "Error reading header from pdo file\n");
488 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
491 if (fgets(line, 2048, file) == NULL)
493 gmx_fatal(FARGS, "Error reading header from pdo file\n");
496 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
500 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
504 for (i = 0; i < header->nPull; ++i)
506 if (fgets(line, 2048, file) == NULL)
508 gmx_fatal(FARGS, "Error reading header from pdo file\n");
511 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
512 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
513 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
517 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
518 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
522 if (fgets(line, 2048, file) == NULL)
524 gmx_fatal(FARGS, "Cannot read from file\n");
528 if (std::strcmp(Buffer3, "#####") != 0)
530 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
535 static char *fgets3(FILE *fp, char ptr[], int *len)
540 if (fgets(ptr, *len-1, fp) == NULL)
545 while ((std::strchr(ptr, '\n') == NULL) && (!feof(fp)))
547 /* This line is longer than len characters, let's increase len! */
551 if (fgets(p-1, STRLEN, fp) == NULL)
556 slen = std::strlen(ptr);
557 if (ptr[slen-1] == '\n')
565 /*! \brief Read the data columns of and PDO file.
567 * TO DO: Get rid of the scanf function to avoid the clang warning.
568 * At the moment, this warning is avoided by hiding the format string
569 * the variable fmtlf.
571 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
572 int fileno, t_UmbrellaWindow * win,
573 t_UmbrellaOptions *opt,
574 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
576 int i, inttemp, bins, count, ntot;
577 real minval, maxval, minfound = 1e20, maxfound = -1e20;
578 double temp, time, time0 = 0, dt;
580 t_UmbrellaWindow * window = 0;
581 gmx_bool timeok, dt_ok = 1;
582 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
583 int len = STRLEN, dstep = 1;
584 const int blocklen = 4096;
594 /* Need to alocate memory and set up structure */
595 window->nPull = header->nPull;
598 snew(window->Histo, window->nPull);
599 snew(window->z, window->nPull);
600 snew(window->k, window->nPull);
601 snew(window->pos, window->nPull);
602 snew(window->N, window->nPull);
603 snew(window->Ntot, window->nPull);
604 snew(window->g, window->nPull);
605 snew(window->bsWeight, window->nPull);
607 window->bContrib = 0;
609 if (opt->bCalcTauInt)
611 snew(window->ztime, window->nPull);
617 snew(lennow, window->nPull);
619 for (i = 0; i < window->nPull; ++i)
622 window->bsWeight[i] = 1.;
623 snew(window->Histo[i], bins);
624 window->k[i] = header->UmbCons[i][0];
625 window->pos[i] = header->UmbPos[i][0];
629 if (opt->bCalcTauInt)
631 window->ztime[i] = 0;
635 /* Done with setup */
641 minval = maxval = bins = 0; /* Get rid of warnings */
646 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
650 if (ptr[0] == '#' || std::strlen(ptr) < 2)
655 /* Initiate format string */
657 std::strcat(fmtign, "%*s");
659 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
660 /* Round time to fs */
661 time = 1.0/1000*( static_cast<gmx_int64_t> (time*1000+0.5) );
663 /* get time step of pdo file */
673 dstep = static_cast<int>(opt->dt/dt+0.5);
681 window->dt = dt*dstep;
686 dt_ok = ((count-1)%dstep == 0);
687 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
689 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
690 time,opt->tmin, opt->tmax, dt_ok,timeok); */
694 for (i = 0; i < header->nPull; ++i)
696 std::strcpy(fmt, fmtign);
697 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
698 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
699 if (sscanf(ptr, fmt, &temp))
701 temp += header->UmbPos[i][0];
715 if (opt->bCalcTauInt)
717 /* save time series for autocorrelation analysis */
718 ntot = window->Ntot[i];
719 if (ntot >= lennow[i])
721 lennow[i] += blocklen;
722 srenew(window->ztime[i], lennow[i]);
724 window->ztime[i][ntot] = temp;
728 temp /= (maxval-minval);
730 temp = std::floor(temp);
732 inttemp = static_cast<int> (temp);
739 else if (inttemp >= bins)
745 if (inttemp >= 0 && inttemp < bins)
747 window->Histo[i][inttemp] += 1.;
755 if (time > opt->tmax)
759 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
775 /*! \brief Set identical weights for all histograms
777 * Normally, the weight is given by the number data points in each
778 * histogram, together with the autocorrelation time. This can be overwritten
779 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
780 * an appropriate autocorrelation times instead of using this function.
782 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
784 int i, k, j, NEnforced;
787 NEnforced = window[0].Ntot[0];
788 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
789 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
790 /* enforce all histograms to have the same weight as the very first histogram */
792 for (j = 0; j < nWindows; ++j)
794 for (k = 0; k < window[j].nPull; ++k)
796 ratio = 1.0*NEnforced/window[j].Ntot[k];
797 for (i = 0; i < window[0].nBin; ++i)
799 window[j].Histo[k][i] *= ratio;
801 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
806 /*! \brief Simple linear interpolation between two given tabulated points
808 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
811 double pl, pu, dz, dp;
813 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
815 if (jl < 0 || ju >= opt->tabNbins)
817 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
818 "Provide an extended table.", dist, jl, ju);
822 dz = dist-opt->tabX[jl];
823 dp = (pu-pl)*dz/opt->tabDz;
829 * Check which bins substiantially contribute (accelerates WHAM)
831 * Don't worry, that routine does not mean we compute the PMF in limited precision.
832 * After rapid convergence (using only substiantal contributions), we always switch to
835 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
836 t_UmbrellaOptions *opt)
838 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
839 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
840 gmx_bool bAnyContrib;
841 static int bFirst = 1;
842 static double wham_contrib_lim;
846 for (i = 0; i < nWindows; ++i)
848 nGrptot += window[i].nPull;
850 wham_contrib_lim = opt->Tolerance/nGrptot;
853 ztot = opt->max-opt->min;
856 for (i = 0; i < nWindows; ++i)
858 if (!window[i].bContrib)
860 snew(window[i].bContrib, window[i].nPull);
862 for (j = 0; j < window[i].nPull; ++j)
864 if (!window[i].bContrib[j])
866 snew(window[i].bContrib[j], opt->bins);
869 for (k = 0; k < opt->bins; ++k)
871 temp = (1.0*k+0.5)*dz+min;
872 distance = temp - window[i].pos[j]; /* distance to umbrella center */
874 { /* in cyclic wham: */
875 if (distance > ztot_half) /* |distance| < ztot_half */
879 else if (distance < -ztot_half)
884 /* Note: there are two contributions to bin k in the wham equations:
885 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
886 ii) exp(- U/(BOLTZ*opt->Temperature))
887 where U is the umbrella potential
888 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
893 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
897 U = tabulated_pot(distance, opt); /* Use tabulated potential */
899 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
900 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
901 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
902 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
903 if (window[i].bContrib[j][k])
909 /* If this histo is far outside min and max all bContrib may be FALSE,
910 causing a floating point exception later on. To avoid that, switch
914 for (k = 0; k < opt->bins; ++k)
916 window[i].bContrib[j][k] = TRUE;
923 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
924 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
929 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
935 //! Compute the PMF (one of the two main WHAM routines)
936 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
937 t_UmbrellaOptions *opt, gmx_bool bExact)
939 double ztot_half, ztot, min = opt->min, dz = opt->dz;
941 ztot = opt->max-opt->min;
948 int nthreads = gmx_omp_get_max_threads();
949 int thread_id = gmx_omp_get_thread_num();
951 int i0 = thread_id*opt->bins/nthreads;
952 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
954 for (i = i0; i < i1; ++i)
957 double num, denom, invg, temp = 0, distance, U = 0;
959 for (j = 0; j < nWindows; ++j)
961 for (k = 0; k < window[j].nPull; ++k)
963 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
964 temp = (1.0*i+0.5)*dz+min;
965 num += invg*window[j].Histo[k][i];
967 if (!(bExact || window[j].bContrib[k][i]))
971 distance = temp - window[j].pos[k]; /* distance to umbrella center */
973 { /* in cyclic wham: */
974 if (distance > ztot_half) /* |distance| < ztot_half */
978 else if (distance < -ztot_half)
986 U = 0.5*window[j].k[k]*gmx::square(distance); /* harmonic potential assumed. */
990 U = tabulated_pot(distance, opt); /* Use tabulated potential */
992 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
995 profile[i] = num/denom;
998 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1002 //! Compute the free energy offsets z (one of the two main WHAM routines)
1003 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
1004 t_UmbrellaOptions *opt, gmx_bool bExact)
1006 double min = opt->min, dz = opt->dz, ztot_half, ztot;
1007 double maxglob = -1e20;
1009 ztot = opt->max-opt->min;
1012 #pragma omp parallel
1016 int nthreads = gmx_omp_get_max_threads();
1017 int thread_id = gmx_omp_get_thread_num();
1019 int i0 = thread_id*nWindows/nthreads;
1020 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1021 double maxloc = -1e20;
1023 for (i = i0; i < i1; ++i)
1025 double total = 0, temp, distance, U = 0;
1028 for (j = 0; j < window[i].nPull; ++j)
1031 for (k = 0; k < window[i].nBin; ++k)
1033 if (!(bExact || window[i].bContrib[j][k]))
1037 temp = (1.0*k+0.5)*dz+min;
1038 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1040 { /* in cyclic wham: */
1041 if (distance > ztot_half) /* |distance| < ztot_half */
1045 else if (distance < -ztot_half)
1053 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
1057 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1059 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1061 /* Avoid floating point exception if window is far outside min and max */
1064 total = -std::log(total);
1070 temp = std::abs(total - window[i].z[j]);
1075 window[i].z[j] = total;
1078 /* Now get maximum maxloc from the threads and put in maxglob */
1079 if (maxloc > maxglob)
1081 #pragma omp critical
1083 if (maxloc > maxglob)
1090 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1096 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1097 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1099 int i, j, bins = opt->bins;
1100 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1103 if (min > 0. || max < 0.)
1105 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1106 opt->min, opt->max);
1111 for (i = 0; i < bins; i++)
1115 /* bin left of zsym */
1116 j = static_cast<int> (std::floor((zsym-min)/dz-0.5));
1117 if (j >= 0 && (j+1) < bins)
1119 /* interpolate profile linearly between bins j and j+1 */
1120 z1 = min+(j+0.5)*dz;
1122 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1123 /* average between left and right */
1124 prof2[i] = 0.5*(profsym+profile[i]);
1128 prof2[i] = profile[i];
1132 std::memcpy(profile, prof2, bins*sizeof(double));
1136 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1137 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1140 double unit_factor = 1., diff;
1144 /* Not log? Nothing to do! */
1150 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1151 if (opt->unit == en_kT)
1155 else if (opt->unit == en_kJ)
1157 unit_factor = BOLTZ*opt->Temperature;
1159 else if (opt->unit == en_kCal)
1161 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1165 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1168 for (i = 0; i < bins; i++)
1170 if (profile[i] > 0.0)
1172 profile[i] = -std::log(profile[i])*unit_factor;
1176 /* shift to zero at z=opt->zProf0 */
1177 if (!opt->bProf0Set)
1183 /* Get bin with shortest distance to opt->zProf0
1184 (-0.5 from bin position and +0.5 from rounding cancel) */
1185 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1190 else if (imin >= bins)
1194 diff = profile[imin];
1198 for (i = 0; i < bins; i++)
1204 //! Make an array of random integers (used for bootstrapping)
1205 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine * rng)
1207 gmx::UniformIntDistribution<int> dist(0, blockLength-1);
1209 int ipull, blockBase, nr, ipullRandom;
1211 if (blockLength == 0)
1213 blockLength = nPull;
1216 for (ipull = 0; ipull < nPull; ipull++)
1218 blockBase = (ipull/blockLength)*blockLength;
1220 { /* make sure nothing bad happens in the last block */
1221 nr = dist(*rng); // [0,blockLength-1]
1222 ipullRandom = blockBase + nr;
1224 while (ipullRandom >= nPull);
1225 if (ipullRandom < 0 || ipullRandom >= nPull)
1227 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1228 "blockLength = %d, blockBase = %d\n",
1229 ipullRandom, nPull, nr, blockLength, blockBase);
1231 randomArray[ipull] = ipullRandom;
1233 /*for (ipull=0; ipull<nPull; ipull++)
1234 printf("%d ",randomArray[ipull]); printf("\n"); */
1237 /*! \brief Set pull group information of a synthetic histogram
1239 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1240 * but it is not required if we bootstrap complete histograms.
1242 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1243 t_UmbrellaWindow *thisWindow, int pullid)
1245 synthWindow->N [0] = thisWindow->N [pullid];
1246 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1247 synthWindow->pos [0] = thisWindow->pos [pullid];
1248 synthWindow->z [0] = thisWindow->z [pullid];
1249 synthWindow->k [0] = thisWindow->k [pullid];
1250 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1251 synthWindow->g [0] = thisWindow->g [pullid];
1252 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1255 /*! \brief Calculate cumulative distribution function of of all histograms.
1257 * This allow to create random number sequences
1258 * which are distributed according to the histograms. Required to generate
1259 * the "synthetic" histograms for the Bootstrap method
1261 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1262 t_UmbrellaOptions *opt, const char *fnhist, const char *xlabel)
1266 char *fn = 0, *buf = 0;
1269 if (opt->bs_verbose)
1271 snew(fn, std::strlen(fnhist)+10);
1272 snew(buf, std::strlen(fnhist)+10);
1273 sprintf(fn, "%s_cumul.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4));
1274 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1278 for (i = 0; i < nWindows; i++)
1280 snew(window[i].cum, window[i].nPull);
1281 for (j = 0; j < window[i].nPull; j++)
1283 snew(window[i].cum[j], nbin+1);
1284 window[i].cum[j][0] = 0.;
1285 for (k = 1; k <= nbin; k++)
1287 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1290 /* normalize CDFs. Ensure cum[nbin]==1 */
1291 last = window[i].cum[j][nbin];
1292 for (k = 0; k <= nbin; k++)
1294 window[i].cum[j][k] /= last;
1299 printf("Cumulative distribution functions of all histograms created.\n");
1300 if (opt->bs_verbose)
1302 for (k = 0; k <= nbin; k++)
1304 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1305 for (i = 0; i < nWindows; i++)
1307 for (j = 0; j < window[i].nPull; j++)
1309 fprintf(fp, "%g\t", window[i].cum[j][k]);
1314 printf("Wrote cumulative distribution functions to %s\n", fn);
1322 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1324 * This is used to generate a random sequence distributed according to a histogram
1326 void searchCumulative(double xx[], int n, double x, int *j)
1348 else if (x == xx[n-1])
1358 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1359 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1360 int pullid, t_UmbrellaOptions *opt)
1362 int N, i, nbins, r_index, ibin;
1363 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1366 N = thisWindow->N[pullid];
1367 dt = thisWindow->dt;
1368 nbins = thisWindow->nBin;
1370 /* tau = autocorrelation time */
1371 if (opt->tauBootStrap > 0.0)
1373 tausteps = opt->tauBootStrap/dt;
1375 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1377 /* calc tausteps from g=1+2tausteps */
1378 g = thisWindow->g[pullid];
1384 "When generating hypothetical trajectories from given umbrella histograms,\n"
1385 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1386 "cannot be predicted. You have 3 options:\n"
1387 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1388 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1390 " with option -iiact for all umbrella windows.\n"
1391 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1392 " Use option (3) only if you are sure what you're doing, you may severely\n"
1393 " underestimate the error if a too small ACT is given.\n");
1394 gmx_fatal(FARGS, errstr);
1397 synthWindow->N [0] = N;
1398 synthWindow->pos [0] = thisWindow->pos[pullid];
1399 synthWindow->z [0] = thisWindow->z[pullid];
1400 synthWindow->k [0] = thisWindow->k[pullid];
1401 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1402 synthWindow->g [0] = thisWindow->g [pullid];
1403 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1405 for (i = 0; i < nbins; i++)
1407 synthWindow->Histo[0][i] = 0.;
1410 if (opt->bsMethod == bsMethod_trajGauss)
1412 sig = thisWindow->sigma [pullid];
1413 mu = thisWindow->aver [pullid];
1416 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1418 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1420 z = a*x + sqrt(1-a^2)*y
1421 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1422 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1424 C(t) = exp(-t/tau) with tau=-1/ln(a)
1426 Then, use error function to turn the Gaussian random variable into a uniformly
1427 distributed one in [0,1]. Eventually, use cumulative distribution function of
1428 histogram to get random variables distributed according to histogram.
1429 Note: The ACT of the flat distribution and of the generated histogram is not
1430 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1432 a = std::exp(-1.0/tausteps);
1433 ap = std::sqrt(1.0-a*a);
1434 invsqrt2 = 1.0/std::sqrt(2.0);
1436 /* init random sequence */
1437 x = opt->normalDistribution(opt->rng);
1439 if (opt->bsMethod == bsMethod_traj)
1441 /* bootstrap points from the umbrella histograms */
1442 for (i = 0; i < N; i++)
1444 y = opt->normalDistribution(opt->rng);
1446 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1447 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1449 r = 0.5*(1+std::erf(x*invsqrt2));
1450 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1451 synthWindow->Histo[0][r_index] += 1.;
1454 else if (opt->bsMethod == bsMethod_trajGauss)
1456 /* bootstrap points from a Gaussian with the same average and sigma
1457 as the respective umbrella histogram. The idea was, that -given
1458 limited sampling- the bootstrapped histograms are otherwise biased
1459 from the limited sampling of the US histos. However, bootstrapping from
1460 the Gaussian seems to yield a similar estimate. */
1464 y = opt->normalDistribution(opt->rng);
1467 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1472 while ( (ibin += nbins) < 0)
1477 else if (ibin >= nbins)
1479 while ( (ibin -= nbins) >= nbins)
1486 if (ibin >= 0 && ibin < nbins)
1488 synthWindow->Histo[0][ibin] += 1.;
1495 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1499 /*! \brief Write all histograms to a file
1501 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1502 * sets of bootstrapped histograms.
1504 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1505 int bs_index, t_UmbrellaOptions *opt, const char *xlabel)
1507 char *fn = 0, *buf = 0, title[256];
1513 snew(fn, std::strlen(fnhist)+10);
1514 snew(buf, std::strlen(fnhist)+1);
1515 sprintf(fn, "%s_bs%d.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4), bs_index);
1516 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1520 fn = gmx_strdup(fnhist);
1521 std::strcpy(title, "Umbrella histograms");
1524 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1527 /* Write histograms */
1528 for (l = 0; l < bins; ++l)
1530 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1531 for (i = 0; i < nWindows; ++i)
1533 for (j = 0; j < window[i].nPull; ++j)
1535 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1542 printf("Wrote %s\n", fn);
1550 //! Used for qsort to sort random numbers
1551 int func_wham_is_larger(const void *a, const void *b)
1570 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1571 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1575 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1579 /* generate ordered random numbers between 0 and nAllPull */
1580 for (i = 0; i < nAllPull-1; i++)
1582 r[i] = dist(opt->rng);
1584 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1585 r[nAllPull-1] = 1.0*nAllPull;
1587 synthwin[0].bsWeight[0] = r[0];
1588 for (i = 1; i < nAllPull; i++)
1590 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1593 /* avoid to have zero weight by adding a tiny value */
1594 for (i = 0; i < nAllPull; i++)
1596 if (synthwin[i].bsWeight[0] < 1e-5)
1598 synthwin[i].bsWeight[0] = 1e-5;
1605 //! The main bootstrapping routine
1606 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1607 const char *xlabel, char* ylabel, double *profile,
1608 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1610 t_UmbrellaWindow * synthWindow;
1611 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1612 int i, j, *randomArray = 0, winid, pullid, ib;
1613 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1615 gmx_bool bExact = FALSE;
1617 /* init random generator */
1618 if (opt->bsSeed == 0)
1620 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1622 opt->rng.seed(opt->bsSeed);
1624 snew(bsProfile, opt->bins);
1625 snew(bsProfiles_av, opt->bins);
1626 snew(bsProfiles_av2, opt->bins);
1628 /* Create array of all pull groups. Note that different windows
1629 may have different nr of pull groups
1630 First: Get total nr of pull groups */
1632 for (i = 0; i < nWindows; i++)
1634 nAllPull += window[i].nPull;
1636 snew(allPull_winId, nAllPull);
1637 snew(allPull_pullId, nAllPull);
1639 /* Setup one array of all pull groups */
1640 for (i = 0; i < nWindows; i++)
1642 for (j = 0; j < window[i].nPull; j++)
1644 allPull_winId[iAllPull] = i;
1645 allPull_pullId[iAllPull] = j;
1650 /* setup stuff for synthetic windows */
1651 snew(synthWindow, nAllPull);
1652 for (i = 0; i < nAllPull; i++)
1654 synthWindow[i].nPull = 1;
1655 synthWindow[i].nBin = opt->bins;
1656 snew(synthWindow[i].Histo, 1);
1657 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1659 snew(synthWindow[i].Histo[0], opt->bins);
1661 snew(synthWindow[i].N, 1);
1662 snew(synthWindow[i].pos, 1);
1663 snew(synthWindow[i].z, 1);
1664 snew(synthWindow[i].k, 1);
1665 snew(synthWindow[i].bContrib, 1);
1666 snew(synthWindow[i].g, 1);
1667 snew(synthWindow[i].bsWeight, 1);
1670 switch (opt->bsMethod)
1673 snew(randomArray, nAllPull);
1674 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1675 please_cite(stdout, "Hub2006");
1677 case bsMethod_BayesianHist:
1678 /* just copy all histogams into synthWindow array */
1679 for (i = 0; i < nAllPull; i++)
1681 winid = allPull_winId [i];
1682 pullid = allPull_pullId[i];
1683 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1687 case bsMethod_trajGauss:
1688 calc_cumulatives(window, nWindows, opt, fnhist, xlabel);
1691 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1694 /* do bootstrapping */
1695 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1696 for (ib = 0; ib < opt->nBootStrap; ib++)
1698 printf(" *******************************************\n"
1699 " ******** Start bootstrap nr %d ************\n"
1700 " *******************************************\n", ib+1);
1702 switch (opt->bsMethod)
1705 /* bootstrap complete histograms from given histograms */
1706 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1707 for (i = 0; i < nAllPull; i++)
1709 winid = allPull_winId [randomArray[i]];
1710 pullid = allPull_pullId[randomArray[i]];
1711 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1714 case bsMethod_BayesianHist:
1715 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1716 setRandomBsWeights(synthWindow, nAllPull, opt);
1719 case bsMethod_trajGauss:
1720 /* create new histos from given histos, that is generate new hypothetical
1722 for (i = 0; i < nAllPull; i++)
1724 winid = allPull_winId[i];
1725 pullid = allPull_pullId[i];
1726 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1731 /* write histos in case of verbose output */
1732 if (opt->bs_verbose)
1734 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1741 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1744 if ( (i%opt->stepUpdateContrib) == 0)
1746 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1748 if (maxchange < opt->Tolerance)
1752 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1754 printf("\t%4d) Maximum change %e\n", i, maxchange);
1756 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1759 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1760 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1764 prof_normalization_and_unit(bsProfile, opt);
1767 /* symmetrize profile around z=0 */
1770 symmetrizeProfile(bsProfile, opt);
1773 /* save stuff to get average and stddev */
1774 for (i = 0; i < opt->bins; i++)
1777 bsProfiles_av[i] += tmp;
1778 bsProfiles_av2[i] += tmp*tmp;
1779 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1781 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1785 /* write average and stddev */
1786 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1787 if (output_env_get_print_xvgr_codes(opt->oenv))
1789 fprintf(fp, "@TYPE xydy\n");
1791 for (i = 0; i < opt->bins; i++)
1793 bsProfiles_av [i] /= opt->nBootStrap;
1794 bsProfiles_av2[i] /= opt->nBootStrap;
1795 tmp = bsProfiles_av2[i]-gmx::square(bsProfiles_av[i]);
1796 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1797 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1800 printf("Wrote boot strap result to %s\n", fnres);
1803 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1804 int whaminFileType(char *fn)
1807 len = std::strlen(fn);
1808 if (std::strcmp(fn+len-3, "tpr") == 0)
1812 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1814 return whamin_pullxf;
1816 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1822 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1824 return whamin_unknown;
1827 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1828 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1829 t_UmbrellaOptions *opt)
1831 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1832 int nread, sizenow, i, block = 1;
1835 fp = gmx_ffopen(fn, "r");
1838 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1840 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1842 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1844 if (nread >= sizenow)
1847 srenew(filename, sizenow);
1848 for (i = sizenow-block; i < sizenow; i++)
1850 snew(filename[i], WHAM_MAXFILELEN);
1853 /* remove newline if there is one */
1854 if (tmp[std::strlen(tmp)-1] == '\n')
1856 tmp[std::strlen(tmp)-1] = '\0';
1858 std::strcpy(filename[nread], tmp);
1861 printf("Found file %s in %s\n", filename[nread], fn);
1865 *filenamesRet = filename;
1869 //! Open a file or a pipe to a gzipped file
1870 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1872 char Buffer[1024], gunzip[1024], *Path = 0;
1874 static gmx_bool bFirst = 1;
1876 /* gzipped pdo file? */
1877 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1879 /* search gunzip executable */
1880 if (!(Path = getenv("GMX_PATH_GZIP")))
1882 if (gmx_fexist("/bin/gunzip"))
1884 sprintf(gunzip, "%s", "/bin/gunzip");
1886 else if (gmx_fexist("/usr/bin/gunzip"))
1888 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1892 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1893 "You may want to define the path to gunzip "
1894 "with the environment variable GMX_PATH_GZIP.", gunzip);
1899 sprintf(gunzip, "%s/gunzip", Path);
1900 if (!gmx_fexist(gunzip))
1902 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1903 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1908 printf("Using gunzip executable %s\n", gunzip);
1911 if (!gmx_fexist(fn))
1913 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1915 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1918 printf("Executing command '%s'\n", Buffer);
1921 if ((pipe = popen(Buffer, "r")) == NULL)
1923 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1926 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1932 pipe = gmx_ffopen(fn, "r");
1939 //! Close file or pipe
1940 void pdo_close_file(FILE *fp)
1949 //! Reading all pdo files
1950 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1951 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1954 real mintmp, maxtmp, done = 0.;
1957 /* char Buffer0[1000]; */
1961 gmx_fatal(FARGS, "No files found. Hick.");
1964 /* if min and max are not given, get min and max from the input files */
1967 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1970 for (i = 0; i < nfiles; ++i)
1972 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1973 /*fgets(Buffer0,999,file);
1974 fprintf(stderr,"First line '%s'\n",Buffer0); */
1975 done = 100.0*(i+1)/nfiles;
1976 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1981 read_pdo_header(file, header, opt);
1982 /* here only determine min and max of this window */
1983 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1984 if (maxtmp > opt->max)
1988 if (mintmp < opt->min)
1994 pdo_close_file(file);
2002 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2003 if (opt->bBoundsOnly)
2005 printf("Found option -boundsonly, now exiting.\n");
2009 /* store stepsize in profile */
2010 opt->dz = (opt->max-opt->min)/opt->bins;
2012 /* Having min and max, we read in all files */
2013 /* Loop over all files */
2014 for (i = 0; i < nfiles; ++i)
2016 done = 100.0*(i+1)/nfiles;
2017 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2022 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2023 read_pdo_header(file, header, opt);
2024 /* load data into window */
2025 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2026 if ((window+i)->Ntot[0] == 0)
2028 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2032 pdo_close_file(file);
2040 for (i = 0; i < nfiles; ++i)
2047 //! translate 0/1 to N/Y to write pull dimensions
2048 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2050 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2051 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt, t_coordselection *coordsel)
2053 gmx::MDModules mdModules;
2054 t_inputrec *ir = mdModules.inputrec();
2056 static int first = 1;
2058 /* printf("Reading %s \n",fn); */
2059 read_tpx_state(fn, ir, &state, NULL);
2063 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2065 if (ir->pull->ncoord == 0)
2067 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
2070 /* Read overall pull info */
2071 header->npullcrds = ir->pull->ncoord;
2072 header->bPrintCOM = ir->pull->bPrintCOM;
2073 header->bPrintRefValue = ir->pull->bPrintRefValue;
2074 header->bPrintComp = ir->pull->bPrintComp;
2076 /* Read pull coordinates */
2077 snew(header->pcrd, header->npullcrds);
2078 for (int i = 0; i < ir->pull->ncoord; i++)
2080 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
2081 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
2082 header->pcrd[i].ngroup = ir->pull->coord[i].ngroup;
2083 header->pcrd[i].k = ir->pull->coord[i].k;
2084 header->pcrd[i].init_dist = ir->pull->coord[i].init;
2086 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
2087 header->pcrd[i].ndim = header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
2089 std::strcpy(header->pcrd[i].coord_unit,
2090 pull_coordinate_units(&ir->pull->coord[i]));
2092 if (ir->efep != efepNO && ir->pull->coord[i].k != ir->pull->coord[i].kB)
2094 gmx_fatal(FARGS, "Seems like you did free-energy perturbation, and you perturbed the force constant."
2095 " This is not supported.\n");
2097 if (coordsel && (coordsel->n != ir->pull->ncoord))
2099 gmx_fatal(FARGS, "Found %d pull coordinates in %s, but %d columns in the respective line\n"
2100 "coordinate selection file (option -is)\n", ir->pull->ncoord, fn, coordsel->n);
2104 /* Check pull coords for consistency */
2106 ivec thedim = { 0, 0, 0 };
2107 bool geometryIsSet = false;
2108 for (int i = 0; i < ir->pull->ncoord; i++)
2110 if (coordsel == NULL || coordsel->bUse[i])
2112 if (header->pcrd[i].pull_type != epullUMBRELLA)
2114 gmx_fatal(FARGS, "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only umbrella coodinates can enter WHAM.\n"
2115 "If you have umrella and non-umbrella coordinates, you can select the umbrella coordinates with gmx wham -is\n",
2116 fn, i+1, epull_names[header->pcrd[i].pull_type]);
2120 geom = header->pcrd[i].geometry;
2121 copy_ivec(header->pcrd[i].dim, thedim);
2122 geometryIsSet = true;
2124 if (geom != header->pcrd[i].geometry)
2126 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull geometry (coordinate 1: %s, coordinate %d: %s)\n"
2127 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2128 fn, epullg_names[geom], i+1, epullg_names[header->pcrd[i].geometry]);
2130 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY] || thedim[ZZ] != header->pcrd[i].dim[ZZ])
2132 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull dimensions (coordinate 1: %s %s %s, coordinate %d: %s %s %s)\n"
2133 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2134 fn, int2YN(thedim[XX]), int2YN(thedim[YY]), int2YN(thedim[ZZ]), i+1,
2135 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]));
2137 if (header->pcrd[i].geometry == epullgCYL)
2139 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
2141 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2142 "However, found dimensions [%s %s %s]\n",
2143 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2144 int2YN(header->pcrd[i].dim[ZZ]));
2147 if (header->pcrd[i].k <= 0.0)
2149 gmx_fatal(FARGS, "%s: Pull coordinate %d has force constant of of %g in %s.\n"
2150 "That doesn't seem to be an Umbrella tpr.\n",
2151 fn, i+1, header->pcrd[i].k);
2156 if (opt->verbose || first)
2158 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
2160 for (int i = 0; i < ir->pull->ncoord; i++)
2162 int lentmp = strlen(epullg_names[header->pcrd[i].geometry]);
2163 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
2166 sprintf(fmt, "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d dimensions). Used: %%s\n",
2168 for (int i = 0; i < ir->pull->ncoord; i++)
2170 bool use = (coordsel == NULL || coordsel->bUse[i]);
2172 epullg_names[header->pcrd[i].geometry], header->pcrd[i].k, header->pcrd[i].init_dist,
2173 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]),
2174 header->pcrd[i].ndim, use ? "Yes" : "No");
2175 printf("\tPull group coordinates of %d groups expected in pullx files.\n", ir->pull->bPrintCOM ? header->pcrd[i].ngroup : 0);
2177 printf("\tReference value of the coordinate%s expected in pullx files.\n",
2178 header->bPrintRefValue ? "" : " not");
2180 if (!opt->verbose && first)
2182 printf("\tUse option -v to see this output for all input tpr files\n\n");
2188 //! 2-norm in a ndim-dimensional space
2189 double dist_ndim(double **dx, int ndim, int line)
2193 for (i = 0; i < ndim; i++)
2195 r2 += gmx::square(dx[i][line]);
2197 return std::sqrt(r2);
2200 //! Read pullx.xvg or pullf.xvg
2201 void read_pull_xf(const char *fn, t_UmbrellaHeader * header,
2202 t_UmbrellaWindow * window,
2203 t_UmbrellaOptions *opt,
2204 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2205 t_coordselection *coordsel)
2207 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2208 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
2209 int nColExpect, ntot, column;
2210 real min, max, minfound = 1e20, maxfound = -1e20;
2211 gmx_bool dt_ok, timeok;
2212 const char *quantity;
2213 const int blocklen = 4096;
2215 static gmx_bool bFirst = TRUE;
2218 * Data columns in pull output:
2219 * - in force output pullf.xvg:
2220 * No reference columns, one column per pull coordinate
2222 * - in position output pullx.xvg:
2223 * * optionally, ndim columns for COMs of all groups (depending on on mdp options pull-print-com);
2224 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
2225 * be written separately into pullx file, but this is not supported and throws an error below;
2226 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
2229 if (header->bPrintComp && opt->bPullx)
2231 gmx_fatal(FARGS, "gmx wham cannot read pullx files if the components of the coordinate was written\n"
2232 "(mdp option pull-print-components). Provide the pull force files instead (with option -if).\n");
2235 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
2236 snew(nColThisCrd, header->npullcrds);
2237 snew(nColCOMCrd, header->npullcrds);
2238 snew(nColRefCrd, header->npullcrds);
2240 if (opt->bPullx == FALSE)
2242 /* pullf reading: simply one column per coordinate */
2243 for (g = 0; g < header->npullcrds; g++)
2252 /* pullx reading. Note explanation above. */
2253 for (g = 0; g < header->npullcrds; g++)
2255 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
2256 nColCOMCrd[g] = (header->bPrintCOM ? header->pcrd[g].ndim*header->pcrd[g].ngroup : 0);
2257 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
2261 nColExpect = 1; /* time column */
2262 for (g = 0; g < header->npullcrds; g++)
2264 nColExpect += nColThisCrd[g];
2267 /* read pullf or pullx. Could be optimized if min and max are given. */
2268 nt = read_xvg(fn, &y, &ny);
2270 /* Check consistency */
2271 quantity = opt->bPullx ? "position" : "force";
2274 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2276 if (bFirst || opt->verbose)
2278 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
2279 for (i = 0; i < header->npullcrds; i++)
2281 printf("\tColumns for pull coordinate %d\n", i+1);
2282 printf("\t\treaction coordinate: %d\n"
2283 "\t\tcenter-of-mass of groups: %d\n"
2284 "\t\treference position column: %s\n",
2285 1, nColCOMCrd[i], (header->bPrintRefValue ? "Yes" : "No"));
2287 printf("\tFound %d times in %s\n", nt, fn);
2290 if (nColExpect != ny)
2292 gmx_fatal(FARGS, "Expected %d columns (including time column) in %s, but found %d."
2293 " Maybe you confused options -if and -ix ?", nColExpect, fn, ny);
2303 window->dt = y[0][1]-y[0][0];
2305 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2307 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2310 /* Need to alocate memory and set up structure for windows */
2313 /* Use only groups selected with option -is file */
2314 if (header->npullcrds != coordsel->n)
2316 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2317 header->npullcrds, coordsel->n);
2319 window->nPull = coordsel->nUse;
2323 window->nPull = header->npullcrds;
2326 window->nBin = bins;
2327 snew(window->Histo, window->nPull);
2328 snew(window->z, window->nPull);
2329 snew(window->k, window->nPull);
2330 snew(window->pos, window->nPull);
2331 snew(window->N, window->nPull);
2332 snew(window->Ntot, window->nPull);
2333 snew(window->g, window->nPull);
2334 snew(window->bsWeight, window->nPull);
2335 window->bContrib = 0;
2337 if (opt->bCalcTauInt)
2339 snew(window->ztime, window->nPull);
2343 window->ztime = NULL;
2345 snew(lennow, window->nPull);
2347 for (g = 0; g < window->nPull; ++g)
2350 window->bsWeight [g] = 1.;
2352 window->Ntot [g] = 0;
2354 snew(window->Histo[g], bins);
2356 if (opt->bCalcTauInt)
2358 window->ztime[g] = NULL;
2362 /* Copying umbrella center and force const is more involved since not
2363 all pull groups from header (tpr file) may be used in window variable */
2364 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2366 if (coordsel && (coordsel->bUse[g] == FALSE))
2370 window->k [gUsed] = header->pcrd[g].k;
2371 window->pos[gUsed] = header->pcrd[g].init_dist;
2376 { /* only determine min and max */
2379 min = max = bins = 0; /* Get rid of warnings */
2383 for (i = 0; i < nt; i++)
2385 /* Do you want that time frame? */
2386 t = 1.0/1000*( static_cast<gmx_int64_t> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2388 /* get time step of pdo file and get dstep from opt->dt */
2398 dstep = static_cast<int>(opt->dt/dt+0.5);
2406 window->dt = dt*dstep;
2410 dt_ok = (i%dstep == 0);
2411 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2413 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2414 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2417 /* Note: if coordsel == NULL:
2418 * all groups in pullf/x file are stored in this window, and gUsed == g
2419 * if coordsel != NULL:
2420 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2423 for (g = 0; g < header->npullcrds; ++g)
2425 /* was this group selected for application in WHAM? */
2426 if (coordsel && (coordsel->bUse[g] == FALSE))
2434 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2436 pos = -force/header->pcrd[g].k + header->pcrd[g].init_dist;
2440 /* Pick the correct column index.
2441 Note that there is always exactly one displacement column.
2444 for (int j = 0; j < g; j++)
2446 column += nColThisCrd[j];
2451 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2465 if (gUsed >= window->nPull)
2467 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been caught before.\n",
2468 gUsed, window->nPull);
2471 if (opt->bCalcTauInt && !bGetMinMax)
2473 /* save time series for autocorrelation analysis */
2474 ntot = window->Ntot[gUsed];
2475 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2476 if (ntot >= lennow[gUsed])
2478 lennow[gUsed] += blocklen;
2479 srenew(window->ztime[gUsed], lennow[gUsed]);
2481 window->ztime[gUsed][ntot] = pos;
2484 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2489 while ( (ibin += bins) < 0)
2494 else if (ibin >= bins)
2496 while ( (ibin -= bins) >= bins)
2502 if (ibin >= 0 && ibin < bins)
2504 window->Histo[gUsed][ibin] += 1.;
2507 window->Ntot[gUsed]++;
2511 else if (t > opt->tmax)
2515 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2527 for (i = 0; i < ny; i++)
2533 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2534 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2535 t_UmbrellaHeader* header,
2536 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2539 real mintmp, maxtmp;
2541 printf("Reading %d tpr and pullf files\n", nfiles);
2543 /* min and max not given? */
2546 printf("Automatic determination of boundaries...\n");
2549 for (i = 0; i < nfiles; i++)
2551 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2553 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2555 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : NULL);
2556 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2558 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2560 read_pull_xf(fnPull[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2561 (opt->nCoordsel > 0) ? &opt->coordsel[i] : NULL);
2562 if (maxtmp > opt->max)
2566 if (mintmp < opt->min)
2571 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2572 if (opt->bBoundsOnly)
2574 printf("Found option -boundsonly, now exiting.\n");
2578 /* store stepsize in profile */
2579 opt->dz = (opt->max-opt->min)/opt->bins;
2581 bool foundData = false;
2582 for (i = 0; i < nfiles; i++)
2584 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2586 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2588 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : NULL);
2589 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2591 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2593 read_pull_xf(fnPull[i], header, window+i, opt, FALSE, NULL, NULL,
2594 (opt->nCoordsel > 0) ? &opt->coordsel[i] : NULL);
2595 if (window[i].Ntot[0] == 0)
2597 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2606 gmx_fatal(FARGS, "No data points were found in pullf/pullx files. Maybe you need to specify the -b option?\n");
2609 for (i = 0; i < nfiles; i++)
2618 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2620 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2621 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2623 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2625 int nlines, ny, i, ig;
2628 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2629 nlines = read_xvg(fn, &iact, &ny);
2630 if (nlines != nwins)
2632 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2635 for (i = 0; i < nlines; i++)
2637 if (window[i].nPull != ny)
2639 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2640 "number of pull groups is different in different simulations. That is not\n"
2641 "supported yet. Sorry.\n");
2643 for (ig = 0; ig < window[i].nPull; ig++)
2645 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2646 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2648 if (iact[ig][i] <= 0.0)
2650 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2657 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2660 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2661 * that -in case of limited sampling- the ACT may be severely underestimated.
2662 * Note: the g=1+2tau are overwritten.
2663 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2666 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2669 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2671 /* only evaluate within +- 3sigma of the Gausian */
2672 siglim = 3.0*opt->sigSmoothIact;
2673 siglim2 = gmx::square(siglim);
2674 /* pre-factor of Gaussian */
2675 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2676 invtwosig2 = 0.5/gmx::square(opt->sigSmoothIact);
2678 for (i = 0; i < nwins; i++)
2680 snew(window[i].tausmooth, window[i].nPull);
2681 for (ig = 0; ig < window[i].nPull; ig++)
2685 pos = window[i].pos[ig];
2686 for (j = 0; j < nwins; j++)
2688 for (jg = 0; jg < window[j].nPull; jg++)
2690 dpos2 = gmx::square(window[j].pos[jg]-pos);
2691 if (dpos2 < siglim2)
2693 w = gaufact*std::exp(-dpos2*invtwosig2);
2695 tausm += w*window[j].tau[jg];
2696 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2697 w,dpos2,pos,gaufact,invtwosig2); */
2702 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2704 window[i].tausmooth[ig] = tausm;
2708 window[i].tausmooth[ig] = window[i].tau[ig];
2710 window[i].g[ig] = 1+2*tausm/window[i].dt;
2715 //! Stop integrating autoccorelation function when ACF drops under this value
2716 #define WHAM_AC_ZERO_LIMIT 0.05
2718 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2720 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2721 t_UmbrellaOptions *opt, const char *fn, const char *xlabel)
2723 int i, ig, ncorr, ntot, j, k, *count, restart;
2724 real *corr, c0, dt, tmp;
2725 real *ztime, av, tausteps;
2726 FILE *fp, *fpcorr = 0;
2730 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2731 "time [ps]", "autocorrelation function", opt->oenv);
2735 for (i = 0; i < nwins; i++)
2737 fprintf(stdout, "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2739 ntot = window[i].Ntot[0];
2741 /* using half the maximum time as length of autocorrelation function */
2745 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2746 " points. Provide more pull data!", ntot);
2749 /* snew(corrSq,ncorr); */
2752 snew(window[i].tau, window[i].nPull);
2753 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2759 for (ig = 0; ig < window[i].nPull; ig++)
2761 if (ntot != window[i].Ntot[ig])
2763 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2764 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2766 ztime = window[i].ztime[ig];
2768 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2769 for (j = 0, av = 0; (j < ntot); j++)
2774 for (k = 0; (k < ncorr); k++)
2779 for (j = 0; (j < ntot); j += restart)
2781 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2783 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2785 /* corrSq[k] += tmp*tmp; */
2789 /* divide by nr of frames for each time displacement */
2790 for (k = 0; (k < ncorr); k++)
2792 /* count probably = (ncorr-k+(restart-1))/restart; */
2793 corr[k] = corr[k]/count[k];
2794 /* variance of autocorrelation function */
2795 /* corrSq[k]=corrSq[k]/count[k]; */
2797 /* normalize such that corr[0] == 0 */
2799 for (k = 0; (k < ncorr); k++)
2802 /* corrSq[k]*=c0*c0; */
2805 /* write ACFs in verbose mode */
2808 for (k = 0; (k < ncorr); k++)
2810 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2812 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2815 /* esimate integrated correlation time, fitting is too unstable */
2816 tausteps = 0.5*corr[0];
2817 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2818 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2820 tausteps += corr[j];
2823 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2824 Kumar et al, eq. 28 ff. */
2825 window[i].tau[ig] = tausteps*dt;
2826 window[i].g[ig] = 1+2*tausteps;
2827 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2838 /* plot IACT along reaction coordinate */
2839 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2840 if (output_env_get_print_xvgr_codes(opt->oenv))
2842 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2843 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2844 for (i = 0; i < nwins; i++)
2846 fprintf(fp, "# %3d ", i);
2847 for (ig = 0; ig < window[i].nPull; ig++)
2849 fprintf(fp, " %11g", window[i].tau[ig]);
2854 for (i = 0; i < nwins; i++)
2856 for (ig = 0; ig < window[i].nPull; ig++)
2858 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2861 if (opt->sigSmoothIact > 0.0)
2863 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2864 opt->sigSmoothIact);
2865 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2866 smoothIact(window, nwins, opt);
2867 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2868 if (output_env_get_print_xvgr_codes(opt->oenv))
2870 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2871 fprintf(fp, "@ s1 symbol color 2\n");
2873 for (i = 0; i < nwins; i++)
2875 for (ig = 0; ig < window[i].nPull; ig++)
2877 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2882 printf("Wrote %s\n", fn);
2886 * compute average and sigma of each umbrella histogram
2888 void averageSigma(t_UmbrellaWindow *window, int nwins)
2891 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2893 for (i = 0; i < nwins; i++)
2895 snew(window[i].aver, window[i].nPull);
2896 snew(window[i].sigma, window[i].nPull);
2898 ntot = window[i].Ntot[0];
2899 for (ig = 0; ig < window[i].nPull; ig++)
2901 ztime = window[i].ztime[ig];
2902 for (k = 0, av = 0.; k < ntot; k++)
2907 for (k = 0, sum2 = 0.; k < ntot; k++)
2912 sig = std::sqrt(sum2/ntot);
2913 window[i].aver[ig] = av;
2915 /* Note: This estimate for sigma is biased from the limited sampling.
2916 Correct sigma by n/(n-1) where n = number of independent
2917 samples. Only possible if IACT is known.
2921 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2922 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2926 window[i].sigma[ig] = sig;
2928 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2935 * Use histograms to compute average force on pull group.
2937 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2939 int i, j, bins = opt->bins, k;
2940 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2943 dz = (max-min)/bins;
2944 ztot = opt->max-min;
2947 /* Compute average displacement from histograms */
2948 for (j = 0; j < nWindows; ++j)
2950 snew(window[j].forceAv, window[j].nPull);
2951 for (k = 0; k < window[j].nPull; ++k)
2955 for (i = 0; i < opt->bins; ++i)
2957 temp = (1.0*i+0.5)*dz+min;
2958 distance = temp - window[j].pos[k];
2960 { /* in cyclic wham: */
2961 if (distance > ztot_half) /* |distance| < ztot_half */
2965 else if (distance < -ztot_half)
2970 w = window[j].Histo[k][i]/window[j].g[k];
2971 displAv += w*distance;
2973 /* Are we near min or max? We are getting wrong forces from the histgrams since
2974 the histograms are zero outside [min,max). Therefore, assume that the position
2975 on the other side of the histomgram center is equally likely. */
2978 posmirrored = window[j].pos[k]-distance;
2979 if (posmirrored >= max || posmirrored < min)
2981 displAv += -w*distance;
2988 /* average force from average displacement */
2989 window[j].forceAv[k] = displAv*window[j].k[k];
2990 /* sigma from average square displacement */
2991 /* window[j].sigma [k] = sqrt(displAv2); */
2992 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2998 * Check if the complete reaction coordinate is covered by the histograms
3000 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
3001 t_UmbrellaOptions *opt)
3003 int i, ig, j, bins = opt->bins, bBoundary;
3004 real avcount = 0, z, relcount, *count;
3005 snew(count, opt->bins);
3007 for (j = 0; j < opt->bins; ++j)
3009 for (i = 0; i < nwins; i++)
3011 for (ig = 0; ig < window[i].nPull; ig++)
3013 count[j] += window[i].Histo[ig][j];
3016 avcount += 1.0*count[j];
3019 for (j = 0; j < bins; ++j)
3021 relcount = count[j]/avcount;
3022 z = (j+0.5)*opt->dz+opt->min;
3023 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
3024 /* check for bins with no data */
3027 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
3028 "You may not get a reasonable profile. Check your histograms!\n", j, z);
3030 /* and check for poor sampling */
3031 else if (relcount < 0.005 && !bBoundary)
3033 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3039 /*! \brief Compute initial potential by integrating the average force
3041 * This speeds up the convergence by roughly a factor of 2
3043 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt, const char *xlabel)
3045 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3046 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3049 dz = (opt->max-min)/bins;
3051 printf("Getting initial potential by integration.\n");
3053 /* Compute average displacement from histograms */
3054 computeAverageForce(window, nWindows, opt);
3056 /* Get force for each bin from all histograms in this bin, or, alternatively,
3057 if no histograms are inside this bin, from the closest histogram */
3060 for (j = 0; j < opt->bins; ++j)
3062 pos = (1.0*j+0.5)*dz+min;
3066 groupmin = winmin = 0;
3067 for (i = 0; i < nWindows; i++)
3069 for (ig = 0; ig < window[i].nPull; ig++)
3071 hispos = window[i].pos[ig];
3072 dist = std::abs(hispos-pos);
3073 /* average force within bin */
3077 fAv += window[i].forceAv[ig];
3079 /* at the same time, remember closest histogram */
3088 /* if no histogram found in this bin, use closest histogram */
3095 fAv = window[winmin].forceAv[groupmin];
3099 for (j = 1; j < opt->bins; ++j)
3101 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3104 /* cyclic wham: linearly correct possible offset */
3107 diff = (pot[bins-1]-pot[0])/(bins-1);
3108 for (j = 1; j < opt->bins; ++j)
3115 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3116 for (j = 0; j < opt->bins; ++j)
3118 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3121 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3124 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3125 energy offsets which are usually determined by wham
3126 First: turn pot into probabilities:
3128 for (j = 0; j < opt->bins; ++j)
3130 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3132 calc_z(pot, window, nWindows, opt, TRUE);
3138 //! Count number of words in an line
3139 static int wordcount(char *ptr)
3144 if (std::strlen(ptr) == 0)
3148 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3150 for (i = 0; (ptr[i] != '\0'); i++)
3152 is[cur] = isspace(ptr[i]);
3153 if ((i > 0) && (is[cur] && !is[1-cur]))
3162 /*! \brief Read input file for pull group selection (option -is)
3164 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3166 void readPullCoordSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3169 int i, iline, n, len = STRLEN, temp;
3170 char *ptr = 0, *tmpbuf = 0;
3171 char fmt[1024], fmtign[1024];
3172 int block = 1, sizenow;
3174 fp = gmx_ffopen(opt->fnCoordSel, "r");
3175 opt->coordsel = NULL;
3180 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3185 if (iline >= sizenow)
3188 srenew(opt->coordsel, sizenow);
3190 opt->coordsel[iline].n = n;
3191 opt->coordsel[iline].nUse = 0;
3192 snew(opt->coordsel[iline].bUse, n);
3195 for (i = 0; i < n; i++)
3197 std::strcpy(fmt, fmtign);
3198 std::strcat(fmt, "%d");
3199 if (sscanf(ptr, fmt, &temp))
3201 opt->coordsel[iline].bUse[i] = (temp > 0);
3202 if (opt->coordsel[iline].bUse[i])
3204 opt->coordsel[iline].nUse++;
3207 std::strcat(fmtign, "%*s");
3211 opt->nCoordsel = iline;
3212 if (nTpr != opt->nCoordsel)
3214 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel,
3218 printf("\nUse only these pull coordinates:\n");
3219 for (iline = 0; iline < nTpr; iline++)
3221 printf("%s (%d of %d coordinates):", fnTpr[iline], opt->coordsel[iline].nUse, opt->coordsel[iline].n);
3222 for (i = 0; i < opt->coordsel[iline].n; i++)
3224 if (opt->coordsel[iline].bUse[i])
3237 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3239 //! Number of elements in fnm (used for command line parsing)
3240 #define NFILE asize(fnm)
3242 //! The main gmx wham routine
3243 int gmx_wham(int argc, char *argv[])
3245 const char *desc[] = {
3246 "[THISMODULE] is an analysis program that implements the Weighted",
3247 "Histogram Analysis Method (WHAM). It is intended to analyze",
3248 "output files generated by umbrella sampling simulations to ",
3249 "compute a potential of mean force (PMF).[PAR]",
3251 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3252 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3253 "and, if multiple coordinates need to be analyzed, all used the same",
3254 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3255 "At present, three input modes are supported.",
3257 "* With option [TT]-it[tt], the user provides a file which contains the",
3258 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3259 " AND, with option [TT]-ix[tt], a file which contains file names of",
3260 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3261 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3262 " first pullx, etc.",
3263 "* Same as the previous input mode, except that the the user",
3264 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3265 " From the pull force the position in the umbrella potential is",
3266 " computed. This does not work with tabulated umbrella potentials.",
3267 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3268 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3269 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3270 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3271 " must be similar to the following::",
3274 " # Component selection: 0 0 1",
3276 " # Ref. Group 'TestAtom'",
3277 " # Nr. of pull groups 2",
3278 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3279 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3282 " The number of pull groups, umbrella positions, force constants, and names ",
3283 " may (of course) differ. Following the header, a time column and ",
3284 " a data column for each pull group follows (i.e. the displacement",
3285 " with respect to the umbrella center). Up to four pull groups are possible ",
3286 " per [REF].pdo[ref] file at present.[PAR]",
3287 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If only ",
3288 "some of the pull coordinates should be used, a pull coordinate selection file (option [TT]-is[tt]) can ",
3289 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3290 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr file. ",
3291 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. Example:",
3292 "If you have three tpr files, each containing 4 pull coordinates, but only pull coordinates 1 and 2 should be ",
3293 "used, coordsel.dat looks like this::",
3299 "By default, the output files are::",
3301 " [TT]-o[tt] PMF output file",
3302 " [TT]-hist[tt] Histograms output file",
3304 "Always check whether the histograms sufficiently overlap.[PAR]",
3305 "The umbrella potential is assumed to be harmonic and the force constants are ",
3306 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3307 "a tabulated potential can be provided with [TT]-tab[tt].",
3312 "* [TT]-bins[tt] Number of bins used in analysis",
3313 "* [TT]-temp[tt] Temperature in the simulations",
3314 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3315 "* [TT]-auto[tt] Automatic determination of boundaries",
3316 "* [TT]-min,-max[tt] Boundaries of the profile",
3318 "The data points that are used to compute the profile",
3319 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3320 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3321 "umbrella window.[PAR]",
3322 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3323 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3324 "With energy output, the energy in the first bin is defined to be zero. ",
3325 "If you want the free energy at a different ",
3326 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3327 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3328 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3329 "[THISMODULE] will make use of the",
3330 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3331 "reaction coordinate will assumed be be neighbors.[PAR]",
3332 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3333 "which may be useful for, e.g. membranes.",
3338 "If available, the number of OpenMP threads used by gmx wham is controlled with [TT]-nt[tt].",
3343 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3344 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3345 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3346 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3347 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3348 "Because the IACTs can be severely underestimated in case of limited ",
3349 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3350 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3351 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3352 "integration of the ACFs while the ACFs are larger 0.05.",
3353 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3354 "less robust) method such as fitting to a double exponential, you can ",
3355 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3356 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3357 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull coordinate in the respective file.",
3362 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3363 "otherwise the statistical error may be substantially underestimated. ",
3364 "More background and examples for the bootstrap technique can be found in ",
3365 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3366 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3367 "Four bootstrapping methods are supported and ",
3368 "selected with [TT]-bs-method[tt].",
3370 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3371 " data points, and the bootstrap is carried out by assigning random weights to the ",
3372 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3373 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3374 " statistical error is underestimated.",
3375 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3376 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3377 " (allowing duplication, i.e. sampling with replacement).",
3378 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3379 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3380 " divided into blocks and only histograms within each block are mixed. Note that ",
3381 " the histograms within each block must be representative for all possible histograms, ",
3382 " otherwise the statistical error is underestimated.",
3383 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3384 " such that the generated data points are distributed according the given histograms ",
3385 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3386 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3387 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3388 " Note that this method may severely underestimate the error in case of limited sampling, ",
3389 " that is if individual histograms do not represent the complete phase space at ",
3390 " the respective positions.",
3391 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3392 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3393 " and width of the umbrella histograms. That method yields similar error estimates ",
3394 " like method [TT]traj[tt].",
3396 "Bootstrapping output:",
3398 "* [TT]-bsres[tt] Average profile and standard deviations",
3399 "* [TT]-bsprof[tt] All bootstrapping profiles",
3401 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3402 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3406 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3407 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3408 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3409 static t_UmbrellaOptions opt;
3412 { "-min", FALSE, etREAL, {&opt.min},
3413 "Minimum coordinate in profile"},
3414 { "-max", FALSE, etREAL, {&opt.max},
3415 "Maximum coordinate in profile"},
3416 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3417 "Determine min and max automatically"},
3418 { "-bins", FALSE, etINT, {&opt.bins},
3419 "Number of bins in profile"},
3420 { "-temp", FALSE, etREAL, {&opt.Temperature},
3422 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3424 { "-v", FALSE, etBOOL, {&opt.verbose},
3426 { "-b", FALSE, etREAL, {&opt.tmin},
3427 "First time to analyse (ps)"},
3428 { "-e", FALSE, etREAL, {&opt.tmax},
3429 "Last time to analyse (ps)"},
3430 { "-dt", FALSE, etREAL, {&opt.dt},
3431 "Analyse only every dt ps"},
3432 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3433 "Write histograms and exit"},
3434 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3435 "Determine min and max and exit (with [TT]-auto[tt])"},
3436 { "-log", FALSE, etBOOL, {&opt.bLog},
3437 "Calculate the log of the profile before printing"},
3438 { "-unit", FALSE, etENUM, {en_unit},
3439 "Energy unit in case of log output" },
3440 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3441 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3442 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3443 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3444 { "-sym", FALSE, etBOOL, {&opt.bSym},
3445 "Symmetrize profile around z=0"},
3446 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3447 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3448 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3449 "Calculate integrated autocorrelation times and use in wham"},
3450 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3451 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3452 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3453 "When computing autocorrelation functions, restart computing every .. (ps)"},
3454 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3455 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3456 "during smoothing"},
3457 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3458 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3459 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3460 "Bootstrap method" },
3461 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3462 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3463 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3464 "Seed for bootstrapping. (-1 = use time)"},
3465 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3466 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3467 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3468 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3469 { "-stepout", FALSE, etINT, {&opt.stepchange},
3470 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3471 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3472 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3476 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3477 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3478 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3479 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3480 { efDAT, "-is", "coordsel", ffOPTRD}, /* input: select pull coords to use */
3481 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3482 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3483 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3484 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3485 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3486 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3487 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3490 int i, j, l, nfiles, nwins, nfiles2;
3491 t_UmbrellaHeader header;
3492 t_UmbrellaWindow * window = NULL;
3493 double *profile, maxchange = 1e20;
3494 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3495 char **fninTpr, **fninPull, **fninPdo;
3497 FILE *histout, *profout;
3498 char xlabel[STRLEN], ylabel[256], title[256];
3501 opt.verbose = FALSE;
3502 opt.bHistOnly = FALSE;
3511 opt.coordsel = NULL;
3513 /* bootstrapping stuff */
3515 opt.bsMethod = bsMethod_hist;
3516 opt.tauBootStrap = 0.0;
3518 opt.histBootStrapBlockLength = 8;
3519 opt.bs_verbose = FALSE;
3524 opt.Temperature = 298;
3525 opt.Tolerance = 1e-6;
3526 opt.bBoundsOnly = FALSE;
3528 opt.bCalcTauInt = FALSE;
3529 opt.sigSmoothIact = 0.0;
3530 opt.bAllowReduceIact = TRUE;
3531 opt.bInitPotByIntegration = TRUE;
3532 opt.acTrestart = 1.0;
3533 opt.stepchange = 100;
3534 opt.stepUpdateContrib = 100;
3536 if (!parse_common_args(&argc, argv, 0,
3537 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3542 opt.unit = nenum(en_unit);
3543 opt.bsMethod = nenum(en_bsMethod);
3545 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3547 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3548 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3549 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3550 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3551 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3552 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3553 if (opt.bTab && opt.bPullf)
3555 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3556 "Provide pullx.xvg or pdo files!");
3559 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3561 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3563 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3565 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3566 "\n\n Check gmx wham -h !");
3569 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3570 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3571 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3572 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3573 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3575 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3576 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3577 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3578 if ( (bMinSet || bMaxSet) && bAutoSet)
3580 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3583 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3585 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3588 if (bMinSet && opt.bAuto)
3590 printf("Note: min and max given, switching off -auto.\n");
3594 if (opt.bTauIntGiven && opt.bCalcTauInt)
3596 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3597 "the autocorrelation times. Not both.");
3600 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3602 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3603 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3605 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3607 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3608 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3611 /* Reading gmx4/gmx5 pull output and tpr files */
3612 if (opt.bTpr || opt.bPullf || opt.bPullx)
3614 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3616 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3617 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3618 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3619 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3620 if (nfiles != nfiles2)
3622 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3623 opt.fnTpr, nfiles2, fnPull);
3626 /* Read file that selects the pull group to be used */
3627 if (opt.fnCoordSel != NULL)
3629 readPullCoordSelection(&opt, fninTpr, nfiles);
3632 window = initUmbrellaWindows(nfiles);
3633 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3636 { /* reading pdo files */
3637 if (opt.fnCoordSel != NULL)
3639 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3640 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3642 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3643 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3644 window = initUmbrellaWindows(nfiles);
3645 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3648 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3649 We can therefore get the units for the xlabel from the first coordinate. */
3650 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3654 /* enforce equal weight for all histograms? */
3657 enforceEqualWeights(window, nwins);
3660 /* write histograms */
3661 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3662 xlabel, "count", opt.oenv);
3663 for (l = 0; l < opt.bins; ++l)
3665 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3666 for (i = 0; i < nwins; ++i)
3668 for (j = 0; j < window[i].nPull; ++j)
3670 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3673 fprintf(histout, "\n");
3676 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3679 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3683 /* Using tabulated umbrella potential */
3686 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3689 /* Integrated autocorrelation times provided ? */
3690 if (opt.bTauIntGiven)
3692 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3695 /* Compute integrated autocorrelation times */
3696 if (opt.bCalcTauInt)
3698 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3701 /* calc average and sigma for each histogram
3702 (maybe required for bootstrapping. If not, this is fast anyhow) */
3703 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3705 averageSigma(window, nwins);
3708 /* Get initial potential by simple integration */
3709 if (opt.bInitPotByIntegration)
3711 guessPotByIntegration(window, nwins, &opt, xlabel);
3714 /* Check if complete reaction coordinate is covered */
3715 checkReactionCoordinateCovered(window, nwins, &opt);
3717 /* Calculate profile */
3718 snew(profile, opt.bins);
3726 if ( (i%opt.stepUpdateContrib) == 0)
3728 setup_acc_wham(profile, window, nwins, &opt);
3730 if (maxchange < opt.Tolerance)
3733 /* if (opt.verbose) */
3734 printf("Switched to exact iteration in iteration %d\n", i);
3736 calc_profile(profile, window, nwins, &opt, bExact);
3737 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3739 printf("\t%4d) Maximum change %e\n", i, maxchange);
3743 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3744 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3746 /* calc error from Kumar's formula */
3747 /* Unclear how the error propagates along reaction coordinate, therefore
3749 /* calc_error_kumar(profile,window, nwins,&opt); */
3751 /* Write profile in energy units? */
3754 prof_normalization_and_unit(profile, &opt);
3755 std::strcpy(ylabel, en_unit_label[opt.unit]);
3756 std::strcpy(title, "Umbrella potential");
3760 std::strcpy(ylabel, "Density of states");
3761 std::strcpy(title, "Density of states");
3764 /* symmetrize profile around z=0? */
3767 symmetrizeProfile(profile, &opt);
3770 /* write profile or density of states */
3771 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3772 for (i = 0; i < opt.bins; ++i)
3774 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3777 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3779 /* Bootstrap Method */
3782 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3783 opt2fn("-hist", NFILE, fnm),
3784 xlabel, ylabel, profile, window, nwins, &opt);
3788 freeUmbrellaWindows(window, nfiles);
3790 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3791 please_cite(stdout, "Hub2010");