2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
56 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/fileio/tpxio.h"
58 #include "gromacs/fileio/xvgr.h"
59 #include "gromacs/gmxana/gmx_ana.h"
60 #include "gromacs/legacyheaders/copyrite.h"
61 #include "gromacs/legacyheaders/macros.h"
62 #include "gromacs/legacyheaders/names.h"
63 #include "gromacs/legacyheaders/typedefs.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/random/random.h"
66 #include "gromacs/utility/cstringutil.h"
67 #include "gromacs/utility/fatalerror.h"
68 #include "gromacs/utility/futil.h"
69 #include "gromacs/utility/gmxomp.h"
70 #include "gromacs/utility/smalloc.h"
72 //! longest file names allowed in input files
73 #define WHAM_MAXFILELEN 2048
76 * x-axis legend for output files
78 static const char *xlabel = "\\xx\\f{} (nm)";
81 * enum for energy units
84 enSel, en_kJ, en_kCal, en_kT, enNr
87 * enum for type of input files (pdos, tpr, or pullf)
90 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
93 /*! \brief enum for bootstrapping method
95 * These bootstrap methods are supported:
96 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
97 * (bsMethod_BayesianHist)
98 * - bootstrap complete histograms (bsMethod_hist)
99 * - bootstrap trajectories from given umbrella histograms. This generates new
100 * "synthetic" histograms (bsMethod_traj)
101 * - bootstrap trajectories from Gaussian with mu/sigma computed from
102 * the respective histogram (bsMethod_trajGauss). This gives very similar
103 * results compared to bsMethod_traj.
105 * ********************************************************************
106 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
107 * JS Hub, BL de Groot, D van der Spoel
108 * g_wham - A free weighted histogram analysis implementation including
109 * robust error and autocorrelation estimates,
110 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
111 * ********************************************************************
114 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
115 bsMethod_traj, bsMethod_trajGauss
119 //! Parameters of the umbrella potentials
123 * \name Using umbrella pull code since gromacs 4.x
126 int npullcrds_tot; //!< nr of pull coordinates in tpr file
127 int npullcrds; //!< nr of umbrella pull coordinates for reading
128 int pull_geometry; //!< such as distance, direction
129 ivec pull_dim; //!< pull dimension with geometry distance
130 int pull_ndim; //!< nr of pull_dim != 0
131 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
132 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
133 real *k; //!< force constants in tpr file
134 real *init_dist; //!< reference displacements
135 real *umbInitDist; //!< reference displacement in umbrella direction
138 * \name Using PDO files common until gromacs 3.x
146 char PullName[4][256];
148 double UmbCons[4][3];
152 //! Data in the umbrella histograms
155 int nPull; //!< nr of pull groups in this pdo or pullf/x file
156 double **Histo; //!< nPull histograms
157 double **cum; //!< nPull cumulative distribution functions
158 int nBin; //!< nr of bins. identical to opt->bins
159 double *k; //!< force constants for the nPull groups
160 double *pos; //!< umbrella positions for the nPull groups
161 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
162 int *N; //!< nr of data points in nPull histograms.
163 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
165 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
167 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
168 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
171 double *tau; //!< intetrated autocorrelation time (IACT)
172 double *tausmooth; //!< smoothed IACT
174 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
176 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
178 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
180 /*! \brief average force estimated from average displacement, fAv=dzAv*k
182 * Used for integration to guess the potential.
185 real *aver; //!< average of histograms
186 real *sigma; //!< stddev of histograms
187 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
190 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
193 int n; //!< total nr of pull groups in this tpr file
194 int nUse; //!< nr of pull groups used
195 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
198 //! Parameters of WHAM
205 const char *fnTpr, *fnPullf, *fnGroupsel;
206 const char *fnPdo, *fnPullx; //!< file names of input
207 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
208 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
210 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
211 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
212 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
213 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
216 * \name Basic WHAM options
219 int bins; //!< nr of bins, min, max, and dz of profile
221 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
222 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
225 * \name Output control
228 gmx_bool bLog; //!< energy output (instead of probability) for profile
229 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
230 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
231 /*! \brief after wham, set prof to zero at this z-position.
232 * When bootstrapping, set zProf0 to a "stable" reference position.
235 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
237 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
238 gmx_bool bAuto; //!< determine min and max automatically but do not exit
240 gmx_bool verbose; //!< more noisy wham mode
241 int stepchange; //!< print maximum change in prof after how many interations
242 output_env_t oenv; //!< xvgr options
245 * \name Autocorrelation stuff
248 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
249 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
250 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
251 real acTrestart; //!< when computing ACT, time between restarting points
253 /* \brief Enforce the same weight for each umbella window, that is
254 * calculate with the same number of data points for
255 * each window. That can be reasonable, if the histograms
256 * have different length, but due to autocorrelation,
257 * a longer simulation should not have larger weightin wham.
263 * \name Bootstrapping stuff
266 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
268 /* \brief bootstrap method
270 * if == bsMethod_hist, consider complete histograms as independent
271 * data points and, hence, only mix complete histograms.
272 * if == bsMethod_BayesianHist, consider complete histograms
273 * as independent data points, but assign random weights
274 * to the histograms during the bootstrapping ("Bayesian bootstrap")
275 * In case of long correlations (e.g., inside a channel), these
276 * will yield a more realistic error.
277 * if == bsMethod_traj(Gauss), generate synthetic histograms
279 * histogram by generating an autocorrelated random sequence
280 * that is distributed according to the respective given
281 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
282 * (instead of from the umbrella histogram) to generate a new
287 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
290 /* \brief when mixing histograms, mix only histograms withing blocks
291 long the reaction coordinate xi. Avoids gaps along xi. */
292 int histBootStrapBlockLength;
294 int bsSeed; //!< random seed for bootstrapping
296 /* \brief Write cumulative distribution functions (CDFs) of histograms
297 and write the generated histograms for each bootstrap */
301 * \name tabulated umbrella potential stuff
305 double *tabX, *tabY, tabMin, tabMax, tabDz;
308 gmx_rng_t rng; //!< gromacs random number generator
311 //! Make an umbrella window (may contain several histograms)
312 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
314 t_UmbrellaWindow *win;
317 for (i = 0; i < nwin; i++)
319 win[i].Histo = win[i].cum = 0;
320 win[i].k = win[i].pos = win[i].z = 0;
321 win[i].N = win[i].Ntot = 0;
322 win[i].g = win[i].tau = win[i].tausmooth = 0;
326 win[i].aver = win[i].sigma = 0;
332 //! Delete an umbrella window (may contain several histograms)
333 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
336 for (i = 0; i < nwin; i++)
340 for (j = 0; j < win[i].nPull; j++)
342 sfree(win[i].Histo[j]);
347 for (j = 0; j < win[i].nPull; j++)
349 sfree(win[i].cum[j]);
354 for (j = 0; j < win[i].nPull; j++)
356 sfree(win[i].bContrib[j]);
368 sfree(win[i].tausmooth);
369 sfree(win[i].bContrib);
371 sfree(win[i].forceAv);
374 sfree(win[i].bsWeight);
380 * Read and setup tabulated umbrella potential
382 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
387 printf("Setting up tabulated potential from file %s\n", fn);
388 nl = read_xvg(fn, &y, &ny);
392 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
394 opt->tabMin = y[0][0];
395 opt->tabMax = y[0][nl-1];
396 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
399 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
400 "ascending z-direction", fn);
402 for (i = 0; i < nl-1; i++)
404 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
406 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
411 for (i = 0; i < nl; i++)
413 opt->tabX[i] = y[0][i];
414 opt->tabY[i] = y[1][i];
416 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
417 opt->tabMin, opt->tabMax, opt->tabDz);
420 //! Read the header of an PDO file (position, force const, nr of groups)
421 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
424 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
426 std::istringstream ist;
429 if (fgets(line, 2048, file) == NULL)
431 gmx_fatal(FARGS, "Error reading header from pdo file\n");
434 ist >> Buffer0 >> Buffer1 >> Buffer2;
435 if (std::strcmp(Buffer1, "UMBRELLA"))
437 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
438 "(Found in first line: `%s')\n",
439 Buffer1, "UMBRELLA", line);
441 if (std::strcmp(Buffer2, "3.0"))
443 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
447 if (fgets(line, 2048, file) == NULL)
449 gmx_fatal(FARGS, "Error reading header from pdo file\n");
452 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
453 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
455 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
456 if (header->nDim != 1)
458 gmx_fatal(FARGS, "Currently only supports one dimension");
462 if (fgets(line, 2048, file) == NULL)
464 gmx_fatal(FARGS, "Error reading header from pdo file\n");
467 ist >> Buffer0 >> Buffer1 >> header->nSkip;
470 if (fgets(line, 2048, file) == NULL)
472 gmx_fatal(FARGS, "Error reading header from pdo file\n");
475 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
478 if (fgets(line, 2048, file) == NULL)
480 gmx_fatal(FARGS, "Error reading header from pdo file\n");
483 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
487 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
491 for (i = 0; i < header->nPull; ++i)
493 if (fgets(line, 2048, file) == NULL)
495 gmx_fatal(FARGS, "Error reading header from pdo file\n");
498 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
499 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
500 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
504 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
505 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
509 if (fgets(line, 2048, file) == NULL)
511 gmx_fatal(FARGS, "Cannot read from file\n");
515 if (std::strcmp(Buffer3, "#####") != 0)
517 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
522 static char *fgets3(FILE *fp, char ptr[], int *len)
527 if (fgets(ptr, *len-1, fp) == NULL)
532 while ((std::strchr(ptr, '\n') == NULL) && (!feof(fp)))
534 /* This line is longer than len characters, let's increase len! */
538 if (fgets(p-1, STRLEN, fp) == NULL)
543 slen = std::strlen(ptr);
544 if (ptr[slen-1] == '\n')
552 /*! \brief Read the data columns of and PDO file.
554 * TO DO: Get rid of the scanf function to avoid the clang warning.
555 * At the moment, this warning is avoided by hiding the format string
556 * the variable fmtlf.
558 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
559 int fileno, t_UmbrellaWindow * win,
560 t_UmbrellaOptions *opt,
561 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
563 int i, inttemp, bins, count, ntot;
564 real minval, maxval, minfound = 1e20, maxfound = -1e20;
565 double temp, time, time0 = 0, dt;
567 t_UmbrellaWindow * window = 0;
568 gmx_bool timeok, dt_ok = 1;
569 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
570 int len = STRLEN, dstep = 1;
571 const int blocklen = 4096;
581 /* Need to alocate memory and set up structure */
582 window->nPull = header->nPull;
585 snew(window->Histo, window->nPull);
586 snew(window->z, window->nPull);
587 snew(window->k, window->nPull);
588 snew(window->pos, window->nPull);
589 snew(window->N, window->nPull);
590 snew(window->Ntot, window->nPull);
591 snew(window->g, window->nPull);
592 snew(window->bsWeight, window->nPull);
594 window->bContrib = 0;
596 if (opt->bCalcTauInt)
598 snew(window->ztime, window->nPull);
604 snew(lennow, window->nPull);
606 for (i = 0; i < window->nPull; ++i)
609 window->bsWeight[i] = 1.;
610 snew(window->Histo[i], bins);
611 window->k[i] = header->UmbCons[i][0];
612 window->pos[i] = header->UmbPos[i][0];
616 if (opt->bCalcTauInt)
618 window->ztime[i] = 0;
622 /* Done with setup */
628 minval = maxval = bins = 0; /* Get rid of warnings */
633 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
637 if (ptr[0] == '#' || std::strlen(ptr) < 2)
642 /* Initiate format string */
644 std::strcat(fmtign, "%*s");
646 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
647 /* Round time to fs */
648 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
650 /* get time step of pdo file */
660 dstep = static_cast<int>(opt->dt/dt+0.5);
668 window->dt = dt*dstep;
673 dt_ok = ((count-1)%dstep == 0);
674 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
676 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
677 time,opt->tmin, opt->tmax, dt_ok,timeok); */
681 for (i = 0; i < header->nPull; ++i)
683 std::strcpy(fmt, fmtign);
684 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
685 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
686 if (sscanf(ptr, fmt, &temp))
688 temp += header->UmbPos[i][0];
702 if (opt->bCalcTauInt)
704 /* save time series for autocorrelation analysis */
705 ntot = window->Ntot[i];
706 if (ntot >= lennow[i])
708 lennow[i] += blocklen;
709 srenew(window->ztime[i], lennow[i]);
711 window->ztime[i][ntot] = temp;
715 temp /= (maxval-minval);
717 temp = std::floor(temp);
719 inttemp = static_cast<int> (temp);
726 else if (inttemp >= bins)
732 if (inttemp >= 0 && inttemp < bins)
734 window->Histo[i][inttemp] += 1.;
742 if (time > opt->tmax)
746 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
762 /*! \brief Set identical weights for all histograms
764 * Normally, the weight is given by the number data points in each
765 * histogram, together with the autocorrelation time. This can be overwritten
766 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
767 * an appropriate autocorrelation times instead of using this function.
769 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
771 int i, k, j, NEnforced;
774 NEnforced = window[0].Ntot[0];
775 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
776 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
777 /* enforce all histograms to have the same weight as the very first histogram */
779 for (j = 0; j < nWindows; ++j)
781 for (k = 0; k < window[j].nPull; ++k)
783 ratio = 1.0*NEnforced/window[j].Ntot[k];
784 for (i = 0; i < window[0].nBin; ++i)
786 window[j].Histo[k][i] *= ratio;
788 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
793 /*! \brief Simple linear interpolation between two given tabulated points
795 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
798 double pl, pu, dz, dp;
800 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
802 if (jl < 0 || ju >= opt->tabNbins)
804 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
805 "Provide an extended table.", dist, jl, ju);
809 dz = dist-opt->tabX[jl];
810 dp = (pu-pl)*dz/opt->tabDz;
816 * Check which bins substiantially contribute (accelerates WHAM)
818 * Don't worry, that routine does not mean we compute the PMF in limited precision.
819 * After rapid convergence (using only substiantal contributions), we always switch to
822 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
823 t_UmbrellaOptions *opt)
825 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
826 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
827 gmx_bool bAnyContrib;
828 static int bFirst = 1;
829 static double wham_contrib_lim;
833 for (i = 0; i < nWindows; ++i)
835 nGrptot += window[i].nPull;
837 wham_contrib_lim = opt->Tolerance/nGrptot;
840 ztot = opt->max-opt->min;
843 for (i = 0; i < nWindows; ++i)
845 if (!window[i].bContrib)
847 snew(window[i].bContrib, window[i].nPull);
849 for (j = 0; j < window[i].nPull; ++j)
851 if (!window[i].bContrib[j])
853 snew(window[i].bContrib[j], opt->bins);
856 for (k = 0; k < opt->bins; ++k)
858 temp = (1.0*k+0.5)*dz+min;
859 distance = temp - window[i].pos[j]; /* distance to umbrella center */
861 { /* in cyclic wham: */
862 if (distance > ztot_half) /* |distance| < ztot_half */
866 else if (distance < -ztot_half)
871 /* Note: there are two contributions to bin k in the wham equations:
872 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
873 ii) exp(- U/(8.314e-3*opt->Temperature))
874 where U is the umbrella potential
875 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
880 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
884 U = tabulated_pot(distance, opt); /* Use tabulated potential */
886 contrib1 = profile[k]*std::exp(-U/(8.314e-3*opt->Temperature));
887 contrib2 = window[i].N[j]*std::exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
888 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
889 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
890 if (window[i].bContrib[j][k])
896 /* If this histo is far outside min and max all bContrib may be FALSE,
897 causing a floating point exception later on. To avoid that, switch
901 for (k = 0; k < opt->bins; ++k)
903 window[i].bContrib[j][k] = TRUE;
910 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
911 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
916 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
922 //! Compute the PMF (one of the two main WHAM routines)
923 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
924 t_UmbrellaOptions *opt, gmx_bool bExact)
926 double ztot_half, ztot, min = opt->min, dz = opt->dz;
928 ztot = opt->max-opt->min;
933 int nthreads = gmx_omp_get_max_threads();
934 int thread_id = gmx_omp_get_thread_num();
936 int i0 = thread_id*opt->bins/nthreads;
937 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
939 for (i = i0; i < i1; ++i)
942 double num, denom, invg, temp = 0, distance, U = 0;
944 for (j = 0; j < nWindows; ++j)
946 for (k = 0; k < window[j].nPull; ++k)
948 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
949 temp = (1.0*i+0.5)*dz+min;
950 num += invg*window[j].Histo[k][i];
952 if (!(bExact || window[j].bContrib[k][i]))
956 distance = temp - window[j].pos[k]; /* distance to umbrella center */
958 { /* in cyclic wham: */
959 if (distance > ztot_half) /* |distance| < ztot_half */
963 else if (distance < -ztot_half)
971 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
975 U = tabulated_pot(distance, opt); /* Use tabulated potential */
977 denom += invg*window[j].N[k]*std::exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
980 profile[i] = num/denom;
985 //! Compute the free energy offsets z (one of the two main WHAM routines)
986 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
987 t_UmbrellaOptions *opt, gmx_bool bExact)
989 double min = opt->min, dz = opt->dz, ztot_half, ztot;
990 double maxglob = -1e20;
992 ztot = opt->max-opt->min;
997 int nthreads = gmx_omp_get_max_threads();
998 int thread_id = gmx_omp_get_thread_num();
1000 int i0 = thread_id*nWindows/nthreads;
1001 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1002 double maxloc = -1e20;
1004 for (i = i0; i < i1; ++i)
1006 double total = 0, temp, distance, U = 0;
1009 for (j = 0; j < window[i].nPull; ++j)
1012 for (k = 0; k < window[i].nBin; ++k)
1014 if (!(bExact || window[i].bContrib[j][k]))
1018 temp = (1.0*k+0.5)*dz+min;
1019 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1021 { /* in cyclic wham: */
1022 if (distance > ztot_half) /* |distance| < ztot_half */
1026 else if (distance < -ztot_half)
1034 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1038 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1040 total += profile[k]*std::exp(-U/(8.314e-3*opt->Temperature));
1042 /* Avoid floating point exception if window is far outside min and max */
1045 total = -std::log(total);
1051 temp = std::abs(total - window[i].z[j]);
1056 window[i].z[j] = total;
1059 /* Now get maximum maxloc from the threads and put in maxglob */
1060 if (maxloc > maxglob)
1062 #pragma omp critical
1064 if (maxloc > maxglob)
1075 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1076 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1078 int i, j, bins = opt->bins;
1079 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1082 if (min > 0. || max < 0.)
1084 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1085 opt->min, opt->max);
1090 for (i = 0; i < bins; i++)
1094 /* bin left of zsym */
1095 j = static_cast<int> (std::floor((zsym-min)/dz-0.5));
1096 if (j >= 0 && (j+1) < bins)
1098 /* interpolate profile linearly between bins j and j+1 */
1099 z1 = min+(j+0.5)*dz;
1101 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1102 /* average between left and right */
1103 prof2[i] = 0.5*(profsym+profile[i]);
1107 prof2[i] = profile[i];
1111 std::memcpy(profile, prof2, bins*sizeof(double));
1115 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1116 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1119 double unit_factor = 1., R_MolarGasConst, diff;
1121 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1124 /* Not log? Nothing to do! */
1130 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1131 if (opt->unit == en_kT)
1135 else if (opt->unit == en_kJ)
1137 unit_factor = R_MolarGasConst*opt->Temperature;
1139 else if (opt->unit == en_kCal)
1141 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1145 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1148 for (i = 0; i < bins; i++)
1150 if (profile[i] > 0.0)
1152 profile[i] = -std::log(profile[i])*unit_factor;
1156 /* shift to zero at z=opt->zProf0 */
1157 if (!opt->bProf0Set)
1163 /* Get bin with shortest distance to opt->zProf0
1164 (-0.5 from bin position and +0.5 from rounding cancel) */
1165 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1170 else if (imin >= bins)
1174 diff = profile[imin];
1178 for (i = 0; i < bins; i++)
1184 //! Make an array of random integers (used for bootstrapping)
1185 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1187 int ipull, blockBase, nr, ipullRandom;
1189 if (blockLength == 0)
1191 blockLength = nPull;
1194 for (ipull = 0; ipull < nPull; ipull++)
1196 blockBase = (ipull/blockLength)*blockLength;
1198 { /* make sure nothing bad happens in the last block */
1199 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1200 ipullRandom = blockBase + nr;
1202 while (ipullRandom >= nPull);
1203 if (ipullRandom < 0 || ipullRandom >= nPull)
1205 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1206 "blockLength = %d, blockBase = %d\n",
1207 ipullRandom, nPull, nr, blockLength, blockBase);
1209 randomArray[ipull] = ipullRandom;
1211 /*for (ipull=0; ipull<nPull; ipull++)
1212 printf("%d ",randomArray[ipull]); printf("\n"); */
1215 /*! \brief Set pull group information of a synthetic histogram
1217 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1218 * but it is not required if we bootstrap complete histograms.
1220 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1221 t_UmbrellaWindow *thisWindow, int pullid)
1223 synthWindow->N [0] = thisWindow->N [pullid];
1224 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1225 synthWindow->pos [0] = thisWindow->pos [pullid];
1226 synthWindow->z [0] = thisWindow->z [pullid];
1227 synthWindow->k [0] = thisWindow->k [pullid];
1228 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1229 synthWindow->g [0] = thisWindow->g [pullid];
1230 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1233 /*! \brief Calculate cumulative distribution function of of all histograms.
1235 * This allow to create random number sequences
1236 * which are distributed according to the histograms. Required to generate
1237 * the "synthetic" histograms for the Bootstrap method
1239 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1240 t_UmbrellaOptions *opt, const char *fnhist)
1244 char *fn = 0, *buf = 0;
1247 if (opt->bs_verbose)
1249 snew(fn, std::strlen(fnhist)+10);
1250 snew(buf, std::strlen(fnhist)+10);
1251 sprintf(fn, "%s_cumul.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4));
1252 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1256 for (i = 0; i < nWindows; i++)
1258 snew(window[i].cum, window[i].nPull);
1259 for (j = 0; j < window[i].nPull; j++)
1261 snew(window[i].cum[j], nbin+1);
1262 window[i].cum[j][0] = 0.;
1263 for (k = 1; k <= nbin; k++)
1265 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1268 /* normalize CDFs. Ensure cum[nbin]==1 */
1269 last = window[i].cum[j][nbin];
1270 for (k = 0; k <= nbin; k++)
1272 window[i].cum[j][k] /= last;
1277 printf("Cumulative distriubtion functions of all histograms created.\n");
1278 if (opt->bs_verbose)
1280 for (k = 0; k <= nbin; k++)
1282 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1283 for (i = 0; i < nWindows; i++)
1285 for (j = 0; j < window[i].nPull; j++)
1287 fprintf(fp, "%g\t", window[i].cum[j][k]);
1292 printf("Wrote cumulative distribution functions to %s\n", fn);
1300 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1302 * This is used to generate a random sequence distributed according to a histogram
1304 void searchCumulative(double xx[], int n, double x, int *j)
1326 else if (x == xx[n-1])
1336 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1337 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1338 int pullid, t_UmbrellaOptions *opt)
1340 int N, i, nbins, r_index, ibin;
1341 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1344 N = thisWindow->N[pullid];
1345 dt = thisWindow->dt;
1346 nbins = thisWindow->nBin;
1348 /* tau = autocorrelation time */
1349 if (opt->tauBootStrap > 0.0)
1351 tausteps = opt->tauBootStrap/dt;
1353 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1355 /* calc tausteps from g=1+2tausteps */
1356 g = thisWindow->g[pullid];
1362 "When generating hypothetical trajctories from given umbrella histograms,\n"
1363 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1364 "cannot be predicted. You have 3 options:\n"
1365 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1366 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1368 " with option -iiact for all umbrella windows.\n"
1369 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1370 " Use option (3) only if you are sure what you're doing, you may severely\n"
1371 " underestimate the error if a too small ACT is given.\n");
1372 gmx_fatal(FARGS, errstr);
1375 synthWindow->N [0] = N;
1376 synthWindow->pos [0] = thisWindow->pos[pullid];
1377 synthWindow->z [0] = thisWindow->z[pullid];
1378 synthWindow->k [0] = thisWindow->k[pullid];
1379 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1380 synthWindow->g [0] = thisWindow->g [pullid];
1381 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1383 for (i = 0; i < nbins; i++)
1385 synthWindow->Histo[0][i] = 0.;
1388 if (opt->bsMethod == bsMethod_trajGauss)
1390 sig = thisWindow->sigma [pullid];
1391 mu = thisWindow->aver [pullid];
1394 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1396 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1398 z = a*x + sqrt(1-a^2)*y
1399 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1400 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1402 C(t) = exp(-t/tau) with tau=-1/ln(a)
1404 Then, use error function to turn the Gaussian random variable into a uniformly
1405 distributed one in [0,1]. Eventually, use cumulative distribution function of
1406 histogram to get random variables distributed according to histogram.
1407 Note: The ACT of the flat distribution and of the generated histogram is not
1408 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1410 a = std::exp(-1.0/tausteps);
1411 ap = std::sqrt(1.0-a*a);
1412 invsqrt2 = 1.0/std::sqrt(2.0);
1414 /* init random sequence */
1415 x = gmx_rng_gaussian_table(opt->rng);
1417 if (opt->bsMethod == bsMethod_traj)
1419 /* bootstrap points from the umbrella histograms */
1420 for (i = 0; i < N; i++)
1422 y = gmx_rng_gaussian_table(opt->rng);
1424 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1425 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1427 r = 0.5*(1+gmx_erf(x*invsqrt2));
1428 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1429 synthWindow->Histo[0][r_index] += 1.;
1432 else if (opt->bsMethod == bsMethod_trajGauss)
1434 /* bootstrap points from a Gaussian with the same average and sigma
1435 as the respective umbrella histogram. The idea was, that -given
1436 limited sampling- the bootstrapped histograms are otherwise biased
1437 from the limited sampling of the US histos. However, bootstrapping from
1438 the Gaussian seems to yield a similar estimate. */
1442 y = gmx_rng_gaussian_table(opt->rng);
1445 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1450 while ( (ibin += nbins) < 0)
1455 else if (ibin >= nbins)
1457 while ( (ibin -= nbins) >= nbins)
1464 if (ibin >= 0 && ibin < nbins)
1466 synthWindow->Histo[0][ibin] += 1.;
1473 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1477 /*! \brief Write all histograms to a file
1479 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1480 * sets of bootstrapped histograms.
1482 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1483 int bs_index, t_UmbrellaOptions *opt)
1485 char *fn = 0, *buf = 0, title[256];
1491 snew(fn, std::strlen(fnhist)+10);
1492 snew(buf, std::strlen(fnhist)+1);
1493 sprintf(fn, "%s_bs%d.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4), bs_index);
1494 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1498 fn = gmx_strdup(fnhist);
1499 std::strcpy(title, "Umbrella histograms");
1502 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1505 /* Write histograms */
1506 for (l = 0; l < bins; ++l)
1508 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1509 for (i = 0; i < nWindows; ++i)
1511 for (j = 0; j < window[i].nPull; ++j)
1513 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1520 printf("Wrote %s\n", fn);
1528 //! Used for qsort to sort random numbers
1529 int func_wham_is_larger(const void *a, const void *b)
1548 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1549 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1556 /* generate ordered random numbers between 0 and nAllPull */
1557 for (i = 0; i < nAllPull-1; i++)
1559 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1561 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1562 r[nAllPull-1] = 1.0*nAllPull;
1564 synthwin[0].bsWeight[0] = r[0];
1565 for (i = 1; i < nAllPull; i++)
1567 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1570 /* avoid to have zero weight by adding a tiny value */
1571 for (i = 0; i < nAllPull; i++)
1573 if (synthwin[i].bsWeight[0] < 1e-5)
1575 synthwin[i].bsWeight[0] = 1e-5;
1582 //! The main bootstrapping routine
1583 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1584 char* ylabel, double *profile,
1585 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1587 t_UmbrellaWindow * synthWindow;
1588 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1589 int i, j, *randomArray = 0, winid, pullid, ib;
1590 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1592 gmx_bool bExact = FALSE;
1594 /* init random generator */
1595 if (opt->bsSeed == -1)
1597 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1601 opt->rng = gmx_rng_init(opt->bsSeed);
1604 snew(bsProfile, opt->bins);
1605 snew(bsProfiles_av, opt->bins);
1606 snew(bsProfiles_av2, opt->bins);
1608 /* Create array of all pull groups. Note that different windows
1609 may have different nr of pull groups
1610 First: Get total nr of pull groups */
1612 for (i = 0; i < nWindows; i++)
1614 nAllPull += window[i].nPull;
1616 snew(allPull_winId, nAllPull);
1617 snew(allPull_pullId, nAllPull);
1619 /* Setup one array of all pull groups */
1620 for (i = 0; i < nWindows; i++)
1622 for (j = 0; j < window[i].nPull; j++)
1624 allPull_winId[iAllPull] = i;
1625 allPull_pullId[iAllPull] = j;
1630 /* setup stuff for synthetic windows */
1631 snew(synthWindow, nAllPull);
1632 for (i = 0; i < nAllPull; i++)
1634 synthWindow[i].nPull = 1;
1635 synthWindow[i].nBin = opt->bins;
1636 snew(synthWindow[i].Histo, 1);
1637 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1639 snew(synthWindow[i].Histo[0], opt->bins);
1641 snew(synthWindow[i].N, 1);
1642 snew(synthWindow[i].pos, 1);
1643 snew(synthWindow[i].z, 1);
1644 snew(synthWindow[i].k, 1);
1645 snew(synthWindow[i].bContrib, 1);
1646 snew(synthWindow[i].g, 1);
1647 snew(synthWindow[i].bsWeight, 1);
1650 switch (opt->bsMethod)
1653 snew(randomArray, nAllPull);
1654 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1655 please_cite(stdout, "Hub2006");
1657 case bsMethod_BayesianHist:
1658 /* just copy all histogams into synthWindow array */
1659 for (i = 0; i < nAllPull; i++)
1661 winid = allPull_winId [i];
1662 pullid = allPull_pullId[i];
1663 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1667 case bsMethod_trajGauss:
1668 calc_cumulatives(window, nWindows, opt, fnhist);
1671 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1674 /* do bootstrapping */
1675 fp = xvgropen(fnprof, "Boot strap profiles", xlabel, ylabel, opt->oenv);
1676 for (ib = 0; ib < opt->nBootStrap; ib++)
1678 printf(" *******************************************\n"
1679 " ******** Start bootstrap nr %d ************\n"
1680 " *******************************************\n", ib+1);
1682 switch (opt->bsMethod)
1685 /* bootstrap complete histograms from given histograms */
1686 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1687 for (i = 0; i < nAllPull; i++)
1689 winid = allPull_winId [randomArray[i]];
1690 pullid = allPull_pullId[randomArray[i]];
1691 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1694 case bsMethod_BayesianHist:
1695 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1696 setRandomBsWeights(synthWindow, nAllPull, opt);
1699 case bsMethod_trajGauss:
1700 /* create new histos from given histos, that is generate new hypothetical
1702 for (i = 0; i < nAllPull; i++)
1704 winid = allPull_winId[i];
1705 pullid = allPull_pullId[i];
1706 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1711 /* write histos in case of verbose output */
1712 if (opt->bs_verbose)
1714 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1721 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1724 if ( (i%opt->stepUpdateContrib) == 0)
1726 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1728 if (maxchange < opt->Tolerance)
1732 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1734 printf("\t%4d) Maximum change %e\n", i, maxchange);
1736 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1739 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1740 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1744 prof_normalization_and_unit(bsProfile, opt);
1747 /* symmetrize profile around z=0 */
1750 symmetrizeProfile(bsProfile, opt);
1753 /* save stuff to get average and stddev */
1754 for (i = 0; i < opt->bins; i++)
1757 bsProfiles_av[i] += tmp;
1758 bsProfiles_av2[i] += tmp*tmp;
1759 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1761 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1765 /* write average and stddev */
1766 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1767 if (output_env_get_print_xvgr_codes(opt->oenv))
1769 fprintf(fp, "@TYPE xydy\n");
1771 for (i = 0; i < opt->bins; i++)
1773 bsProfiles_av [i] /= opt->nBootStrap;
1774 bsProfiles_av2[i] /= opt->nBootStrap;
1775 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1776 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1777 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1780 printf("Wrote boot strap result to %s\n", fnres);
1783 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1784 int whaminFileType(char *fn)
1787 len = std::strlen(fn);
1788 if (std::strcmp(fn+len-3, "tpr") == 0)
1792 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1794 return whamin_pullxf;
1796 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1802 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1804 return whamin_unknown;
1807 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1808 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1809 t_UmbrellaOptions *opt)
1811 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1812 int nread, sizenow, i, block = 1;
1815 fp = gmx_ffopen(fn, "r");
1818 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1820 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1822 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1824 if (nread >= sizenow)
1827 srenew(filename, sizenow);
1828 for (i = sizenow-block; i < sizenow; i++)
1830 snew(filename[i], WHAM_MAXFILELEN);
1833 /* remove newline if there is one */
1834 if (tmp[std::strlen(tmp)-1] == '\n')
1836 tmp[std::strlen(tmp)-1] = '\0';
1838 std::strcpy(filename[nread], tmp);
1841 printf("Found file %s in %s\n", filename[nread], fn);
1845 *filenamesRet = filename;
1849 //! Open a file or a pipe to a gzipped file
1850 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1852 char Buffer[1024], gunzip[1024], *Path = 0;
1854 static gmx_bool bFirst = 1;
1856 /* gzipped pdo file? */
1857 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1859 /* search gunzip executable */
1860 if (!(Path = getenv("GMX_PATH_GZIP")))
1862 if (gmx_fexist("/bin/gunzip"))
1864 sprintf(gunzip, "%s", "/bin/gunzip");
1866 else if (gmx_fexist("/usr/bin/gunzip"))
1868 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1872 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1873 "You may want to define the path to gunzip "
1874 "with the environment variable GMX_PATH_GZIP.", gunzip);
1879 sprintf(gunzip, "%s/gunzip", Path);
1880 if (!gmx_fexist(gunzip))
1882 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1883 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1888 printf("Using gunzig executable %s\n", gunzip);
1891 if (!gmx_fexist(fn))
1893 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1895 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1898 printf("Executing command '%s'\n", Buffer);
1901 if ((pipe = popen(Buffer, "r")) == NULL)
1903 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1906 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1912 pipe = gmx_ffopen(fn, "r");
1919 //! Close file or pipe
1920 void pdo_close_file(FILE *fp)
1929 //! Reading all pdo files
1930 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1931 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1934 real mintmp, maxtmp, done = 0.;
1937 /* char Buffer0[1000]; */
1941 gmx_fatal(FARGS, "No files found. Hick.");
1944 /* if min and max are not given, get min and max from the input files */
1947 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1950 for (i = 0; i < nfiles; ++i)
1952 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1953 /*fgets(Buffer0,999,file);
1954 fprintf(stderr,"First line '%s'\n",Buffer0); */
1955 done = 100.0*(i+1)/nfiles;
1956 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1961 read_pdo_header(file, header, opt);
1962 /* here only determine min and max of this window */
1963 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1964 if (maxtmp > opt->max)
1968 if (mintmp < opt->min)
1974 pdo_close_file(file);
1982 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1983 if (opt->bBoundsOnly)
1985 printf("Found option -boundsonly, now exiting.\n");
1989 /* store stepsize in profile */
1990 opt->dz = (opt->max-opt->min)/opt->bins;
1992 /* Having min and max, we read in all files */
1993 /* Loop over all files */
1994 for (i = 0; i < nfiles; ++i)
1996 done = 100.0*(i+1)/nfiles;
1997 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2002 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2003 read_pdo_header(file, header, opt);
2004 /* load data into window */
2005 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2006 if ((window+i)->Ntot[0] == 0)
2008 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2012 pdo_close_file(file);
2020 for (i = 0; i < nfiles; ++i)
2027 //! translate 0/1 to N/Y to write pull dimensions
2028 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2030 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2031 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2036 static int first = 1;
2038 /* printf("Reading %s \n",fn); */
2039 read_tpx_state(fn, &ir, &state, NULL, NULL);
2043 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2046 /* nr of pull groups */
2048 for (i = 0; i < ir.pull->ncoord; i++)
2050 if (ir.pull->coord[i].eType == epullUMBRELLA)
2056 header->pull_geometry = ir.pull->coord[i].eGeom;
2057 copy_ivec(ir.pull->coord[i].dim, header->pull_dim);
2062 /* TODO: remove this restriction */
2063 gmx_fatal(FARGS, "Currently tpr files where a non-umbrella pull coordinate is followed by an umbrella pull coordinate are not supported");
2066 for (m = 0; m < DIM; m++)
2068 if (ir.pull->coord[i].eGeom != header->pull_geometry)
2070 /* TODO: remove the restriction */
2071 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should use the same geometry");
2074 if (ir.pull->coord[i].dim[m] != header->pull_dim[m])
2076 /* TODO: remove the restriction */
2077 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should operate on the same dimensions");
2087 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d umbrella pull coordinates\n", ncrd);
2090 header->npullcrds_tot = ir.pull->ncoord;
2091 header->npullcrds = ncrd;
2092 header->bPrintRef = ir.pull->bPrintCOM1;
2093 if (ir.pull->bPrintCOM2)
2095 gmx_fatal(FARGS, "Reading pull output with printing of the COM of group 2 is currently not supported");
2097 if (ir.pull->bPrintRefValue)
2099 gmx_fatal(FARGS, "Reading pull output with printing of the reference value of the coordinates is currently not supported");
2101 header->bPrintComp = ir.pull->bPrintComp;
2102 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2103 /* We should add a struct for each pull coord with all pull coord data
2104 instead of allocating many arrays for each property */
2105 snew(header->k, ncrd);
2106 snew(header->init_dist, ncrd);
2107 snew(header->umbInitDist, ncrd);
2109 /* only z-direction with epullgCYL? */
2110 if (header->pull_geometry == epullgCYL)
2112 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2114 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2115 "However, found dimensions [%s %s %s]\n",
2116 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2117 int2YN(header->pull_dim[ZZ]));
2121 for (i = 0; i < ncrd; i++)
2123 header->k[i] = ir.pull->coord[i].k;
2124 if (header->k[i] == 0.0)
2126 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2127 "That doesn't seem to be an Umbrella tpr.\n",
2130 header->init_dist[i] = ir.pull->coord[i].init;
2132 /* initial distance to reference */
2133 switch (header->pull_geometry)
2136 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2140 header->umbInitDist[i] = header->init_dist[i];
2143 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2147 if (opt->verbose || first)
2149 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2150 "\tPull group coordinates%s expected in pullx files.\n",
2151 fn, header->npullcrds, epullg_names[header->pull_geometry],
2152 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2153 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2154 for (i = 0; i < ncrd; i++)
2156 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2159 if (!opt->verbose && first)
2161 printf("\tUse option -v to see this output for all input tpr files\n\n");
2167 //! 2-norm in a ndim-dimensional space
2168 double dist_ndim(double **dx, int ndim, int line)
2172 for (i = 0; i < ndim; i++)
2174 r2 += sqr(dx[i][line]);
2176 return std::sqrt(r2);
2179 //! Read pullx.xvg or pullf.xvg
2180 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2181 t_UmbrellaWindow * window,
2182 t_UmbrellaOptions *opt,
2183 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2184 t_groupselection *groupsel)
2186 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2187 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2188 real min, max, minfound = 1e20, maxfound = -1e20;
2189 gmx_bool dt_ok, timeok, bHaveForce;
2190 const char *quantity;
2191 const int blocklen = 4096;
2193 static gmx_bool bFirst = TRUE;
2196 * Data columns in pull output:
2197 * - in force output pullf.xvg:
2198 * No reference columns, one column per pull coordinate
2200 * - in position output pullx.xvg
2201 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2202 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2206 if (opt->bPullx && header->bPrintComp)
2208 nColPerCrd += header->pull_ndim;
2210 quantity = opt->bPullx ? "position" : "force";
2212 if (opt->bPullx && header->bPrintRef)
2214 nColRefEachCrd = header->pull_ndim;
2221 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2222 bHaveForce = opt->bPullf;
2224 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2225 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2226 Sorry for the laziness, this is a To-do. */
2227 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2230 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2231 "reading \n(option -if) is supported at present, "
2232 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2233 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2234 epullg_names[header->pull_geometry]);
2237 nt = read_xvg(fn, &y, &ny);
2239 /* Check consistency */
2242 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2246 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2247 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2249 /* Since this tool code has not updated yet to allow difference pull
2250 * dimensions per pull coordinate, we can't easily check the exact
2251 * number of expected columns, so we only check what we expect for
2252 * the pull coordinates selected for the WHAM analysis.
2254 printf("Expecting these columns in pull file:\n"
2255 "\t%d reference columns for each individual pull coordinate\n"
2256 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2257 printf("With %d pull groups, expect %s%d columns (including the time column)\n",
2259 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2263 if (ny < nColExpect ||
2264 (header->npullcrds == header->npullcrds_tot && ny > nColExpect))
2266 gmx_fatal(FARGS, "Using %d pull coodinates from %s,\n but found %d data columns in %s (expected %s%d)\n"
2267 "\nMaybe you confused options -ix and -if ?\n",
2268 header->npullcrds, fntpr, ny-1, fn,
2269 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2275 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2285 window->dt = y[0][1]-y[0][0];
2287 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2289 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2292 /* Need to alocate memory and set up structure */
2296 /* Use only groups selected with option -is file */
2297 if (header->npullcrds != groupsel->n)
2299 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2300 header->npullcrds, groupsel->n);
2302 window->nPull = groupsel->nUse;
2306 window->nPull = header->npullcrds;
2309 window->nBin = bins;
2310 snew(window->Histo, window->nPull);
2311 snew(window->z, window->nPull);
2312 snew(window->k, window->nPull);
2313 snew(window->pos, window->nPull);
2314 snew(window->N, window->nPull);
2315 snew(window->Ntot, window->nPull);
2316 snew(window->g, window->nPull);
2317 snew(window->bsWeight, window->nPull);
2318 window->bContrib = 0;
2320 if (opt->bCalcTauInt)
2322 snew(window->ztime, window->nPull);
2326 window->ztime = NULL;
2328 snew(lennow, window->nPull);
2330 for (g = 0; g < window->nPull; ++g)
2333 window->bsWeight[g] = 1.;
2334 snew(window->Histo[g], bins);
2336 window->Ntot[g] = 0;
2338 if (opt->bCalcTauInt)
2340 window->ztime[g] = NULL;
2344 /* Copying umbrella center and force const is more involved since not
2345 all pull groups from header (tpr file) may be used in window variable */
2346 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2348 if (groupsel && (groupsel->bUse[g] == FALSE))
2352 window->k[gUsed] = header->k[g];
2353 window->pos[gUsed] = header->umbInitDist[g];
2358 { /* only determine min and max */
2361 min = max = bins = 0; /* Get rid of warnings */
2365 for (i = 0; i < nt; i++)
2367 /* Do you want that time frame? */
2368 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2370 /* get time step of pdo file and get dstep from opt->dt */
2380 dstep = static_cast<int>(opt->dt/dt+0.5);
2388 window->dt = dt*dstep;
2392 dt_ok = (i%dstep == 0);
2393 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2395 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2396 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2399 /* Note: if groupsel == NULL:
2400 * all groups in pullf/x file are stored in this window, and gUsed == g
2401 * if groupsel != NULL:
2402 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2405 for (g = 0; g < header->npullcrds; ++g)
2407 /* was this group selected for application in WHAM? */
2408 if (groupsel && (groupsel->bUse[g] == FALSE))
2417 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2419 pos = -force/header->k[g] + header->umbInitDist[g];
2423 /* pick the right column from:
2424 * time ref1[ndim] coord1[1(ndim)] ref2[ndim] coord2[1(ndim)] ...
2426 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2430 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2444 if (gUsed >= window->nPull)
2446 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2447 gUsed, window->nPull);
2450 if (opt->bCalcTauInt && !bGetMinMax)
2452 /* save time series for autocorrelation analysis */
2453 ntot = window->Ntot[gUsed];
2454 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2455 if (ntot >= lennow[gUsed])
2457 lennow[gUsed] += blocklen;
2458 srenew(window->ztime[gUsed], lennow[gUsed]);
2460 window->ztime[gUsed][ntot] = pos;
2463 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2468 while ( (ibin += bins) < 0)
2473 else if (ibin >= bins)
2475 while ( (ibin -= bins) >= bins)
2481 if (ibin >= 0 && ibin < bins)
2483 window->Histo[gUsed][ibin] += 1.;
2486 window->Ntot[gUsed]++;
2490 else if (t > opt->tmax)
2494 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2506 for (i = 0; i < ny; i++)
2512 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2513 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2514 t_UmbrellaHeader* header,
2515 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2518 real mintmp, maxtmp;
2520 printf("Reading %d tpr and pullf files\n", nfiles/2);
2522 /* min and max not given? */
2525 printf("Automatic determination of boundaries...\n");
2528 for (i = 0; i < nfiles; i++)
2530 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2532 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2534 read_tpr_header(fnTprs[i], header, opt);
2535 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2537 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2539 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2540 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2541 if (maxtmp > opt->max)
2545 if (mintmp < opt->min)
2550 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2551 if (opt->bBoundsOnly)
2553 printf("Found option -boundsonly, now exiting.\n");
2557 /* store stepsize in profile */
2558 opt->dz = (opt->max-opt->min)/opt->bins;
2560 for (i = 0; i < nfiles; i++)
2562 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2564 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2566 read_tpr_header(fnTprs[i], header, opt);
2567 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2569 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2571 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2572 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2573 if (window[i].Ntot[0] == 0)
2575 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2579 for (i = 0; i < nfiles; i++)
2588 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2590 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2591 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2593 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2595 int nlines, ny, i, ig;
2598 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2599 nlines = read_xvg(fn, &iact, &ny);
2600 if (nlines != nwins)
2602 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2605 for (i = 0; i < nlines; i++)
2607 if (window[i].nPull != ny)
2609 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2610 "number of pull groups is different in different simulations. That is not\n"
2611 "supported yet. Sorry.\n");
2613 for (ig = 0; ig < window[i].nPull; ig++)
2615 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2616 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2618 if (iact[ig][i] <= 0.0)
2620 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2627 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2630 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2631 * that -in case of limited sampling- the ACT may be severely underestimated.
2632 * Note: the g=1+2tau are overwritten.
2633 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2636 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2639 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2641 /* only evaluate within +- 3sigma of the Gausian */
2642 siglim = 3.0*opt->sigSmoothIact;
2643 siglim2 = dsqr(siglim);
2644 /* pre-factor of Gaussian */
2645 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2646 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2648 for (i = 0; i < nwins; i++)
2650 snew(window[i].tausmooth, window[i].nPull);
2651 for (ig = 0; ig < window[i].nPull; ig++)
2655 pos = window[i].pos[ig];
2656 for (j = 0; j < nwins; j++)
2658 for (jg = 0; jg < window[j].nPull; jg++)
2660 dpos2 = dsqr(window[j].pos[jg]-pos);
2661 if (dpos2 < siglim2)
2663 w = gaufact*std::exp(-dpos2*invtwosig2);
2665 tausm += w*window[j].tau[jg];
2666 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2667 w,dpos2,pos,gaufact,invtwosig2); */
2672 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2674 window[i].tausmooth[ig] = tausm;
2678 window[i].tausmooth[ig] = window[i].tau[ig];
2680 window[i].g[ig] = 1+2*tausm/window[i].dt;
2685 //! Stop integrating autoccorelation function when ACF drops under this value
2686 #define WHAM_AC_ZERO_LIMIT 0.05
2688 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2690 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2691 t_UmbrellaOptions *opt, const char *fn)
2693 int i, ig, ncorr, ntot, j, k, *count, restart;
2694 real *corr, c0, dt, tmp;
2695 real *ztime, av, tausteps;
2696 FILE *fp, *fpcorr = 0;
2700 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2701 "time [ps]", "autocorrelation function", opt->oenv);
2705 for (i = 0; i < nwins; i++)
2707 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2709 ntot = window[i].Ntot[0];
2711 /* using half the maximum time as length of autocorrelation function */
2715 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2716 " points. Provide more pull data!", ntot);
2719 /* snew(corrSq,ncorr); */
2722 snew(window[i].tau, window[i].nPull);
2723 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2729 for (ig = 0; ig < window[i].nPull; ig++)
2731 if (ntot != window[i].Ntot[ig])
2733 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2734 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2736 ztime = window[i].ztime[ig];
2738 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2739 for (j = 0, av = 0; (j < ntot); j++)
2744 for (k = 0; (k < ncorr); k++)
2749 for (j = 0; (j < ntot); j += restart)
2751 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2753 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2755 /* corrSq[k] += tmp*tmp; */
2759 /* divide by nr of frames for each time displacement */
2760 for (k = 0; (k < ncorr); k++)
2762 /* count probably = (ncorr-k+(restart-1))/restart; */
2763 corr[k] = corr[k]/count[k];
2764 /* variance of autocorrelation function */
2765 /* corrSq[k]=corrSq[k]/count[k]; */
2767 /* normalize such that corr[0] == 0 */
2769 for (k = 0; (k < ncorr); k++)
2772 /* corrSq[k]*=c0*c0; */
2775 /* write ACFs in verbose mode */
2778 for (k = 0; (k < ncorr); k++)
2780 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2782 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2785 /* esimate integrated correlation time, fitting is too unstable */
2786 tausteps = 0.5*corr[0];
2787 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2788 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2790 tausteps += corr[j];
2793 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2794 Kumar et al, eq. 28 ff. */
2795 window[i].tau[ig] = tausteps*dt;
2796 window[i].g[ig] = 1+2*tausteps;
2797 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2808 /* plot IACT along reaction coordinate */
2809 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2810 if (output_env_get_print_xvgr_codes(opt->oenv))
2812 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2813 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2814 for (i = 0; i < nwins; i++)
2816 fprintf(fp, "# %3d ", i);
2817 for (ig = 0; ig < window[i].nPull; ig++)
2819 fprintf(fp, " %11g", window[i].tau[ig]);
2824 for (i = 0; i < nwins; i++)
2826 for (ig = 0; ig < window[i].nPull; ig++)
2828 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2831 if (opt->sigSmoothIact > 0.0)
2833 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2834 opt->sigSmoothIact);
2835 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2836 smoothIact(window, nwins, opt);
2837 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2838 if (output_env_get_print_xvgr_codes(opt->oenv))
2840 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2841 fprintf(fp, "@ s1 symbol color 2\n");
2843 for (i = 0; i < nwins; i++)
2845 for (ig = 0; ig < window[i].nPull; ig++)
2847 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2852 printf("Wrote %s\n", fn);
2856 * compute average and sigma of each umbrella histogram
2858 void averageSigma(t_UmbrellaWindow *window, int nwins)
2861 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2863 for (i = 0; i < nwins; i++)
2865 snew(window[i].aver, window[i].nPull);
2866 snew(window[i].sigma, window[i].nPull);
2868 ntot = window[i].Ntot[0];
2869 for (ig = 0; ig < window[i].nPull; ig++)
2871 ztime = window[i].ztime[ig];
2872 for (k = 0, av = 0.; k < ntot; k++)
2877 for (k = 0, sum2 = 0.; k < ntot; k++)
2882 sig = std::sqrt(sum2/ntot);
2883 window[i].aver[ig] = av;
2885 /* Note: This estimate for sigma is biased from the limited sampling.
2886 Correct sigma by n/(n-1) where n = number of independent
2887 samples. Only possible if IACT is known.
2891 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2892 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2896 window[i].sigma[ig] = sig;
2898 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2905 * Use histograms to compute average force on pull group.
2907 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2909 int i, j, bins = opt->bins, k;
2910 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2913 dz = (max-min)/bins;
2914 ztot = opt->max-min;
2917 /* Compute average displacement from histograms */
2918 for (j = 0; j < nWindows; ++j)
2920 snew(window[j].forceAv, window[j].nPull);
2921 for (k = 0; k < window[j].nPull; ++k)
2925 for (i = 0; i < opt->bins; ++i)
2927 temp = (1.0*i+0.5)*dz+min;
2928 distance = temp - window[j].pos[k];
2930 { /* in cyclic wham: */
2931 if (distance > ztot_half) /* |distance| < ztot_half */
2935 else if (distance < -ztot_half)
2940 w = window[j].Histo[k][i]/window[j].g[k];
2941 displAv += w*distance;
2943 /* Are we near min or max? We are getting wrong forces from the histgrams since
2944 the histograms are zero outside [min,max). Therefore, assume that the position
2945 on the other side of the histomgram center is equally likely. */
2948 posmirrored = window[j].pos[k]-distance;
2949 if (posmirrored >= max || posmirrored < min)
2951 displAv += -w*distance;
2958 /* average force from average displacement */
2959 window[j].forceAv[k] = displAv*window[j].k[k];
2960 /* sigma from average square displacement */
2961 /* window[j].sigma [k] = sqrt(displAv2); */
2962 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2968 * Check if the complete reaction coordinate is covered by the histograms
2970 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2971 t_UmbrellaOptions *opt)
2973 int i, ig, j, bins = opt->bins, bBoundary;
2974 real avcount = 0, z, relcount, *count;
2975 snew(count, opt->bins);
2977 for (j = 0; j < opt->bins; ++j)
2979 for (i = 0; i < nwins; i++)
2981 for (ig = 0; ig < window[i].nPull; ig++)
2983 count[j] += window[i].Histo[ig][j];
2986 avcount += 1.0*count[j];
2989 for (j = 0; j < bins; ++j)
2991 relcount = count[j]/avcount;
2992 z = (j+0.5)*opt->dz+opt->min;
2993 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2994 /* check for bins with no data */
2997 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2998 "You may not get a reasonable profile. Check your histograms!\n", j, z);
3000 /* and check for poor sampling */
3001 else if (relcount < 0.005 && !bBoundary)
3003 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3009 /*! \brief Compute initial potential by integrating the average force
3011 * This speeds up the convergence by roughly a factor of 2
3013 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
3015 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3016 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3019 dz = (opt->max-min)/bins;
3021 printf("Getting initial potential by integration.\n");
3023 /* Compute average displacement from histograms */
3024 computeAverageForce(window, nWindows, opt);
3026 /* Get force for each bin from all histograms in this bin, or, alternatively,
3027 if no histograms are inside this bin, from the closest histogram */
3030 for (j = 0; j < opt->bins; ++j)
3032 pos = (1.0*j+0.5)*dz+min;
3036 groupmin = winmin = 0;
3037 for (i = 0; i < nWindows; i++)
3039 for (ig = 0; ig < window[i].nPull; ig++)
3041 hispos = window[i].pos[ig];
3042 dist = std::abs(hispos-pos);
3043 /* average force within bin */
3047 fAv += window[i].forceAv[ig];
3049 /* at the same time, remember closest histogram */
3058 /* if no histogram found in this bin, use closest histogram */
3065 fAv = window[winmin].forceAv[groupmin];
3069 for (j = 1; j < opt->bins; ++j)
3071 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3074 /* cyclic wham: linearly correct possible offset */
3077 diff = (pot[bins-1]-pot[0])/(bins-1);
3078 for (j = 1; j < opt->bins; ++j)
3085 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3086 for (j = 0; j < opt->bins; ++j)
3088 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3091 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3094 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3095 energy offsets which are usually determined by wham
3096 First: turn pot into probabilities:
3098 for (j = 0; j < opt->bins; ++j)
3100 pot[j] = std::exp(-pot[j]/(8.314e-3*opt->Temperature));
3102 calc_z(pot, window, nWindows, opt, TRUE);
3108 //! Count number of words in an line
3109 static int wordcount(char *ptr)
3114 if (std::strlen(ptr) == 0)
3118 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3120 for (i = 0; (ptr[i] != '\0'); i++)
3122 is[cur] = isspace(ptr[i]);
3123 if ((i > 0) && (is[cur] && !is[1-cur]))
3132 /*! \brief Read input file for pull group selection (option -is)
3134 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3136 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3139 int i, iline, n, len = STRLEN, temp;
3140 char *ptr = 0, *tmpbuf = 0;
3141 char fmt[1024], fmtign[1024];
3142 int block = 1, sizenow;
3144 fp = gmx_ffopen(opt->fnGroupsel, "r");
3145 opt->groupsel = NULL;
3150 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3155 if (iline >= sizenow)
3158 srenew(opt->groupsel, sizenow);
3160 opt->groupsel[iline].n = n;
3161 opt->groupsel[iline].nUse = 0;
3162 snew(opt->groupsel[iline].bUse, n);
3165 for (i = 0; i < n; i++)
3167 std::strcpy(fmt, fmtign);
3168 std::strcat(fmt, "%d");
3169 if (sscanf(ptr, fmt, &temp))
3171 opt->groupsel[iline].bUse[i] = (temp > 0);
3172 if (opt->groupsel[iline].bUse[i])
3174 opt->groupsel[iline].nUse++;
3177 std::strcat(fmtign, "%*s");
3181 opt->nGroupsel = iline;
3182 if (nTpr != opt->nGroupsel)
3184 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3188 printf("\nUse only these pull groups:\n");
3189 for (iline = 0; iline < nTpr; iline++)
3191 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3192 for (i = 0; i < opt->groupsel[iline].n; i++)
3194 if (opt->groupsel[iline].bUse[i])
3207 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3209 //! Number of elements in fnm (used for command line parsing)
3210 #define NFILE asize(fnm)
3212 //! The main g_wham routine
3213 int gmx_wham(int argc, char *argv[])
3215 const char *desc[] = {
3216 "[THISMODULE] is an analysis program that implements the Weighted",
3217 "Histogram Analysis Method (WHAM). It is intended to analyze",
3218 "output files generated by umbrella sampling simulations to ",
3219 "compute a potential of mean force (PMF).[PAR]",
3221 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3222 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3223 "and, if multiple coordinates need to be analyzed, all used the same",
3224 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3226 "At present, three input modes are supported.",
3228 "* With option [TT]-it[tt], the user provides a file which contains the",
3229 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3230 " AND, with option [TT]-ix[tt], a file which contains file names of",
3231 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3232 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3233 " first pullx, etc.",
3234 "* Same as the previous input mode, except that the the user",
3235 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3236 " From the pull force the position in the umbrella potential is",
3237 " computed. This does not work with tabulated umbrella potentials.",
3238 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3239 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3240 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3241 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3242 " must be similar to the following::",
3245 " # Component selection: 0 0 1",
3247 " # Ref. Group 'TestAtom'",
3248 " # Nr. of pull groups 2",
3249 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3250 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3253 " The number of pull groups, umbrella positions, force constants, and names ",
3254 " may (of course) differ. Following the header, a time column and ",
3255 " a data column for each pull group follows (i.e. the displacement",
3256 " with respect to the umbrella center). Up to four pull groups are possible ",
3257 " per [REF].pdo[ref] file at present.",
3259 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3260 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3261 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3262 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3263 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3264 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3265 "used, groupsel.dat looks like this::",
3271 "By default, the output files are",
3273 "* [TT]-o[tt] PMF output file",
3274 "* [TT]-hist[tt] Histograms output file",
3276 "Always check whether the histograms sufficiently overlap.[PAR]",
3277 "The umbrella potential is assumed to be harmonic and the force constants are ",
3278 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3279 "a tabulated potential can be provided with [TT]-tab[tt].",
3284 "* [TT]-bins[tt] Number of bins used in analysis",
3285 "* [TT]-temp[tt] Temperature in the simulations",
3286 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3287 "* [TT]-auto[tt] Automatic determination of boundaries",
3288 "* [TT]-min,-max[tt] Boundaries of the profile",
3290 "The data points that are used to compute the profile",
3291 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3292 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3293 "umbrella window.[PAR]",
3294 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3295 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3296 "With energy output, the energy in the first bin is defined to be zero. ",
3297 "If you want the free energy at a different ",
3298 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3299 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3300 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3301 "[THISMODULE] will make use of the",
3302 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3303 "reaction coordinate will assumed be be neighbors.[PAR]",
3304 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3305 "which may be useful for, e.g. membranes.",
3310 "If available, the number of OpenMP threads used by g_wham is controlled with [TT]-nt[tt].",
3315 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3316 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3317 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3318 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3319 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3320 "Because the IACTs can be severely underestimated in case of limited ",
3321 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3322 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3323 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3324 "integration of the ACFs while the ACFs are larger 0.05.",
3325 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3326 "less robust) method such as fitting to a double exponential, you can ",
3327 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3328 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3329 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull group in the respective file.",
3334 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3335 "otherwise the statistical error may be substantially underestimated. ",
3336 "More background and examples for the bootstrap technique can be found in ",
3337 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3338 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3339 "Four bootstrapping methods are supported and ",
3340 "selected with [TT]-bs-method[tt].",
3342 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3343 " data points, and the bootstrap is carried out by assigning random weights to the ",
3344 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3345 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3346 " statistical error is underestimated.",
3347 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3348 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3349 " (allowing duplication, i.e. sampling with replacement).",
3350 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3351 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3352 " divided into blocks and only histograms within each block are mixed. Note that ",
3353 " the histograms within each block must be representative for all possible histograms, ",
3354 " otherwise the statistical error is underestimated.",
3355 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3356 " such that the generated data points are distributed according the given histograms ",
3357 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3358 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3359 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3360 " Note that this method may severely underestimate the error in case of limited sampling, ",
3361 " that is if individual histograms do not represent the complete phase space at ",
3362 " the respective positions.",
3363 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3364 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3365 " and width of the umbrella histograms. That method yields similar error estimates ",
3366 " like method [TT]traj[tt].",
3368 "Bootstrapping output:",
3370 "* [TT]-bsres[tt] Average profile and standard deviations",
3371 "* [TT]-bsprof[tt] All bootstrapping profiles",
3373 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3374 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3378 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3379 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3380 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3381 static t_UmbrellaOptions opt;
3382 static int nthreads = -1;
3386 { "-nt", FALSE, etINT, {&nthreads},
3387 "Number of threads used by g_wham (if -1, all threads will be used or what is specified by the environment variable OMP_NUM_THREADS)"},
3389 { "-min", FALSE, etREAL, {&opt.min},
3390 "Minimum coordinate in profile"},
3391 { "-max", FALSE, etREAL, {&opt.max},
3392 "Maximum coordinate in profile"},
3393 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3394 "Determine min and max automatically"},
3395 { "-bins", FALSE, etINT, {&opt.bins},
3396 "Number of bins in profile"},
3397 { "-temp", FALSE, etREAL, {&opt.Temperature},
3399 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3401 { "-v", FALSE, etBOOL, {&opt.verbose},
3403 { "-b", FALSE, etREAL, {&opt.tmin},
3404 "First time to analyse (ps)"},
3405 { "-e", FALSE, etREAL, {&opt.tmax},
3406 "Last time to analyse (ps)"},
3407 { "-dt", FALSE, etREAL, {&opt.dt},
3408 "Analyse only every dt ps"},
3409 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3410 "Write histograms and exit"},
3411 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3412 "Determine min and max and exit (with [TT]-auto[tt])"},
3413 { "-log", FALSE, etBOOL, {&opt.bLog},
3414 "Calculate the log of the profile before printing"},
3415 { "-unit", FALSE, etENUM, {en_unit},
3416 "Energy unit in case of log output" },
3417 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3418 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3419 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3420 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3421 { "-sym", FALSE, etBOOL, {&opt.bSym},
3422 "Symmetrize profile around z=0"},
3423 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3424 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3425 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3426 "Calculate integrated autocorrelation times and use in wham"},
3427 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3428 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3429 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3430 "When computing autocorrelation functions, restart computing every .. (ps)"},
3431 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3432 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3433 "during smoothing"},
3434 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3435 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3436 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3437 "Bootstrap method" },
3438 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3439 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3440 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3441 "Seed for bootstrapping. (-1 = use time)"},
3442 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3443 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3444 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3445 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3446 { "-stepout", FALSE, etINT, {&opt.stepchange},
3447 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3448 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3449 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3453 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3454 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3455 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3456 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3457 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3458 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3459 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3460 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3461 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3462 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3463 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3464 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3467 int i, j, l, nfiles, nwins, nfiles2;
3468 t_UmbrellaHeader header;
3469 t_UmbrellaWindow * window = NULL;
3470 double *profile, maxchange = 1e20;
3471 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3472 char **fninTpr, **fninPull, **fninPdo;
3474 FILE *histout, *profout;
3475 char ylabel[256], title[256];
3478 opt.verbose = FALSE;
3479 opt.bHistOnly = FALSE;
3489 /* bootstrapping stuff */
3491 opt.bsMethod = bsMethod_hist;
3492 opt.tauBootStrap = 0.0;
3494 opt.histBootStrapBlockLength = 8;
3495 opt.bs_verbose = FALSE;
3500 opt.Temperature = 298;
3501 opt.Tolerance = 1e-6;
3502 opt.bBoundsOnly = FALSE;
3504 opt.bCalcTauInt = FALSE;
3505 opt.sigSmoothIact = 0.0;
3506 opt.bAllowReduceIact = TRUE;
3507 opt.bInitPotByIntegration = TRUE;
3508 opt.acTrestart = 1.0;
3509 opt.stepchange = 100;
3510 opt.stepUpdateContrib = 100;
3512 if (!parse_common_args(&argc, argv, 0,
3513 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3518 opt.unit = nenum(en_unit);
3519 opt.bsMethod = nenum(en_bsMethod);
3521 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3523 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3524 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3525 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3526 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3527 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3528 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3529 if (opt.bTab && opt.bPullf)
3531 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3532 "Provide pullx.xvg or pdo files!");
3535 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3537 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3539 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3541 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3542 "\n\n Check g_wham -h !");
3545 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3546 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3547 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3548 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3549 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3551 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3552 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3553 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3554 if ( (bMinSet || bMaxSet) && bAutoSet)
3556 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3559 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3561 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3564 if (bMinSet && opt.bAuto)
3566 printf("Note: min and max given, switching off -auto.\n");
3570 if (opt.bTauIntGiven && opt.bCalcTauInt)
3572 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3573 "the autocorrelation times. Not both.");
3576 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3578 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3579 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3581 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3583 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3584 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3587 /* Set # of OpenMP threads */
3590 gmx_omp_set_num_threads(nthreads);
3594 nthreads = gmx_omp_get_max_threads();
3598 printf("\nNote: Will use %d OpenMP threads.\n\n", nthreads);
3601 /* Reading gmx4 pull output and tpr files */
3602 if (opt.bTpr || opt.bPullf || opt.bPullx)
3604 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3606 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3607 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3608 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3609 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3610 if (nfiles != nfiles2)
3612 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3613 opt.fnTpr, nfiles2, fnPull);
3616 /* Read file that selects the pull group to be used */
3617 if (opt.fnGroupsel != NULL)
3619 readPullGroupSelection(&opt, fninTpr, nfiles);
3622 window = initUmbrellaWindows(nfiles);
3623 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3626 { /* reading pdo files */
3627 if (opt.fnGroupsel != NULL)
3629 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3630 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3632 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3633 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3634 window = initUmbrellaWindows(nfiles);
3635 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3639 /* enforce equal weight for all histograms? */
3642 enforceEqualWeights(window, nwins);
3645 /* write histograms */
3646 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3647 xlabel, "count", opt.oenv);
3648 for (l = 0; l < opt.bins; ++l)
3650 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3651 for (i = 0; i < nwins; ++i)
3653 for (j = 0; j < window[i].nPull; ++j)
3655 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3658 fprintf(histout, "\n");
3661 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3664 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3668 /* Using tabulated umbrella potential */
3671 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3674 /* Integrated autocorrelation times provided ? */
3675 if (opt.bTauIntGiven)
3677 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3680 /* Compute integrated autocorrelation times */
3681 if (opt.bCalcTauInt)
3683 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3686 /* calc average and sigma for each histogram
3687 (maybe required for bootstrapping. If not, this is fast anyhow) */
3688 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3690 averageSigma(window, nwins);
3693 /* Get initial potential by simple integration */
3694 if (opt.bInitPotByIntegration)
3696 guessPotByIntegration(window, nwins, &opt);
3699 /* Check if complete reaction coordinate is covered */
3700 checkReactionCoordinateCovered(window, nwins, &opt);
3702 /* Calculate profile */
3703 snew(profile, opt.bins);
3711 if ( (i%opt.stepUpdateContrib) == 0)
3713 setup_acc_wham(profile, window, nwins, &opt);
3715 if (maxchange < opt.Tolerance)
3718 /* if (opt.verbose) */
3719 printf("Switched to exact iteration in iteration %d\n", i);
3721 calc_profile(profile, window, nwins, &opt, bExact);
3722 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3724 printf("\t%4d) Maximum change %e\n", i, maxchange);
3728 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3729 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3731 /* calc error from Kumar's formula */
3732 /* Unclear how the error propagates along reaction coordinate, therefore
3734 /* calc_error_kumar(profile,window, nwins,&opt); */
3736 /* Write profile in energy units? */
3739 prof_normalization_and_unit(profile, &opt);
3740 std::strcpy(ylabel, en_unit_label[opt.unit]);
3741 std::strcpy(title, "Umbrella potential");
3745 std::strcpy(ylabel, "Density of states");
3746 std::strcpy(title, "Density of states");
3749 /* symmetrize profile around z=0? */
3752 symmetrizeProfile(profile, &opt);
3755 /* write profile or density of states */
3756 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3757 for (i = 0; i < opt.bins; ++i)
3759 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3762 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3764 /* Bootstrap Method */
3767 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3768 opt2fn("-hist", NFILE, fnm),
3769 ylabel, profile, window, nwins, &opt);
3773 freeUmbrellaWindows(window, nfiles);
3775 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3776 please_cite(stdout, "Hub2010");