2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
55 #include "gromacs/commandline/pargs.h"
56 #include "gromacs/fileio/tpxio.h"
57 #include "gromacs/fileio/xvgr.h"
58 #include "gromacs/gmxana/gmx_ana.h"
59 #include "gromacs/legacyheaders/copyrite.h"
60 #include "gromacs/legacyheaders/macros.h"
61 #include "gromacs/legacyheaders/names.h"
62 #include "gromacs/legacyheaders/typedefs.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/random/random.h"
65 #include "gromacs/utility/cstringutil.h"
66 #include "gromacs/utility/fatalerror.h"
67 #include "gromacs/utility/gmxomp.h"
68 #include "gromacs/utility/smalloc.h"
70 //! longest file names allowed in input files
71 #define WHAM_MAXFILELEN 2048
74 * x-axis legend for output files
76 static const char *xlabel = "\\xx\\f{} (nm)";
79 * enum for energy units
82 enSel, en_kJ, en_kCal, en_kT, enNr
85 * enum for type of input files (pdos, tpr, or pullf)
88 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
91 /*! \brief enum for bootstrapping method
93 * These bootstrap methods are supported:
94 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
95 * (bsMethod_BayesianHist)
96 * - bootstrap complete histograms (bsMethod_hist)
97 * - bootstrap trajectories from given umbrella histograms. This generates new
98 * "synthetic" histograms (bsMethod_traj)
99 * - bootstrap trajectories from Gaussian with mu/sigma computed from
100 * the respective histogram (bsMethod_trajGauss). This gives very similar
101 * results compared to bsMethod_traj.
103 * ********************************************************************
104 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
105 * JS Hub, BL de Groot, D van der Spoel
106 * g_wham - A free weighted histogram analysis implementation including
107 * robust error and autocorrelation estimates,
108 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
109 * ********************************************************************
112 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
113 bsMethod_traj, bsMethod_trajGauss
117 //! Parameters of the umbrella potentials
121 * \name Using umbrella pull code since gromacs 4.x
124 int npullcrds_tot; //!< nr of pull coordinates in tpr file
125 int npullcrds; //!< nr of umbrella pull coordinates for reading
126 int pull_geometry; //!< such as distance, direction
127 ivec pull_dim; //!< pull dimension with geometry distance
128 int pull_ndim; //!< nr of pull_dim != 0
129 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
130 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
131 real *k; //!< force constants in tpr file
132 real *init_dist; //!< reference displacements
133 real *umbInitDist; //!< reference displacement in umbrella direction
136 * \name Using PDO files common until gromacs 3.x
144 char PullName[4][256];
146 double UmbCons[4][3];
150 //! Data in the umbrella histograms
153 int nPull; //!< nr of pull groups in this pdo or pullf/x file
154 double **Histo; //!< nPull histograms
155 double **cum; //!< nPull cumulative distribution functions
156 int nBin; //!< nr of bins. identical to opt->bins
157 double *k; //!< force constants for the nPull groups
158 double *pos; //!< umbrella positions for the nPull groups
159 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
160 int *N; //!< nr of data points in nPull histograms.
161 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
163 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
165 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
166 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
169 double *tau; //!< intetrated autocorrelation time (IACT)
170 double *tausmooth; //!< smoothed IACT
172 double dt; //!< timestep in the input data. Can be adapted with g_wham option -dt
174 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
176 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
178 /*! \brief average force estimated from average displacement, fAv=dzAv*k
180 * Used for integration to guess the potential.
183 real *aver; //!< average of histograms
184 real *sigma; //!< stddev of histograms
185 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
188 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
191 int n; //!< total nr of pull groups in this tpr file
192 int nUse; //!< nr of pull groups used
193 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
196 //! Parameters of WHAM
203 const char *fnTpr, *fnPullf, *fnGroupsel;
204 const char *fnPdo, *fnPullx; //!< file names of input
205 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
206 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
208 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
209 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
210 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
211 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
214 * \name Basic WHAM options
217 int bins; //!< nr of bins, min, max, and dz of profile
219 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
220 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
223 * \name Output control
226 gmx_bool bLog; //!< energy output (instead of probability) for profile
227 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
228 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
229 /*! \brief after wham, set prof to zero at this z-position.
230 * When bootstrapping, set zProf0 to a "stable" reference position.
233 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
235 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
236 gmx_bool bAuto; //!< determine min and max automatically but do not exit
238 gmx_bool verbose; //!< more noisy wham mode
239 int stepchange; //!< print maximum change in prof after how many interations
240 output_env_t oenv; //!< xvgr options
243 * \name Autocorrelation stuff
246 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
247 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
248 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
249 real acTrestart; //!< when computing ACT, time between restarting points
251 /* \brief Enforce the same weight for each umbella window, that is
252 * calculate with the same number of data points for
253 * each window. That can be reasonable, if the histograms
254 * have different length, but due to autocorrelation,
255 * a longer simulation should not have larger weightin wham.
261 * \name Bootstrapping stuff
264 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
266 /* \brief bootstrap method
268 * if == bsMethod_hist, consider complete histograms as independent
269 * data points and, hence, only mix complete histograms.
270 * if == bsMethod_BayesianHist, consider complete histograms
271 * as independent data points, but assign random weights
272 * to the histograms during the bootstrapping ("Bayesian bootstrap")
273 * In case of long correlations (e.g., inside a channel), these
274 * will yield a more realistic error.
275 * if == bsMethod_traj(Gauss), generate synthetic histograms
277 * histogram by generating an autocorrelated random sequence
278 * that is distributed according to the respective given
279 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
280 * (instead of from the umbrella histogram) to generate a new
285 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
288 /* \brief when mixing histograms, mix only histograms withing blocks
289 long the reaction coordinate xi. Avoids gaps along xi. */
290 int histBootStrapBlockLength;
292 int bsSeed; //!< random seed for bootstrapping
294 /* \brief Write cumulative distribution functions (CDFs) of histograms
295 and write the generated histograms for each bootstrap */
299 * \name tabulated umbrella potential stuff
303 double *tabX, *tabY, tabMin, tabMax, tabDz;
306 gmx_rng_t rng; //!< gromacs random number generator
309 //! Make an umbrella window (may contain several histograms)
310 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
312 t_UmbrellaWindow *win;
315 for (i = 0; i < nwin; i++)
317 win[i].Histo = win[i].cum = 0;
318 win[i].k = win[i].pos = win[i].z = 0;
319 win[i].N = win[i].Ntot = 0;
320 win[i].g = win[i].tau = win[i].tausmooth = 0;
324 win[i].aver = win[i].sigma = 0;
330 //! Delete an umbrella window (may contain several histograms)
331 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
334 for (i = 0; i < nwin; i++)
338 for (j = 0; j < win[i].nPull; j++)
340 sfree(win[i].Histo[j]);
345 for (j = 0; j < win[i].nPull; j++)
347 sfree(win[i].cum[j]);
352 for (j = 0; j < win[i].nPull; j++)
354 sfree(win[i].bContrib[j]);
366 sfree(win[i].tausmooth);
367 sfree(win[i].bContrib);
369 sfree(win[i].forceAv);
372 sfree(win[i].bsWeight);
378 * Read and setup tabulated umbrella potential
380 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
385 printf("Setting up tabulated potential from file %s\n", fn);
386 nl = read_xvg(fn, &y, &ny);
390 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
392 opt->tabMin = y[0][0];
393 opt->tabMax = y[0][nl-1];
394 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
397 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
398 "ascending z-direction", fn);
400 for (i = 0; i < nl-1; i++)
402 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
404 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
409 for (i = 0; i < nl; i++)
411 opt->tabX[i] = y[0][i];
412 opt->tabY[i] = y[1][i];
414 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
415 opt->tabMin, opt->tabMax, opt->tabDz);
418 //! Read the header of an PDO file (position, force const, nr of groups)
419 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
422 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
424 std::istringstream ist;
427 if (fgets(line, 2048, file) == NULL)
429 gmx_fatal(FARGS, "Error reading header from pdo file\n");
432 ist >> Buffer0 >> Buffer1 >> Buffer2;
433 if (strcmp(Buffer1, "UMBRELLA"))
435 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
436 "(Found in first line: `%s')\n",
437 Buffer1, "UMBRELLA", line);
439 if (strcmp(Buffer2, "3.0"))
441 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
445 if (fgets(line, 2048, file) == NULL)
447 gmx_fatal(FARGS, "Error reading header from pdo file\n");
450 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
451 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
453 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
454 if (header->nDim != 1)
456 gmx_fatal(FARGS, "Currently only supports one dimension");
460 if (fgets(line, 2048, file) == NULL)
462 gmx_fatal(FARGS, "Error reading header from pdo file\n");
465 ist >> Buffer0 >> Buffer1 >> header->nSkip;
468 if (fgets(line, 2048, file) == NULL)
470 gmx_fatal(FARGS, "Error reading header from pdo file\n");
473 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
476 if (fgets(line, 2048, file) == NULL)
478 gmx_fatal(FARGS, "Error reading header from pdo file\n");
481 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
485 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
489 for (i = 0; i < header->nPull; ++i)
491 if (fgets(line, 2048, file) == NULL)
493 gmx_fatal(FARGS, "Error reading header from pdo file\n");
496 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
497 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
498 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
502 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
503 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
507 if (fgets(line, 2048, file) == NULL)
509 gmx_fatal(FARGS, "Cannot read from file\n");
513 if (strcmp(Buffer3, "#####") != 0)
515 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
520 static char *fgets3(FILE *fp, char ptr[], int *len)
525 if (fgets(ptr, *len-1, fp) == NULL)
530 while ((strchr(ptr, '\n') == NULL) && (!feof(fp)))
532 /* This line is longer than len characters, let's increase len! */
536 if (fgets(p-1, STRLEN, fp) == NULL)
542 if (ptr[slen-1] == '\n')
550 /*! \brief Read the data columns of and PDO file.
552 * TO DO: Get rid of the scanf function to avoid the clang warning.
553 * At the moment, this warning is avoided by hiding the format string
554 * the variable fmtlf.
556 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
557 int fileno, t_UmbrellaWindow * win,
558 t_UmbrellaOptions *opt,
559 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
561 int i, inttemp, bins, count, ntot;
562 real min, max, minfound = 1e20, maxfound = -1e20;
563 double temp, time, time0 = 0, dt;
565 t_UmbrellaWindow * window = 0;
566 gmx_bool timeok, dt_ok = 1;
567 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
568 int len = STRLEN, dstep = 1;
569 const int blocklen = 4096;
579 /* Need to alocate memory and set up structure */
580 window->nPull = header->nPull;
583 snew(window->Histo, window->nPull);
584 snew(window->z, window->nPull);
585 snew(window->k, window->nPull);
586 snew(window->pos, window->nPull);
587 snew(window->N, window->nPull);
588 snew(window->Ntot, window->nPull);
589 snew(window->g, window->nPull);
590 snew(window->bsWeight, window->nPull);
592 window->bContrib = 0;
594 if (opt->bCalcTauInt)
596 snew(window->ztime, window->nPull);
602 snew(lennow, window->nPull);
604 for (i = 0; i < window->nPull; ++i)
607 window->bsWeight[i] = 1.;
608 snew(window->Histo[i], bins);
609 window->k[i] = header->UmbCons[i][0];
610 window->pos[i] = header->UmbPos[i][0];
614 if (opt->bCalcTauInt)
616 window->ztime[i] = 0;
620 /* Done with setup */
626 min = max = bins = 0; /* Get rid of warnings */
631 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
635 if (ptr[0] == '#' || strlen(ptr) < 2)
640 /* Initiate format string */
642 strcat(fmtign, "%*s");
644 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
645 /* Round time to fs */
646 time = 1.0/1000*( static_cast<int> (time*1000+0.5) );
648 /* get time step of pdo file */
658 dstep = static_cast<int>(opt->dt/dt+0.5);
666 window->dt = dt*dstep;
671 dt_ok = ((count-1)%dstep == 0);
672 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
674 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
675 time,opt->tmin, opt->tmax, dt_ok,timeok); */
679 for (i = 0; i < header->nPull; ++i)
682 strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
683 strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
684 if (sscanf(ptr, fmt, &temp))
686 temp += header->UmbPos[i][0];
700 if (opt->bCalcTauInt)
702 /* save time series for autocorrelation analysis */
703 ntot = window->Ntot[i];
704 if (ntot >= lennow[i])
706 lennow[i] += blocklen;
707 srenew(window->ztime[i], lennow[i]);
709 window->ztime[i][ntot] = temp;
717 inttemp = static_cast<int> (temp);
724 else if (inttemp >= bins)
730 if (inttemp >= 0 && inttemp < bins)
732 window->Histo[i][inttemp] += 1.;
740 if (time > opt->tmax)
744 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
760 /*! \brief Set identical weights for all histograms
762 * Normally, the weight is given by the number data points in each
763 * histogram, together with the autocorrelation time. This can be overwritten
764 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
765 * an appropriate autocorrelation times instead of using this function.
767 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
769 int i, k, j, NEnforced;
772 NEnforced = window[0].Ntot[0];
773 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
774 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
775 /* enforce all histograms to have the same weight as the very first histogram */
777 for (j = 0; j < nWindows; ++j)
779 for (k = 0; k < window[j].nPull; ++k)
781 ratio = 1.0*NEnforced/window[j].Ntot[k];
782 for (i = 0; i < window[0].nBin; ++i)
784 window[j].Histo[k][i] *= ratio;
786 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
791 /*! \brief Simple linear interpolation between two given tabulated points
793 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
796 double pl, pu, dz, dp;
798 jl = static_cast<int> (floor((dist-opt->tabMin)/opt->tabDz));
800 if (jl < 0 || ju >= opt->tabNbins)
802 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
803 "Provide an extended table.", dist, jl, ju);
807 dz = dist-opt->tabX[jl];
808 dp = (pu-pl)*dz/opt->tabDz;
814 * Check which bins substiantially contribute (accelerates WHAM)
816 * Don't worry, that routine does not mean we compute the PMF in limited precision.
817 * After rapid convergence (using only substiantal contributions), we always switch to
820 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
821 t_UmbrellaOptions *opt)
823 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
824 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
825 gmx_bool bAnyContrib;
826 static int bFirst = 1;
827 static double wham_contrib_lim;
831 for (i = 0; i < nWindows; ++i)
833 nGrptot += window[i].nPull;
835 wham_contrib_lim = opt->Tolerance/nGrptot;
838 ztot = opt->max-opt->min;
841 for (i = 0; i < nWindows; ++i)
843 if (!window[i].bContrib)
845 snew(window[i].bContrib, window[i].nPull);
847 for (j = 0; j < window[i].nPull; ++j)
849 if (!window[i].bContrib[j])
851 snew(window[i].bContrib[j], opt->bins);
854 for (k = 0; k < opt->bins; ++k)
856 temp = (1.0*k+0.5)*dz+min;
857 distance = temp - window[i].pos[j]; /* distance to umbrella center */
859 { /* in cyclic wham: */
860 if (distance > ztot_half) /* |distance| < ztot_half */
864 else if (distance < -ztot_half)
869 /* Note: there are two contributions to bin k in the wham equations:
870 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
871 ii) exp(- U/(8.314e-3*opt->Temperature))
872 where U is the umbrella potential
873 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
878 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
882 U = tabulated_pot(distance, opt); /* Use tabulated potential */
884 contrib1 = profile[k]*exp(-U/(8.314e-3*opt->Temperature));
885 contrib2 = window[i].N[j]*exp(-U/(8.314e-3*opt->Temperature) + window[i].z[j]);
886 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
887 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
888 if (window[i].bContrib[j][k])
894 /* If this histo is far outside min and max all bContrib may be FALSE,
895 causing a floating point exception later on. To avoid that, switch
899 for (k = 0; k < opt->bins; ++k)
901 window[i].bContrib[j][k] = TRUE;
908 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
909 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
914 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
920 //! Compute the PMF (one of the two main WHAM routines)
921 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
922 t_UmbrellaOptions *opt, gmx_bool bExact)
924 double ztot_half, ztot, min = opt->min, dz = opt->dz;
926 ztot = opt->max-opt->min;
931 int nthreads = gmx_omp_get_max_threads();
932 int thread_id = gmx_omp_get_thread_num();
934 int i0 = thread_id*opt->bins/nthreads;
935 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
937 for (i = i0; i < i1; ++i)
940 double num, denom, invg, temp = 0, distance, U = 0;
942 for (j = 0; j < nWindows; ++j)
944 for (k = 0; k < window[j].nPull; ++k)
946 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
947 temp = (1.0*i+0.5)*dz+min;
948 num += invg*window[j].Histo[k][i];
950 if (!(bExact || window[j].bContrib[k][i]))
954 distance = temp - window[j].pos[k]; /* distance to umbrella center */
956 { /* in cyclic wham: */
957 if (distance > ztot_half) /* |distance| < ztot_half */
961 else if (distance < -ztot_half)
969 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
973 U = tabulated_pot(distance, opt); /* Use tabulated potential */
975 denom += invg*window[j].N[k]*exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]);
978 profile[i] = num/denom;
983 //! Compute the free energy offsets z (one of the two main WHAM routines)
984 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
985 t_UmbrellaOptions *opt, gmx_bool bExact)
987 double min = opt->min, dz = opt->dz, ztot_half, ztot;
988 double maxglob = -1e20;
990 ztot = opt->max-opt->min;
995 int nthreads = gmx_omp_get_max_threads();
996 int thread_id = gmx_omp_get_thread_num();
998 int i0 = thread_id*nWindows/nthreads;
999 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1000 double maxloc = -1e20;
1002 for (i = i0; i < i1; ++i)
1004 double total = 0, temp, distance, U = 0;
1007 for (j = 0; j < window[i].nPull; ++j)
1010 for (k = 0; k < window[i].nBin; ++k)
1012 if (!(bExact || window[i].bContrib[j][k]))
1016 temp = (1.0*k+0.5)*dz+min;
1017 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1019 { /* in cyclic wham: */
1020 if (distance > ztot_half) /* |distance| < ztot_half */
1024 else if (distance < -ztot_half)
1032 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1036 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1038 total += profile[k]*exp(-U/(8.314e-3*opt->Temperature));
1040 /* Avoid floating point exception if window is far outside min and max */
1043 total = -log(total);
1049 temp = fabs(total - window[i].z[j]);
1054 window[i].z[j] = total;
1057 /* Now get maximum maxloc from the threads and put in maxglob */
1058 if (maxloc > maxglob)
1060 #pragma omp critical
1062 if (maxloc > maxglob)
1073 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1074 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1076 int i, j, bins = opt->bins;
1077 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1080 if (min > 0. || max < 0.)
1082 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1083 opt->min, opt->max);
1088 for (i = 0; i < bins; i++)
1092 /* bin left of zsym */
1093 j = static_cast<int> (floor((zsym-min)/dz-0.5));
1094 if (j >= 0 && (j+1) < bins)
1096 /* interpolate profile linearly between bins j and j+1 */
1097 z1 = min+(j+0.5)*dz;
1099 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1100 /* average between left and right */
1101 prof2[i] = 0.5*(profsym+profile[i]);
1105 prof2[i] = profile[i];
1109 memcpy(profile, prof2, bins*sizeof(double));
1113 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1114 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1117 double unit_factor = 1., R_MolarGasConst, diff;
1119 R_MolarGasConst = 8.314472e-3; /* in kJ/(mol*K) */
1122 /* Not log? Nothing to do! */
1128 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1129 if (opt->unit == en_kT)
1133 else if (opt->unit == en_kJ)
1135 unit_factor = R_MolarGasConst*opt->Temperature;
1137 else if (opt->unit == en_kCal)
1139 unit_factor = R_MolarGasConst*opt->Temperature/4.1868;
1143 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1146 for (i = 0; i < bins; i++)
1148 if (profile[i] > 0.0)
1150 profile[i] = -log(profile[i])*unit_factor;
1154 /* shift to zero at z=opt->zProf0 */
1155 if (!opt->bProf0Set)
1161 /* Get bin with shortest distance to opt->zProf0
1162 (-0.5 from bin position and +0.5 from rounding cancel) */
1163 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1168 else if (imin >= bins)
1172 diff = profile[imin];
1176 for (i = 0; i < bins; i++)
1182 //! Make an array of random integers (used for bootstrapping)
1183 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1185 int ipull, blockBase, nr, ipullRandom;
1187 if (blockLength == 0)
1189 blockLength = nPull;
1192 for (ipull = 0; ipull < nPull; ipull++)
1194 blockBase = (ipull/blockLength)*blockLength;
1196 { /* make sure nothing bad happens in the last block */
1197 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1198 ipullRandom = blockBase + nr;
1200 while (ipullRandom >= nPull);
1201 if (ipullRandom < 0 || ipullRandom >= nPull)
1203 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1204 "blockLength = %d, blockBase = %d\n",
1205 ipullRandom, nPull, nr, blockLength, blockBase);
1207 randomArray[ipull] = ipullRandom;
1209 /*for (ipull=0; ipull<nPull; ipull++)
1210 printf("%d ",randomArray[ipull]); printf("\n"); */
1213 /*! \brief Set pull group information of a synthetic histogram
1215 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1216 * but it is not required if we bootstrap complete histograms.
1218 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1219 t_UmbrellaWindow *thisWindow, int pullid)
1221 synthWindow->N [0] = thisWindow->N [pullid];
1222 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1223 synthWindow->pos [0] = thisWindow->pos [pullid];
1224 synthWindow->z [0] = thisWindow->z [pullid];
1225 synthWindow->k [0] = thisWindow->k [pullid];
1226 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1227 synthWindow->g [0] = thisWindow->g [pullid];
1228 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1231 /*! \brief Calculate cumulative distribution function of of all histograms.
1233 * This allow to create random number sequences
1234 * which are distributed according to the histograms. Required to generate
1235 * the "synthetic" histograms for the Bootstrap method
1237 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1238 t_UmbrellaOptions *opt, const char *fnhist)
1242 char *fn = 0, *buf = 0;
1245 if (opt->bs_verbose)
1247 snew(fn, strlen(fnhist)+10);
1248 snew(buf, strlen(fnhist)+10);
1249 sprintf(fn, "%s_cumul.xvg", strncpy(buf, fnhist, strlen(fnhist)-4));
1250 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1254 for (i = 0; i < nWindows; i++)
1256 snew(window[i].cum, window[i].nPull);
1257 for (j = 0; j < window[i].nPull; j++)
1259 snew(window[i].cum[j], nbin+1);
1260 window[i].cum[j][0] = 0.;
1261 for (k = 1; k <= nbin; k++)
1263 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1266 /* normalize CDFs. Ensure cum[nbin]==1 */
1267 last = window[i].cum[j][nbin];
1268 for (k = 0; k <= nbin; k++)
1270 window[i].cum[j][k] /= last;
1275 printf("Cumulative distriubtion functions of all histograms created.\n");
1276 if (opt->bs_verbose)
1278 for (k = 0; k <= nbin; k++)
1280 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1281 for (i = 0; i < nWindows; i++)
1283 for (j = 0; j < window[i].nPull; j++)
1285 fprintf(fp, "%g\t", window[i].cum[j][k]);
1290 printf("Wrote cumulative distribution functions to %s\n", fn);
1298 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1300 * This is used to generate a random sequence distributed according to a histogram
1302 void searchCumulative(double xx[], int n, double x, int *j)
1324 else if (x == xx[n-1])
1334 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1335 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1336 int pullid, t_UmbrellaOptions *opt)
1338 int N, i, nbins, r_index, ibin;
1339 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1342 N = thisWindow->N[pullid];
1343 dt = thisWindow->dt;
1344 nbins = thisWindow->nBin;
1346 /* tau = autocorrelation time */
1347 if (opt->tauBootStrap > 0.0)
1349 tausteps = opt->tauBootStrap/dt;
1351 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1353 /* calc tausteps from g=1+2tausteps */
1354 g = thisWindow->g[pullid];
1360 "When generating hypothetical trajctories from given umbrella histograms,\n"
1361 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1362 "cannot be predicted. You have 3 options:\n"
1363 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1364 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1366 " with option -iiact for all umbrella windows.\n"
1367 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1368 " Use option (3) only if you are sure what you're doing, you may severely\n"
1369 " underestimate the error if a too small ACT is given.\n");
1370 gmx_fatal(FARGS, errstr);
1373 synthWindow->N [0] = N;
1374 synthWindow->pos [0] = thisWindow->pos[pullid];
1375 synthWindow->z [0] = thisWindow->z[pullid];
1376 synthWindow->k [0] = thisWindow->k[pullid];
1377 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1378 synthWindow->g [0] = thisWindow->g [pullid];
1379 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1381 for (i = 0; i < nbins; i++)
1383 synthWindow->Histo[0][i] = 0.;
1386 if (opt->bsMethod == bsMethod_trajGauss)
1388 sig = thisWindow->sigma [pullid];
1389 mu = thisWindow->aver [pullid];
1392 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1394 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1396 z = a*x + sqrt(1-a^2)*y
1397 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1398 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1400 C(t) = exp(-t/tau) with tau=-1/ln(a)
1402 Then, use error function to turn the Gaussian random variable into a uniformly
1403 distributed one in [0,1]. Eventually, use cumulative distribution function of
1404 histogram to get random variables distributed according to histogram.
1405 Note: The ACT of the flat distribution and of the generated histogram is not
1406 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1408 a = exp(-1.0/tausteps);
1410 invsqrt2 = 1./sqrt(2.0);
1412 /* init random sequence */
1413 x = gmx_rng_gaussian_table(opt->rng);
1415 if (opt->bsMethod == bsMethod_traj)
1417 /* bootstrap points from the umbrella histograms */
1418 for (i = 0; i < N; i++)
1420 y = gmx_rng_gaussian_table(opt->rng);
1422 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1423 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1425 r = 0.5*(1+gmx_erf(x*invsqrt2));
1426 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1427 synthWindow->Histo[0][r_index] += 1.;
1430 else if (opt->bsMethod == bsMethod_trajGauss)
1432 /* bootstrap points from a Gaussian with the same average and sigma
1433 as the respective umbrella histogram. The idea was, that -given
1434 limited sampling- the bootstrapped histograms are otherwise biased
1435 from the limited sampling of the US histos. However, bootstrapping from
1436 the Gaussian seems to yield a similar estimate. */
1440 y = gmx_rng_gaussian_table(opt->rng);
1443 ibin = static_cast<int> (floor((z-opt->min)/opt->dz));
1448 while ( (ibin += nbins) < 0)
1453 else if (ibin >= nbins)
1455 while ( (ibin -= nbins) >= nbins)
1462 if (ibin >= 0 && ibin < nbins)
1464 synthWindow->Histo[0][ibin] += 1.;
1471 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1475 /*! \brief Write all histograms to a file
1477 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1478 * sets of bootstrapped histograms.
1480 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1481 int bs_index, t_UmbrellaOptions *opt)
1483 char *fn = 0, *buf = 0, title[256];
1489 snew(fn, strlen(fnhist)+10);
1490 snew(buf, strlen(fnhist)+1);
1491 sprintf(fn, "%s_bs%d.xvg", strncpy(buf, fnhist, strlen(fnhist)-4), bs_index);
1492 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1496 fn = gmx_strdup(fnhist);
1497 strcpy(title, "Umbrella histograms");
1500 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1503 /* Write histograms */
1504 for (l = 0; l < bins; ++l)
1506 fprintf(fp, "%e\t", (double)(l+0.5)*opt->dz+opt->min);
1507 for (i = 0; i < nWindows; ++i)
1509 for (j = 0; j < window[i].nPull; ++j)
1511 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1518 printf("Wrote %s\n", fn);
1526 //! Used for qsort to sort random numbers
1527 int func_wham_is_larger(const void *a, const void *b)
1546 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1547 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1554 /* generate ordered random numbers between 0 and nAllPull */
1555 for (i = 0; i < nAllPull-1; i++)
1557 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1559 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1560 r[nAllPull-1] = 1.0*nAllPull;
1562 synthwin[0].bsWeight[0] = r[0];
1563 for (i = 1; i < nAllPull; i++)
1565 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1568 /* avoid to have zero weight by adding a tiny value */
1569 for (i = 0; i < nAllPull; i++)
1571 if (synthwin[i].bsWeight[0] < 1e-5)
1573 synthwin[i].bsWeight[0] = 1e-5;
1580 //! The main bootstrapping routine
1581 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1582 char* ylabel, double *profile,
1583 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1585 t_UmbrellaWindow * synthWindow;
1586 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1587 int i, j, *randomArray = 0, winid, pullid, ib;
1588 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1590 gmx_bool bExact = FALSE;
1592 /* init random generator */
1593 if (opt->bsSeed == -1)
1595 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1599 opt->rng = gmx_rng_init(opt->bsSeed);
1602 snew(bsProfile, opt->bins);
1603 snew(bsProfiles_av, opt->bins);
1604 snew(bsProfiles_av2, opt->bins);
1606 /* Create array of all pull groups. Note that different windows
1607 may have different nr of pull groups
1608 First: Get total nr of pull groups */
1610 for (i = 0; i < nWindows; i++)
1612 nAllPull += window[i].nPull;
1614 snew(allPull_winId, nAllPull);
1615 snew(allPull_pullId, nAllPull);
1617 /* Setup one array of all pull groups */
1618 for (i = 0; i < nWindows; i++)
1620 for (j = 0; j < window[i].nPull; j++)
1622 allPull_winId[iAllPull] = i;
1623 allPull_pullId[iAllPull] = j;
1628 /* setup stuff for synthetic windows */
1629 snew(synthWindow, nAllPull);
1630 for (i = 0; i < nAllPull; i++)
1632 synthWindow[i].nPull = 1;
1633 synthWindow[i].nBin = opt->bins;
1634 snew(synthWindow[i].Histo, 1);
1635 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1637 snew(synthWindow[i].Histo[0], opt->bins);
1639 snew(synthWindow[i].N, 1);
1640 snew(synthWindow[i].pos, 1);
1641 snew(synthWindow[i].z, 1);
1642 snew(synthWindow[i].k, 1);
1643 snew(synthWindow[i].bContrib, 1);
1644 snew(synthWindow[i].g, 1);
1645 snew(synthWindow[i].bsWeight, 1);
1648 switch (opt->bsMethod)
1651 snew(randomArray, nAllPull);
1652 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1653 please_cite(stdout, "Hub2006");
1655 case bsMethod_BayesianHist:
1656 /* just copy all histogams into synthWindow array */
1657 for (i = 0; i < nAllPull; i++)
1659 winid = allPull_winId [i];
1660 pullid = allPull_pullId[i];
1661 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1665 case bsMethod_trajGauss:
1666 calc_cumulatives(window, nWindows, opt, fnhist);
1669 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1672 /* do bootstrapping */
1673 fp = xvgropen(fnprof, "Boot strap profiles", xlabel, ylabel, opt->oenv);
1674 for (ib = 0; ib < opt->nBootStrap; ib++)
1676 printf(" *******************************************\n"
1677 " ******** Start bootstrap nr %d ************\n"
1678 " *******************************************\n", ib+1);
1680 switch (opt->bsMethod)
1683 /* bootstrap complete histograms from given histograms */
1684 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1685 for (i = 0; i < nAllPull; i++)
1687 winid = allPull_winId [randomArray[i]];
1688 pullid = allPull_pullId[randomArray[i]];
1689 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1692 case bsMethod_BayesianHist:
1693 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1694 setRandomBsWeights(synthWindow, nAllPull, opt);
1697 case bsMethod_trajGauss:
1698 /* create new histos from given histos, that is generate new hypothetical
1700 for (i = 0; i < nAllPull; i++)
1702 winid = allPull_winId[i];
1703 pullid = allPull_pullId[i];
1704 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1709 /* write histos in case of verbose output */
1710 if (opt->bs_verbose)
1712 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1719 memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1722 if ( (i%opt->stepUpdateContrib) == 0)
1724 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1726 if (maxchange < opt->Tolerance)
1730 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1732 printf("\t%4d) Maximum change %e\n", i, maxchange);
1734 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1737 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1738 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1742 prof_normalization_and_unit(bsProfile, opt);
1745 /* symmetrize profile around z=0 */
1748 symmetrizeProfile(bsProfile, opt);
1751 /* save stuff to get average and stddev */
1752 for (i = 0; i < opt->bins; i++)
1755 bsProfiles_av[i] += tmp;
1756 bsProfiles_av2[i] += tmp*tmp;
1757 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1759 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1763 /* write average and stddev */
1764 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1765 if (output_env_get_print_xvgr_codes(opt->oenv))
1767 fprintf(fp, "@TYPE xydy\n");
1769 for (i = 0; i < opt->bins; i++)
1771 bsProfiles_av [i] /= opt->nBootStrap;
1772 bsProfiles_av2[i] /= opt->nBootStrap;
1773 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1774 stddev = (tmp >= 0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1775 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1778 printf("Wrote boot strap result to %s\n", fnres);
1781 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1782 int whaminFileType(char *fn)
1786 if (strcmp(fn+len-3, "tpr") == 0)
1790 else if (strcmp(fn+len-3, "xvg") == 0 || strcmp(fn+len-6, "xvg.gz") == 0)
1792 return whamin_pullxf;
1794 else if (strcmp(fn+len-3, "pdo") == 0 || strcmp(fn+len-6, "pdo.gz") == 0)
1800 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1802 return whamin_unknown;
1805 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1806 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1807 t_UmbrellaOptions *opt)
1809 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1810 int nread, sizenow, i, block = 1;
1813 fp = gmx_ffopen(fn, "r");
1816 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1818 if (strlen(tmp) >= WHAM_MAXFILELEN)
1820 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1822 if (nread >= sizenow)
1825 srenew(filename, sizenow);
1826 for (i = sizenow-block; i < sizenow; i++)
1828 snew(filename[i], WHAM_MAXFILELEN);
1831 /* remove newline if there is one */
1832 if (tmp[strlen(tmp)-1] == '\n')
1834 tmp[strlen(tmp)-1] = '\0';
1836 strcpy(filename[nread], tmp);
1839 printf("Found file %s in %s\n", filename[nread], fn);
1843 *filenamesRet = filename;
1847 //! Open a file or a pipe to a gzipped file
1848 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1850 char Buffer[1024], gunzip[1024], *Path = 0;
1852 static gmx_bool bFirst = 1;
1854 /* gzipped pdo file? */
1855 if ((strcmp(fn+strlen(fn)-3, ".gz") == 0))
1857 /* search gunzip executable */
1858 if (!(Path = getenv("GMX_PATH_GZIP")))
1860 if (gmx_fexist("/bin/gunzip"))
1862 sprintf(gunzip, "%s", "/bin/gunzip");
1864 else if (gmx_fexist("/usr/bin/gunzip"))
1866 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1870 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1871 "You may want to define the path to gunzip "
1872 "with the environment variable GMX_PATH_GZIP.", gunzip);
1877 sprintf(gunzip, "%s/gunzip", Path);
1878 if (!gmx_fexist(gunzip))
1880 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1881 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1886 printf("Using gunzig executable %s\n", gunzip);
1889 if (!gmx_fexist(fn))
1891 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1893 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1896 printf("Executing command '%s'\n", Buffer);
1899 if ((pipe = popen(Buffer, "r")) == NULL)
1901 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1904 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1910 pipe = gmx_ffopen(fn, "r");
1917 //! Close file or pipe
1918 void pdo_close_file(FILE *fp)
1927 //! Reading all pdo files
1928 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1929 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1932 real mintmp, maxtmp, done = 0.;
1935 /* char Buffer0[1000]; */
1939 gmx_fatal(FARGS, "No files found. Hick.");
1942 /* if min and max are not given, get min and max from the input files */
1945 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1948 for (i = 0; i < nfiles; ++i)
1950 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1951 /*fgets(Buffer0,999,file);
1952 fprintf(stderr,"First line '%s'\n",Buffer0); */
1953 done = 100.0*(i+1)/nfiles;
1954 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1959 read_pdo_header(file, header, opt);
1960 /* here only determine min and max of this window */
1961 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1962 if (maxtmp > opt->max)
1966 if (mintmp < opt->min)
1972 pdo_close_file(file);
1980 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1981 if (opt->bBoundsOnly)
1983 printf("Found option -boundsonly, now exiting.\n");
1987 /* store stepsize in profile */
1988 opt->dz = (opt->max-opt->min)/opt->bins;
1990 /* Having min and max, we read in all files */
1991 /* Loop over all files */
1992 for (i = 0; i < nfiles; ++i)
1994 done = 100.0*(i+1)/nfiles;
1995 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2000 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2001 read_pdo_header(file, header, opt);
2002 /* load data into window */
2003 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2004 if ((window+i)->Ntot[0] == 0)
2006 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2010 pdo_close_file(file);
2018 for (i = 0; i < nfiles; ++i)
2025 //! translate 0/1 to N/Y to write pull dimensions
2026 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2028 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2029 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2034 static int first = 1;
2036 /* printf("Reading %s \n",fn); */
2037 read_tpx_state(fn, &ir, &state, NULL, NULL);
2041 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2044 /* nr of pull groups */
2046 for (i = 0; i < ir.pull->ncoord; i++)
2048 if (ir.pull->coord[i].eType == epullUMBRELLA)
2054 header->pull_geometry = ir.pull->coord[i].eGeom;
2055 copy_ivec(ir.pull->coord[i].dim, header->pull_dim);
2060 /* TODO: remove this restriction */
2061 gmx_fatal(FARGS, "Currently tpr files where a non-umbrella pull coordinate is followed by an umbrella pull coordinate are not supported");
2064 for (m = 0; m < DIM; m++)
2066 if (ir.pull->coord[i].eGeom != header->pull_geometry)
2068 /* TODO: remove the restriction */
2069 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should use the same geometry");
2072 if (ir.pull->coord[i].dim[m] != header->pull_dim[m])
2074 /* TODO: remove the restriction */
2075 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should operate on the same dimensions");
2085 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d umbrella pull coordinates\n", ncrd);
2088 header->npullcrds_tot = ir.pull->ncoord;
2089 header->npullcrds = ncrd;
2090 header->bPrintRef = ir.pull->bPrintCOM1;
2091 if (ir.pull->bPrintCOM2)
2093 gmx_fatal(FARGS, "Reading pull output with printing of the COM of group 2 is currently not supported");
2095 if (ir.pull->bPrintRefValue)
2097 gmx_fatal(FARGS, "Reading pull output with printing of the reference value of the coordinates is currently not supported");
2099 header->bPrintComp = ir.pull->bPrintComp;
2100 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2101 /* We should add a struct for each pull coord with all pull coord data
2102 instead of allocating many arrays for each property */
2103 snew(header->k, ncrd);
2104 snew(header->init_dist, ncrd);
2105 snew(header->umbInitDist, ncrd);
2107 /* only z-direction with epullgCYL? */
2108 if (header->pull_geometry == epullgCYL)
2110 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2112 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2113 "However, found dimensions [%s %s %s]\n",
2114 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2115 int2YN(header->pull_dim[ZZ]));
2119 for (i = 0; i < ncrd; i++)
2121 header->k[i] = ir.pull->coord[i].k;
2122 if (header->k[i] == 0.0)
2124 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2125 "That doesn't seem to be an Umbrella tpr.\n",
2128 header->init_dist[i] = ir.pull->coord[i].init;
2130 /* initial distance to reference */
2131 switch (header->pull_geometry)
2134 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2138 header->umbInitDist[i] = header->init_dist[i];
2141 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2145 if (opt->verbose || first)
2147 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2148 "\tPull group coordinates%s expected in pullx files.\n",
2149 fn, header->npullcrds, epullg_names[header->pull_geometry],
2150 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2151 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2152 for (i = 0; i < ncrd; i++)
2154 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2157 if (!opt->verbose && first)
2159 printf("\tUse option -v to see this output for all input tpr files\n\n");
2165 //! 2-norm in a ndim-dimensional space
2166 double dist_ndim(double **dx, int ndim, int line)
2170 for (i = 0; i < ndim; i++)
2172 r2 += sqr(dx[i][line]);
2177 //! Read pullx.xvg or pullf.xvg
2178 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2179 t_UmbrellaWindow * window,
2180 t_UmbrellaOptions *opt,
2181 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2182 t_groupselection *groupsel)
2184 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2185 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2186 real min, max, minfound = 1e20, maxfound = -1e20;
2187 gmx_bool dt_ok, timeok, bHaveForce;
2188 const char *quantity;
2189 const int blocklen = 4096;
2191 static gmx_bool bFirst = TRUE;
2194 * Data columns in pull output:
2195 * - in force output pullf.xvg:
2196 * No reference columns, one column per pull coordinate
2198 * - in position output pullx.xvg
2199 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2200 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2204 if (opt->bPullx && header->bPrintComp)
2206 nColPerCrd += header->pull_ndim;
2208 quantity = opt->bPullx ? "position" : "force";
2210 if (opt->bPullx && header->bPrintRef)
2212 nColRefEachCrd = header->pull_ndim;
2219 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2220 bHaveForce = opt->bPullf;
2222 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2223 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2224 Sorry for the laziness, this is a To-do. */
2225 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2228 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2229 "reading \n(option -if) is supported at present, "
2230 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2231 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
2232 epullg_names[header->pull_geometry]);
2235 nt = read_xvg(fn, &y, &ny);
2237 /* Check consistency */
2240 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2244 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2245 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2247 /* Since this tool code has not updated yet to allow difference pull
2248 * dimensions per pull coordinate, we can't easily check the exact
2249 * number of expected columns, so we only check what we expect for
2250 * the pull coordinates selected for the WHAM analysis.
2252 printf("Expecting these columns in pull file:\n"
2253 "\t%d reference columns for each individual pull coordinate\n"
2254 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2255 printf("With %d pull groups, expect %s%d columns (including the time column)\n",
2257 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2261 if (ny < nColExpect ||
2262 (header->npullcrds == header->npullcrds_tot && ny > nColExpect))
2264 gmx_fatal(FARGS, "Using %d pull coodinates from %s,\n but found %d data columns in %s (expected %s%d)\n"
2265 "\nMaybe you confused options -ix and -if ?\n",
2266 header->npullcrds, fntpr, ny-1, fn,
2267 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2273 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2283 window->dt = y[0][1]-y[0][0];
2285 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2287 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2290 /* Need to alocate memory and set up structure */
2294 /* Use only groups selected with option -is file */
2295 if (header->npullcrds != groupsel->n)
2297 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2298 header->npullcrds, groupsel->n);
2300 window->nPull = groupsel->nUse;
2304 window->nPull = header->npullcrds;
2307 window->nBin = bins;
2308 snew(window->Histo, window->nPull);
2309 snew(window->z, window->nPull);
2310 snew(window->k, window->nPull);
2311 snew(window->pos, window->nPull);
2312 snew(window->N, window->nPull);
2313 snew(window->Ntot, window->nPull);
2314 snew(window->g, window->nPull);
2315 snew(window->bsWeight, window->nPull);
2316 window->bContrib = 0;
2318 if (opt->bCalcTauInt)
2320 snew(window->ztime, window->nPull);
2324 window->ztime = NULL;
2326 snew(lennow, window->nPull);
2328 for (g = 0; g < window->nPull; ++g)
2331 window->bsWeight[g] = 1.;
2332 snew(window->Histo[g], bins);
2334 window->Ntot[g] = 0;
2336 if (opt->bCalcTauInt)
2338 window->ztime[g] = NULL;
2342 /* Copying umbrella center and force const is more involved since not
2343 all pull groups from header (tpr file) may be used in window variable */
2344 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2346 if (groupsel && (groupsel->bUse[g] == FALSE))
2350 window->k[gUsed] = header->k[g];
2351 window->pos[gUsed] = header->umbInitDist[g];
2356 { /* only determine min and max */
2359 min = max = bins = 0; /* Get rid of warnings */
2363 for (i = 0; i < nt; i++)
2365 /* Do you want that time frame? */
2366 t = 1.0/1000*( static_cast<int> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2368 /* get time step of pdo file and get dstep from opt->dt */
2378 dstep = static_cast<int>(opt->dt/dt+0.5);
2386 window->dt = dt*dstep;
2390 dt_ok = (i%dstep == 0);
2391 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2393 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2394 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2397 /* Note: if groupsel == NULL:
2398 * all groups in pullf/x file are stored in this window, and gUsed == g
2399 * if groupsel != NULL:
2400 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2403 for (g = 0; g < header->npullcrds; ++g)
2405 /* was this group selected for application in WHAM? */
2406 if (groupsel && (groupsel->bUse[g] == FALSE))
2415 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2417 pos = -force/header->k[g] + header->umbInitDist[g];
2421 /* pick the right column from:
2422 * time ref1[ndim] coord1[1(ndim)] ref2[ndim] coord2[1(ndim)] ...
2424 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2428 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2442 if (gUsed >= window->nPull)
2444 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2445 gUsed, window->nPull);
2448 if (opt->bCalcTauInt && !bGetMinMax)
2450 /* save time series for autocorrelation analysis */
2451 ntot = window->Ntot[gUsed];
2452 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2453 if (ntot >= lennow[gUsed])
2455 lennow[gUsed] += blocklen;
2456 srenew(window->ztime[gUsed], lennow[gUsed]);
2458 window->ztime[gUsed][ntot] = pos;
2461 ibin = static_cast<int> (floor((pos-min)/(max-min)*bins));
2466 while ( (ibin += bins) < 0)
2471 else if (ibin >= bins)
2473 while ( (ibin -= bins) >= bins)
2479 if (ibin >= 0 && ibin < bins)
2481 window->Histo[gUsed][ibin] += 1.;
2484 window->Ntot[gUsed]++;
2488 else if (t > opt->tmax)
2492 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2504 for (i = 0; i < ny; i++)
2510 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2511 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2512 t_UmbrellaHeader* header,
2513 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2516 real mintmp, maxtmp;
2518 printf("Reading %d tpr and pullf files\n", nfiles/2);
2520 /* min and max not given? */
2523 printf("Automatic determination of boundaries...\n");
2526 for (i = 0; i < nfiles; i++)
2528 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2530 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2532 read_tpr_header(fnTprs[i], header, opt);
2533 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2535 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2537 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2538 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2539 if (maxtmp > opt->max)
2543 if (mintmp < opt->min)
2548 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2549 if (opt->bBoundsOnly)
2551 printf("Found option -boundsonly, now exiting.\n");
2555 /* store stepsize in profile */
2556 opt->dz = (opt->max-opt->min)/opt->bins;
2558 for (i = 0; i < nfiles; i++)
2560 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2562 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2564 read_tpr_header(fnTprs[i], header, opt);
2565 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2567 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2569 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2570 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2571 if (window[i].Ntot[0] == 0)
2573 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2577 for (i = 0; i < nfiles; i++)
2586 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2588 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2589 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2591 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2593 int nlines, ny, i, ig;
2596 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2597 nlines = read_xvg(fn, &iact, &ny);
2598 if (nlines != nwins)
2600 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2603 for (i = 0; i < nlines; i++)
2605 if (window[i].nPull != ny)
2607 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2608 "number of pull groups is different in different simulations. That is not\n"
2609 "supported yet. Sorry.\n");
2611 for (ig = 0; ig < window[i].nPull; ig++)
2613 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2614 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2616 if (iact[ig][i] <= 0.0)
2618 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2625 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2628 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2629 * that -in case of limited sampling- the ACT may be severely underestimated.
2630 * Note: the g=1+2tau are overwritten.
2631 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2634 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2637 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2639 /* only evaluate within +- 3sigma of the Gausian */
2640 siglim = 3.0*opt->sigSmoothIact;
2641 siglim2 = dsqr(siglim);
2642 /* pre-factor of Gaussian */
2643 gaufact = 1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2644 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2646 for (i = 0; i < nwins; i++)
2648 snew(window[i].tausmooth, window[i].nPull);
2649 for (ig = 0; ig < window[i].nPull; ig++)
2653 pos = window[i].pos[ig];
2654 for (j = 0; j < nwins; j++)
2656 for (jg = 0; jg < window[j].nPull; jg++)
2658 dpos2 = dsqr(window[j].pos[jg]-pos);
2659 if (dpos2 < siglim2)
2661 w = gaufact*exp(-dpos2*invtwosig2);
2663 tausm += w*window[j].tau[jg];
2664 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2665 w,dpos2,pos,gaufact,invtwosig2); */
2670 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2672 window[i].tausmooth[ig] = tausm;
2676 window[i].tausmooth[ig] = window[i].tau[ig];
2678 window[i].g[ig] = 1+2*tausm/window[i].dt;
2683 //! Stop integrating autoccorelation function when ACF drops under this value
2684 #define WHAM_AC_ZERO_LIMIT 0.05
2686 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2688 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2689 t_UmbrellaOptions *opt, const char *fn)
2691 int i, ig, ncorr, ntot, j, k, *count, restart;
2692 real *corr, c0, dt, tmp;
2693 real *ztime, av, tausteps;
2694 FILE *fp, *fpcorr = 0;
2698 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2699 "time [ps]", "autocorrelation function", opt->oenv);
2703 for (i = 0; i < nwins; i++)
2705 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2707 ntot = window[i].Ntot[0];
2709 /* using half the maximum time as length of autocorrelation function */
2713 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2714 " points. Provide more pull data!", ntot);
2717 /* snew(corrSq,ncorr); */
2720 snew(window[i].tau, window[i].nPull);
2721 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2727 for (ig = 0; ig < window[i].nPull; ig++)
2729 if (ntot != window[i].Ntot[ig])
2731 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2732 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2734 ztime = window[i].ztime[ig];
2736 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2737 for (j = 0, av = 0; (j < ntot); j++)
2742 for (k = 0; (k < ncorr); k++)
2747 for (j = 0; (j < ntot); j += restart)
2749 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2751 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2753 /* corrSq[k] += tmp*tmp; */
2757 /* divide by nr of frames for each time displacement */
2758 for (k = 0; (k < ncorr); k++)
2760 /* count probably = (ncorr-k+(restart-1))/restart; */
2761 corr[k] = corr[k]/count[k];
2762 /* variance of autocorrelation function */
2763 /* corrSq[k]=corrSq[k]/count[k]; */
2765 /* normalize such that corr[0] == 0 */
2767 for (k = 0; (k < ncorr); k++)
2770 /* corrSq[k]*=c0*c0; */
2773 /* write ACFs in verbose mode */
2776 for (k = 0; (k < ncorr); k++)
2778 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2780 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2783 /* esimate integrated correlation time, fitting is too unstable */
2784 tausteps = 0.5*corr[0];
2785 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2786 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2788 tausteps += corr[j];
2791 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2792 Kumar et al, eq. 28 ff. */
2793 window[i].tau[ig] = tausteps*dt;
2794 window[i].g[ig] = 1+2*tausteps;
2795 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2806 /* plot IACT along reaction coordinate */
2807 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2808 if (output_env_get_print_xvgr_codes(opt->oenv))
2810 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2811 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2812 for (i = 0; i < nwins; i++)
2814 fprintf(fp, "# %3d ", i);
2815 for (ig = 0; ig < window[i].nPull; ig++)
2817 fprintf(fp, " %11g", window[i].tau[ig]);
2822 for (i = 0; i < nwins; i++)
2824 for (ig = 0; ig < window[i].nPull; ig++)
2826 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2829 if (opt->sigSmoothIact > 0.0)
2831 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2832 opt->sigSmoothIact);
2833 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2834 smoothIact(window, nwins, opt);
2835 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2836 if (output_env_get_print_xvgr_codes(opt->oenv))
2838 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2839 fprintf(fp, "@ s1 symbol color 2\n");
2841 for (i = 0; i < nwins; i++)
2843 for (ig = 0; ig < window[i].nPull; ig++)
2845 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2850 printf("Wrote %s\n", fn);
2854 * compute average and sigma of each umbrella histogram
2856 void averageSigma(t_UmbrellaWindow *window, int nwins)
2859 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2861 for (i = 0; i < nwins; i++)
2863 snew(window[i].aver, window[i].nPull);
2864 snew(window[i].sigma, window[i].nPull);
2866 ntot = window[i].Ntot[0];
2867 for (ig = 0; ig < window[i].nPull; ig++)
2869 ztime = window[i].ztime[ig];
2870 for (k = 0, av = 0.; k < ntot; k++)
2875 for (k = 0, sum2 = 0.; k < ntot; k++)
2880 sig = sqrt(sum2/ntot);
2881 window[i].aver[ig] = av;
2883 /* Note: This estimate for sigma is biased from the limited sampling.
2884 Correct sigma by n/(n-1) where n = number of independent
2885 samples. Only possible if IACT is known.
2889 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2890 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2894 window[i].sigma[ig] = sig;
2896 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2903 * Use histograms to compute average force on pull group.
2905 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2907 int i, j, bins = opt->bins, k;
2908 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2911 dz = (max-min)/bins;
2912 ztot = opt->max-min;
2915 /* Compute average displacement from histograms */
2916 for (j = 0; j < nWindows; ++j)
2918 snew(window[j].forceAv, window[j].nPull);
2919 for (k = 0; k < window[j].nPull; ++k)
2923 for (i = 0; i < opt->bins; ++i)
2925 temp = (1.0*i+0.5)*dz+min;
2926 distance = temp - window[j].pos[k];
2928 { /* in cyclic wham: */
2929 if (distance > ztot_half) /* |distance| < ztot_half */
2933 else if (distance < -ztot_half)
2938 w = window[j].Histo[k][i]/window[j].g[k];
2939 displAv += w*distance;
2941 /* Are we near min or max? We are getting wrong forces from the histgrams since
2942 the histograms are zero outside [min,max). Therefore, assume that the position
2943 on the other side of the histomgram center is equally likely. */
2946 posmirrored = window[j].pos[k]-distance;
2947 if (posmirrored >= max || posmirrored < min)
2949 displAv += -w*distance;
2956 /* average force from average displacement */
2957 window[j].forceAv[k] = displAv*window[j].k[k];
2958 /* sigma from average square displacement */
2959 /* window[j].sigma [k] = sqrt(displAv2); */
2960 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2966 * Check if the complete reaction coordinate is covered by the histograms
2968 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2969 t_UmbrellaOptions *opt)
2971 int i, ig, j, bins = opt->bins, bBoundary;
2972 real avcount = 0, z, relcount, *count;
2973 snew(count, opt->bins);
2975 for (j = 0; j < opt->bins; ++j)
2977 for (i = 0; i < nwins; i++)
2979 for (ig = 0; ig < window[i].nPull; ig++)
2981 count[j] += window[i].Histo[ig][j];
2984 avcount += 1.0*count[j];
2987 for (j = 0; j < bins; ++j)
2989 relcount = count[j]/avcount;
2990 z = (j+0.5)*opt->dz+opt->min;
2991 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
2992 /* check for bins with no data */
2995 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2996 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2998 /* and check for poor sampling */
2999 else if (relcount < 0.005 && !bBoundary)
3001 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3007 /*! \brief Compute initial potential by integrating the average force
3009 * This speeds up the convergence by roughly a factor of 2
3011 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
3013 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3014 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3017 dz = (opt->max-min)/bins;
3019 printf("Getting initial potential by integration.\n");
3021 /* Compute average displacement from histograms */
3022 computeAverageForce(window, nWindows, opt);
3024 /* Get force for each bin from all histograms in this bin, or, alternatively,
3025 if no histograms are inside this bin, from the closest histogram */
3028 for (j = 0; j < opt->bins; ++j)
3030 pos = (1.0*j+0.5)*dz+min;
3034 groupmin = winmin = 0;
3035 for (i = 0; i < nWindows; i++)
3037 for (ig = 0; ig < window[i].nPull; ig++)
3039 hispos = window[i].pos[ig];
3040 dist = fabs(hispos-pos);
3041 /* average force within bin */
3045 fAv += window[i].forceAv[ig];
3047 /* at the same time, remember closest histogram */
3056 /* if no histogram found in this bin, use closest histogram */
3063 fAv = window[winmin].forceAv[groupmin];
3067 for (j = 1; j < opt->bins; ++j)
3069 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3072 /* cyclic wham: linearly correct possible offset */
3075 diff = (pot[bins-1]-pot[0])/(bins-1);
3076 for (j = 1; j < opt->bins; ++j)
3083 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3084 for (j = 0; j < opt->bins; ++j)
3086 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3089 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3092 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3093 energy offsets which are usually determined by wham
3094 First: turn pot into probabilities:
3096 for (j = 0; j < opt->bins; ++j)
3098 pot[j] = exp(-pot[j]/(8.314e-3*opt->Temperature));
3100 calc_z(pot, window, nWindows, opt, TRUE);
3106 //! Count number of words in an line
3107 static int wordcount(char *ptr)
3112 if (strlen(ptr) == 0)
3116 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3118 for (i = 0; (ptr[i] != '\0'); i++)
3120 is[cur] = isspace(ptr[i]);
3121 if ((i > 0) && (is[cur] && !is[1-cur]))
3130 /*! \brief Read input file for pull group selection (option -is)
3132 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3134 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3137 int i, iline, n, len = STRLEN, temp;
3138 char *ptr = 0, *tmpbuf = 0;
3139 char fmt[1024], fmtign[1024];
3140 int block = 1, sizenow;
3142 fp = gmx_ffopen(opt->fnGroupsel, "r");
3143 opt->groupsel = NULL;
3148 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3153 if (iline >= sizenow)
3156 srenew(opt->groupsel, sizenow);
3158 opt->groupsel[iline].n = n;
3159 opt->groupsel[iline].nUse = 0;
3160 snew(opt->groupsel[iline].bUse, n);
3163 for (i = 0; i < n; i++)
3165 strcpy(fmt, fmtign);
3167 if (sscanf(ptr, fmt, &temp))
3169 opt->groupsel[iline].bUse[i] = (temp > 0);
3170 if (opt->groupsel[iline].bUse[i])
3172 opt->groupsel[iline].nUse++;
3175 strcat(fmtign, "%*s");
3179 opt->nGroupsel = iline;
3180 if (nTpr != opt->nGroupsel)
3182 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3186 printf("\nUse only these pull groups:\n");
3187 for (iline = 0; iline < nTpr; iline++)
3189 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3190 for (i = 0; i < opt->groupsel[iline].n; i++)
3192 if (opt->groupsel[iline].bUse[i])
3205 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3207 //! Number of elements in fnm (used for command line parsing)
3208 #define NFILE asize(fnm)
3210 //! The main g_wham routine
3211 int gmx_wham(int argc, char *argv[])
3213 const char *desc[] = {
3214 "[THISMODULE] is an analysis program that implements the Weighted",
3215 "Histogram Analysis Method (WHAM). It is intended to analyze",
3216 "output files generated by umbrella sampling simulations to ",
3217 "compute a potential of mean force (PMF).[PAR]",
3219 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3220 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3221 "and, if multiple coordinates need to be analyzed, all used the same",
3222 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3224 "At present, three input modes are supported.",
3226 "* With option [TT]-it[tt], the user provides a file which contains the",
3227 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3228 " AND, with option [TT]-ix[tt], a file which contains file names of",
3229 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3230 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3231 " first pullx, etc.",
3232 "* Same as the previous input mode, except that the the user",
3233 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3234 " From the pull force the position in the umbrella potential is",
3235 " computed. This does not work with tabulated umbrella potentials.",
3236 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3237 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3238 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3239 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3240 " must be similar to the following::",
3243 " # Component selection: 0 0 1",
3245 " # Ref. Group 'TestAtom'",
3246 " # Nr. of pull groups 2",
3247 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3248 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3251 " The number of pull groups, umbrella positions, force constants, and names ",
3252 " may (of course) differ. Following the header, a time column and ",
3253 " a data column for each pull group follows (i.e. the displacement",
3254 " with respect to the umbrella center). Up to four pull groups are possible ",
3255 " per [REF].pdo[ref] file at present.",
3257 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3258 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3259 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3260 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3261 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3262 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3263 "used, groupsel.dat looks like this::",
3269 "By default, the output files are",
3271 "* [TT]-o[tt] PMF output file",
3272 "* [TT]-hist[tt] Histograms output file",
3274 "Always check whether the histograms sufficiently overlap.[PAR]",
3275 "The umbrella potential is assumed to be harmonic and the force constants are ",
3276 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3277 "a tabulated potential can be provided with [TT]-tab[tt].",
3282 "* [TT]-bins[tt] Number of bins used in analysis",
3283 "* [TT]-temp[tt] Temperature in the simulations",
3284 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3285 "* [TT]-auto[tt] Automatic determination of boundaries",
3286 "* [TT]-min,-max[tt] Boundaries of the profile",
3288 "The data points that are used to compute the profile",
3289 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3290 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3291 "umbrella window.[PAR]",
3292 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3293 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3294 "With energy output, the energy in the first bin is defined to be zero. ",
3295 "If you want the free energy at a different ",
3296 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3297 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3298 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3299 "[THISMODULE] will make use of the",
3300 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3301 "reaction coordinate will assumed be be neighbors.[PAR]",
3302 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3303 "which may be useful for, e.g. membranes.",
3308 "If available, the number of OpenMP threads used by g_wham is controlled with [TT]-nt[tt].",
3313 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3314 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3315 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3316 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3317 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3318 "Because the IACTs can be severely underestimated in case of limited ",
3319 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3320 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3321 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3322 "integration of the ACFs while the ACFs are larger 0.05.",
3323 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3324 "less robust) method such as fitting to a double exponential, you can ",
3325 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3326 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3327 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull group in the respective file.",
3332 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3333 "otherwise the statistical error may be substantially underestimated. ",
3334 "More background and examples for the bootstrap technique can be found in ",
3335 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3336 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3337 "Four bootstrapping methods are supported and ",
3338 "selected with [TT]-bs-method[tt].",
3340 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3341 " data points, and the bootstrap is carried out by assigning random weights to the ",
3342 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3343 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3344 " statistical error is underestimated.",
3345 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3346 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3347 " (allowing duplication, i.e. sampling with replacement).",
3348 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3349 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3350 " divided into blocks and only histograms within each block are mixed. Note that ",
3351 " the histograms within each block must be representative for all possible histograms, ",
3352 " otherwise the statistical error is underestimated.",
3353 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3354 " such that the generated data points are distributed according the given histograms ",
3355 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3356 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3357 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3358 " Note that this method may severely underestimate the error in case of limited sampling, ",
3359 " that is if individual histograms do not represent the complete phase space at ",
3360 " the respective positions.",
3361 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3362 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3363 " and width of the umbrella histograms. That method yields similar error estimates ",
3364 " like method [TT]traj[tt].",
3366 "Bootstrapping output:",
3368 "* [TT]-bsres[tt] Average profile and standard deviations",
3369 "* [TT]-bsprof[tt] All bootstrapping profiles",
3371 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3372 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3376 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3377 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3378 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3379 static t_UmbrellaOptions opt;
3380 static int nthreads = -1;
3384 { "-nt", FALSE, etINT, {&nthreads},
3385 "Number of threads used by g_wham (if -1, all threads will be used or what is specified by the environment variable OMP_NUM_THREADS)"},
3387 { "-min", FALSE, etREAL, {&opt.min},
3388 "Minimum coordinate in profile"},
3389 { "-max", FALSE, etREAL, {&opt.max},
3390 "Maximum coordinate in profile"},
3391 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3392 "Determine min and max automatically"},
3393 { "-bins", FALSE, etINT, {&opt.bins},
3394 "Number of bins in profile"},
3395 { "-temp", FALSE, etREAL, {&opt.Temperature},
3397 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3399 { "-v", FALSE, etBOOL, {&opt.verbose},
3401 { "-b", FALSE, etREAL, {&opt.tmin},
3402 "First time to analyse (ps)"},
3403 { "-e", FALSE, etREAL, {&opt.tmax},
3404 "Last time to analyse (ps)"},
3405 { "-dt", FALSE, etREAL, {&opt.dt},
3406 "Analyse only every dt ps"},
3407 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3408 "Write histograms and exit"},
3409 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3410 "Determine min and max and exit (with [TT]-auto[tt])"},
3411 { "-log", FALSE, etBOOL, {&opt.bLog},
3412 "Calculate the log of the profile before printing"},
3413 { "-unit", FALSE, etENUM, {en_unit},
3414 "Energy unit in case of log output" },
3415 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3416 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3417 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3418 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3419 { "-sym", FALSE, etBOOL, {&opt.bSym},
3420 "Symmetrize profile around z=0"},
3421 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3422 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3423 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3424 "Calculate integrated autocorrelation times and use in wham"},
3425 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3426 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3427 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3428 "When computing autocorrelation functions, restart computing every .. (ps)"},
3429 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3430 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3431 "during smoothing"},
3432 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3433 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3434 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3435 "Bootstrap method" },
3436 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3437 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3438 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3439 "Seed for bootstrapping. (-1 = use time)"},
3440 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3441 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3442 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3443 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3444 { "-stepout", FALSE, etINT, {&opt.stepchange},
3445 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3446 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3447 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3451 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3452 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3453 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3454 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3455 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3456 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3457 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3458 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3459 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3460 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3461 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3462 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3465 int i, j, l, nfiles, nwins, nfiles2;
3466 t_UmbrellaHeader header;
3467 t_UmbrellaWindow * window = NULL;
3468 double *profile, maxchange = 1e20;
3469 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3470 char **fninTpr, **fninPull, **fninPdo;
3472 FILE *histout, *profout;
3473 char ylabel[256], title[256];
3476 opt.verbose = FALSE;
3477 opt.bHistOnly = FALSE;
3487 /* bootstrapping stuff */
3489 opt.bsMethod = bsMethod_hist;
3490 opt.tauBootStrap = 0.0;
3492 opt.histBootStrapBlockLength = 8;
3493 opt.bs_verbose = FALSE;
3498 opt.Temperature = 298;
3499 opt.Tolerance = 1e-6;
3500 opt.bBoundsOnly = FALSE;
3502 opt.bCalcTauInt = FALSE;
3503 opt.sigSmoothIact = 0.0;
3504 opt.bAllowReduceIact = TRUE;
3505 opt.bInitPotByIntegration = TRUE;
3506 opt.acTrestart = 1.0;
3507 opt.stepchange = 100;
3508 opt.stepUpdateContrib = 100;
3510 if (!parse_common_args(&argc, argv, 0,
3511 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3516 opt.unit = nenum(en_unit);
3517 opt.bsMethod = nenum(en_bsMethod);
3519 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3521 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3522 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3523 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3524 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3525 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3526 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3527 if (opt.bTab && opt.bPullf)
3529 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3530 "Provide pullx.xvg or pdo files!");
3533 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3535 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3537 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3539 gmx_fatal(FARGS, "g_wham supports three input modes, pullx, pullf, or pdo file input."
3540 "\n\n Check g_wham -h !");
3543 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3544 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3545 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3546 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3547 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3549 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3550 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3551 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3552 if ( (bMinSet || bMaxSet) && bAutoSet)
3554 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3557 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3559 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3562 if (bMinSet && opt.bAuto)
3564 printf("Note: min and max given, switching off -auto.\n");
3568 if (opt.bTauIntGiven && opt.bCalcTauInt)
3570 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3571 "the autocorrelation times. Not both.");
3574 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3576 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3577 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3579 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3581 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3582 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3585 /* Set # of OpenMP threads */
3588 gmx_omp_set_num_threads(nthreads);
3592 nthreads = gmx_omp_get_max_threads();
3596 printf("\nNote: Will use %d OpenMP threads.\n\n", nthreads);
3599 /* Reading gmx4 pull output and tpr files */
3600 if (opt.bTpr || opt.bPullf || opt.bPullx)
3602 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3604 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3605 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3606 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3607 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3608 if (nfiles != nfiles2)
3610 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3611 opt.fnTpr, nfiles2, fnPull);
3614 /* Read file that selects the pull group to be used */
3615 if (opt.fnGroupsel != NULL)
3617 readPullGroupSelection(&opt, fninTpr, nfiles);
3620 window = initUmbrellaWindows(nfiles);
3621 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3624 { /* reading pdo files */
3625 if (opt.fnGroupsel != NULL)
3627 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3628 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3630 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3631 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3632 window = initUmbrellaWindows(nfiles);
3633 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3637 /* enforce equal weight for all histograms? */
3640 enforceEqualWeights(window, nwins);
3643 /* write histograms */
3644 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3645 xlabel, "count", opt.oenv);
3646 for (l = 0; l < opt.bins; ++l)
3648 fprintf(histout, "%e\t", (double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3649 for (i = 0; i < nwins; ++i)
3651 for (j = 0; j < window[i].nPull; ++j)
3653 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3656 fprintf(histout, "\n");
3659 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3662 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3666 /* Using tabulated umbrella potential */
3669 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3672 /* Integrated autocorrelation times provided ? */
3673 if (opt.bTauIntGiven)
3675 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3678 /* Compute integrated autocorrelation times */
3679 if (opt.bCalcTauInt)
3681 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3684 /* calc average and sigma for each histogram
3685 (maybe required for bootstrapping. If not, this is fast anyhow) */
3686 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3688 averageSigma(window, nwins);
3691 /* Get initial potential by simple integration */
3692 if (opt.bInitPotByIntegration)
3694 guessPotByIntegration(window, nwins, &opt);
3697 /* Check if complete reaction coordinate is covered */
3698 checkReactionCoordinateCovered(window, nwins, &opt);
3700 /* Calculate profile */
3701 snew(profile, opt.bins);
3709 if ( (i%opt.stepUpdateContrib) == 0)
3711 setup_acc_wham(profile, window, nwins, &opt);
3713 if (maxchange < opt.Tolerance)
3716 /* if (opt.verbose) */
3717 printf("Switched to exact iteration in iteration %d\n", i);
3719 calc_profile(profile, window, nwins, &opt, bExact);
3720 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3722 printf("\t%4d) Maximum change %e\n", i, maxchange);
3726 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3727 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3729 /* calc error from Kumar's formula */
3730 /* Unclear how the error propagates along reaction coordinate, therefore
3732 /* calc_error_kumar(profile,window, nwins,&opt); */
3734 /* Write profile in energy units? */
3737 prof_normalization_and_unit(profile, &opt);
3738 strcpy(ylabel, en_unit_label[opt.unit]);
3739 strcpy(title, "Umbrella potential");
3743 strcpy(ylabel, "Density of states");
3744 strcpy(title, "Density of states");
3747 /* symmetrize profile around z=0? */
3750 symmetrizeProfile(profile, &opt);
3753 /* write profile or density of states */
3754 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3755 for (i = 0; i < opt.bins; ++i)
3757 fprintf(profout, "%e\t%e\n", (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3760 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3762 /* Bootstrap Method */
3765 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3766 opt2fn("-hist", NFILE, fnm),
3767 ylabel, profile, window, nwins, &opt);
3771 freeUmbrellaWindows(window, nfiles);
3773 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
3774 please_cite(stdout, "Hub2010");