2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
42 * \author Jochen Hub <jhub@gwdg.de>
57 #include "gromacs/commandline/pargs.h"
58 #include "gromacs/fileio/tpxio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxana/gmx_ana.h"
61 #include "gromacs/math/functions.h"
62 #include "gromacs/math/units.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/pull_params.h"
67 #include "gromacs/mdtypes/state.h"
68 #include "gromacs/pulling/pull.h"
69 #include "gromacs/random/tabulatednormaldistribution.h"
70 #include "gromacs/random/threefry.h"
71 #include "gromacs/random/uniformintdistribution.h"
72 #include "gromacs/random/uniformrealdistribution.h"
73 #include "gromacs/utility/arraysize.h"
74 #include "gromacs/utility/cstringutil.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/futil.h"
78 #include "gromacs/utility/gmxomp.h"
79 #include "gromacs/utility/path.h"
80 #include "gromacs/utility/pleasecite.h"
81 #include "gromacs/utility/smalloc.h"
83 //! longest file names allowed in input files
84 #define WHAM_MAXFILELEN 2048
87 * enum for energy units
98 * enum for type of input files (tpr or pullx/pullf)
107 /*! \brief enum for bootstrapping method
109 * These bootstrap methods are supported:
110 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
111 * (bsMethod_BayesianHist)
112 * - bootstrap complete histograms (bsMethod_hist)
113 * - bootstrap trajectories from given umbrella histograms. This generates new
114 * "synthetic" histograms (bsMethod_traj)
115 * - bootstrap trajectories from Gaussian with mu/sigma computed from
116 * the respective histogram (bsMethod_trajGauss). This gives very similar
117 * results compared to bsMethod_traj.
119 * ********************************************************************
120 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
121 * JS Hub, BL de Groot, D van der Spoel
122 * g_wham - A free weighted histogram analysis implementation including
123 * robust error and autocorrelation estimates,
124 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
125 * ********************************************************************
130 bsMethod_BayesianHist,
136 //! Parameters of one pull coordinate
139 PullingAlgorithm pull_type; //!< such as constraint, umbrella, ...
140 PullGroupGeometry geometry; //!< such as distance, direction, cylinder
141 int ngroup; //!< the number of pull groups involved
142 ivec dim; //!< pull dimension with geometry distance
143 int ndim; //!< nr of pull_dim != 0
144 real k; //!< force constants in tpr file
145 real init_dist; //!< reference displacement
146 char coord_unit[256]; //!< unit of the displacement
149 //! Parameters of the umbrella potentials
153 * \name Using umbrella pull code since gromacs 4.x
156 int npullcrds; //!< nr of umbrella pull coordinates for reading
157 t_pullcoord* pcrd; //!< the pull coordinates
158 gmx_bool bPrintCOM; //!< COMs of pull groups writtn in pullx.xvg
159 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
160 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
165 //! Data in the umbrella histograms
168 int nPull; //!< nr of pull groups in this pullf/pullx file
169 double** Histo; //!< nPull histograms
170 double** cum; //!< nPull cumulative distribution functions
171 int nBin; //!< nr of bins. identical to opt->bins
172 double* k; //!< force constants for the nPull coords
173 double* pos; //!< umbrella positions for the nPull coords
174 double* z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
175 int* N; //!< nr of data points in nPull histograms.
176 int* Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
178 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
180 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
181 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
184 double* tau; //!< intetrated autocorrelation time (IACT)
185 double* tausmooth; //!< smoothed IACT
187 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
189 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
191 real** ztime; //!< input data z(t) as a function of time. Required to compute ACTs
193 /*! \brief average force estimated from average displacement, fAv=dzAv*k
195 * Used for integration to guess the potential.
198 real* aver; //!< average of histograms
199 real* sigma; //!< stddev of histograms
200 double* bsWeight; //!< for bootstrapping complete histograms with continuous weights
203 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
206 int n; //!< total nr of pull coords in this tpr file
207 int nUse; //!< nr of pull coords used
208 gmx_bool* bUse; //!< boolean array of size n. =1 if used, =0 if not
211 //! Parameters of WHAM
212 typedef struct UmbrellaOptions // NOLINT(clang-analyzer-optin.performance.Padding)
218 const char *fnTpr, *fnPullf, *fnCoordSel;
219 const char* fnPullx; //!< file names of input
220 gmx_bool bTpr, bPullf, bPullx; //!< input file types given?
221 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
223 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
224 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
225 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
226 t_coordselection* coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
229 * \name Basic WHAM options
232 int bins; //!< nr of bins, min, max, and dz of profile
234 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
235 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
238 * \name Output control
241 gmx_bool bLog; //!< energy output (instead of probability) for profile
242 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
243 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
244 /*! \brief after wham, set prof to zero at this z-position.
245 * When bootstrapping, set zProf0 to a "stable" reference position.
248 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
250 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
251 gmx_bool bAuto; //!< determine min and max automatically but do not exit
253 gmx_bool verbose; //!< more noisy wham mode
254 int stepchange; //!< print maximum change in prof after how many interations
255 gmx_output_env_t* oenv; //!< xvgr options
258 * \name Autocorrelation stuff
261 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
262 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
263 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
264 real acTrestart; //!< when computing ACT, time between restarting points
266 /* \brief Enforce the same weight for each umbella window, that is
267 * calculate with the same number of data points for
268 * each window. That can be reasonable, if the histograms
269 * have different length, but due to autocorrelation,
270 * a longer simulation should not have larger weightin wham.
276 * \name Bootstrapping stuff
279 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
281 /* \brief bootstrap method
283 * if == bsMethod_hist, consider complete histograms as independent
284 * data points and, hence, only mix complete histograms.
285 * if == bsMethod_BayesianHist, consider complete histograms
286 * as independent data points, but assign random weights
287 * to the histograms during the bootstrapping ("Bayesian bootstrap")
288 * In case of long correlations (e.g., inside a channel), these
289 * will yield a more realistic error.
290 * if == bsMethod_traj(Gauss), generate synthetic histograms
292 * histogram by generating an autocorrelated random sequence
293 * that is distributed according to the respective given
294 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
295 * (instead of from the umbrella histogram) to generate a new
300 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
303 /* \brief when mixing histograms, mix only histograms withing blocks
304 long the reaction coordinate xi. Avoids gaps along xi. */
305 int histBootStrapBlockLength;
307 int bsSeed; //!< random seed for bootstrapping
309 /* \brief Write cumulative distribution functions (CDFs) of histograms
310 and write the generated histograms for each bootstrap */
314 * \name tabulated umbrella potential stuff
318 double * tabX, *tabY, tabMin, tabMax, tabDz;
321 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
322 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
325 //! Make an umbrella window (may contain several histograms)
326 static t_UmbrellaWindow* initUmbrellaWindows(int nwin)
328 t_UmbrellaWindow* win;
331 for (i = 0; i < nwin; i++)
333 win[i].Histo = win[i].cum = nullptr;
334 win[i].k = win[i].pos = win[i].z = nullptr;
335 win[i].N = win[i].Ntot = nullptr;
336 win[i].g = win[i].tau = win[i].tausmooth = nullptr;
337 win[i].bContrib = nullptr;
338 win[i].ztime = nullptr;
339 win[i].forceAv = nullptr;
340 win[i].aver = win[i].sigma = nullptr;
341 win[i].bsWeight = nullptr;
346 //! Delete an umbrella window (may contain several histograms)
347 static void freeUmbrellaWindows(t_UmbrellaWindow* win, int nwin)
350 for (i = 0; i < nwin; i++)
354 for (j = 0; j < win[i].nPull; j++)
356 sfree(win[i].Histo[j]);
361 for (j = 0; j < win[i].nPull; j++)
363 sfree(win[i].cum[j]);
368 for (j = 0; j < win[i].nPull; j++)
370 sfree(win[i].bContrib[j]);
382 sfree(win[i].tausmooth);
383 sfree(win[i].bContrib);
385 sfree(win[i].forceAv);
388 sfree(win[i].bsWeight);
394 * Read and setup tabulated umbrella potential
396 static void setup_tab(const char* fn, t_UmbrellaOptions* opt)
401 printf("Setting up tabulated potential from file %s\n", fn);
402 nl = read_xvg(fn, &y, &ny);
406 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
408 opt->tabMin = y[0][0];
409 opt->tabMax = y[0][nl - 1];
410 opt->tabDz = (opt->tabMax - opt->tabMin) / (nl - 1);
414 "The tabulated potential in %s must be provided in \n"
415 "ascending z-direction",
418 for (i = 0; i < nl - 1; i++)
420 if (std::abs(y[0][i + 1] - y[0][i] - opt->tabDz) > opt->tabDz / 1e6)
422 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", fn);
427 for (i = 0; i < nl; i++)
429 opt->tabX[i] = y[0][i];
430 opt->tabY[i] = y[1][i];
432 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
439 static char* fgets3(FILE* fp, char ptr[], int* len)
444 if (fgets(ptr, *len - 1, fp) == nullptr)
449 while ((std::strchr(ptr, '\n') == nullptr) && (!feof(fp)))
451 /* This line is longer than len characters, let's increase len! */
455 if (fgets(p - 1, STRLEN, fp) == nullptr)
460 slen = std::strlen(ptr);
461 if (ptr[slen - 1] == '\n')
463 ptr[slen - 1] = '\0';
469 /*! \brief Set identical weights for all histograms
471 * Normally, the weight is given by the number data points in each
472 * histogram, together with the autocorrelation time. This can be overwritten
473 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
474 * an appropriate autocorrelation times instead of using this function.
476 static void enforceEqualWeights(t_UmbrellaWindow* window, int nWindows)
478 int i, k, j, NEnforced;
481 NEnforced = window[0].Ntot[0];
482 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
483 "non-weighted histogram analysis method. Ndata = %d\n",
485 /* enforce all histograms to have the same weight as the very first histogram */
487 for (j = 0; j < nWindows; ++j)
489 for (k = 0; k < window[j].nPull; ++k)
491 ratio = 1.0 * NEnforced / window[j].Ntot[k];
492 for (i = 0; i < window[0].nBin; ++i)
494 window[j].Histo[k][i] *= ratio;
496 window[j].N[k] = gmx::roundToInt(ratio * window[j].N[k]);
501 /*! \brief Simple linear interpolation between two given tabulated points
503 static double tabulated_pot(double dist, t_UmbrellaOptions* opt)
506 double pl, pu, dz, dp;
508 jl = static_cast<int>(std::floor((dist - opt->tabMin) / opt->tabDz));
510 if (jl < 0 || ju >= opt->tabNbins)
513 "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
514 "Provide an extended table.",
521 dz = dist - opt->tabX[jl];
522 dp = (pu - pl) * dz / opt->tabDz;
528 * Check which bins substiantially contribute (accelerates WHAM)
530 * Don't worry, that routine does not mean we compute the PMF in limited precision.
531 * After rapid convergence (using only substiantal contributions), we always switch to
534 static void setup_acc_wham(const double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt)
536 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
537 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
538 gmx_bool bAnyContrib;
539 static int bFirst = 1;
540 static double wham_contrib_lim;
544 for (i = 0; i < nWindows; ++i)
546 nGrptot += window[i].nPull;
548 wham_contrib_lim = opt->Tolerance / nGrptot;
551 ztot = opt->max - opt->min;
552 ztot_half = ztot / 2;
554 for (i = 0; i < nWindows; ++i)
556 if (!window[i].bContrib)
558 snew(window[i].bContrib, window[i].nPull);
560 for (j = 0; j < window[i].nPull; ++j)
562 if (!window[i].bContrib[j])
564 snew(window[i].bContrib[j], opt->bins);
567 for (k = 0; k < opt->bins; ++k)
569 temp = (1.0 * k + 0.5) * dz + min;
570 distance = temp - window[i].pos[j]; /* distance to umbrella center */
572 { /* in cyclic wham: */
573 if (distance > ztot_half) /* |distance| < ztot_half */
577 else if (distance < -ztot_half)
582 /* Note: there are two contributions to bin k in the wham equations:
583 i) N[j]*exp(- U/(c_boltz*opt->Temperature) + window[i].z[j])
584 ii) exp(- U/(c_boltz*opt->Temperature))
585 where U is the umbrella potential
586 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
591 U = 0.5 * window[i].k[j] * gmx::square(distance); /* harmonic potential assumed. */
595 U = tabulated_pot(distance, opt); /* Use tabulated potential */
597 contrib1 = profile[k] * std::exp(-U / (gmx::c_boltz * opt->Temperature));
598 contrib2 = window[i].N[j]
599 * std::exp(-U / (gmx::c_boltz * opt->Temperature) + window[i].z[j]);
600 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
601 bAnyContrib = bAnyContrib || window[i].bContrib[j][k];
602 if (window[i].bContrib[j][k])
608 /* If this histo is far outside min and max all bContrib may be FALSE,
609 causing a floating point exception later on. To avoid that, switch
613 for (k = 0; k < opt->bins; ++k)
615 window[i].bContrib[j][k] = TRUE;
622 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
623 "Evaluating only %d of %d expressions.\n\n",
631 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n", nContrib, nTot);
636 //! Compute the PMF (one of the two main WHAM routines)
637 static void calc_profile(double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, gmx_bool bExact)
639 double ztot_half, ztot, min = opt->min, dz = opt->dz;
641 ztot = opt->max - opt->min;
642 ztot_half = ztot / 2;
648 int nthreads = gmx_omp_get_max_threads();
649 int thread_id = gmx_omp_get_thread_num();
651 int i0 = thread_id * opt->bins / nthreads;
652 int i1 = std::min(opt->bins, ((thread_id + 1) * opt->bins) / nthreads);
654 for (i = i0; i < i1; ++i)
657 double num, denom, invg, temp = 0, distance, U = 0;
659 for (j = 0; j < nWindows; ++j)
661 for (k = 0; k < window[j].nPull; ++k)
663 invg = 1.0 / window[j].g[k] * window[j].bsWeight[k];
664 temp = (1.0 * i + 0.5) * dz + min;
665 num += invg * window[j].Histo[k][i];
667 if (!(bExact || window[j].bContrib[k][i]))
671 distance = temp - window[j].pos[k]; /* distance to umbrella center */
673 { /* in cyclic wham: */
674 if (distance > ztot_half) /* |distance| < ztot_half */
678 else if (distance < -ztot_half)
686 U = 0.5 * window[j].k[k] * gmx::square(distance); /* harmonic potential assumed. */
690 U = tabulated_pot(distance, opt); /* Use tabulated potential */
692 denom += invg * window[j].N[k]
693 * std::exp(-U / (gmx::c_boltz * opt->Temperature) + window[j].z[k]);
696 profile[i] = num / denom;
699 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
703 //! Compute the free energy offsets z (one of the two main WHAM routines)
704 static double calc_z(const double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, gmx_bool bExact)
706 double min = opt->min, dz = opt->dz, ztot_half, ztot;
707 double maxglob = -1e20;
709 ztot = opt->max - opt->min;
710 ztot_half = ztot / 2;
716 int nthreads = gmx_omp_get_max_threads();
717 int thread_id = gmx_omp_get_thread_num();
719 int i0 = thread_id * nWindows / nthreads;
720 int i1 = std::min(nWindows, ((thread_id + 1) * nWindows) / nthreads);
721 double maxloc = -1e20;
723 for (i = i0; i < i1; ++i)
725 double total = 0, temp, distance, U = 0;
728 for (j = 0; j < window[i].nPull; ++j)
731 for (k = 0; k < window[i].nBin; ++k)
733 if (!(bExact || window[i].bContrib[j][k]))
737 temp = (1.0 * k + 0.5) * dz + min;
738 distance = temp - window[i].pos[j]; /* distance to umbrella center */
740 { /* in cyclic wham: */
741 if (distance > ztot_half) /* |distance| < ztot_half */
745 else if (distance < -ztot_half)
753 U = 0.5 * window[i].k[j] * gmx::square(distance); /* harmonic potential assumed. */
757 U = tabulated_pot(distance, opt); /* Use tabulated potential */
759 total += profile[k] * std::exp(-U / (gmx::c_boltz * opt->Temperature));
761 /* Avoid floating point exception if window is far outside min and max */
764 total = -std::log(total);
770 temp = std::abs(total - window[i].z[j]);
775 window[i].z[j] = total;
778 /* Now get maximum maxloc from the threads and put in maxglob */
779 if (maxloc > maxglob)
783 if (maxloc > maxglob)
790 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
796 //! Make PMF symmetric around 0 (useful e.g. for membranes)
797 static void symmetrizeProfile(double* profile, t_UmbrellaOptions* opt)
799 int i, j, bins = opt->bins;
800 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
803 if (min > 0. || max < 0.)
805 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n", opt->min, opt->max);
810 for (i = 0; i < bins; i++)
812 z = min + (i + 0.5) * dz;
814 /* bin left of zsym */
815 j = gmx::roundToInt((zsym - min) / dz) - 1;
816 if (j >= 0 && (j + 1) < bins)
818 /* interpolate profile linearly between bins j and j+1 */
819 z1 = min + (j + 0.5) * dz;
821 profsym = profile[j] + (profile[j + 1] - profile[j]) / dz * deltaz;
822 /* average between left and right */
823 prof2[i] = 0.5 * (profsym + profile[i]);
827 prof2[i] = profile[i];
831 std::memcpy(profile, prof2, bins * sizeof(double));
835 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
836 static void prof_normalization_and_unit(double* profile, t_UmbrellaOptions* opt)
839 double unit_factor = 1., diff;
843 /* Not log? Nothing to do! */
849 /* Get profile in units of kT, kJ/mol, or kCal/mol */
850 if (opt->unit == en_kT)
854 else if (opt->unit == en_kJ)
856 unit_factor = gmx::c_boltz * opt->Temperature;
858 else if (opt->unit == en_kCal)
860 unit_factor = (gmx::c_boltz / gmx::c_cal2Joule) * opt->Temperature;
864 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
867 for (i = 0; i < bins; i++)
869 if (profile[i] > 0.0)
871 profile[i] = -std::log(profile[i]) * unit_factor;
875 /* shift to zero at z=opt->zProf0 */
882 /* Get bin with shortest distance to opt->zProf0
883 (-0.5 from bin position and +0.5 from rounding cancel) */
884 imin = static_cast<int>((opt->zProf0 - opt->min) / opt->dz);
889 else if (imin >= bins)
893 diff = profile[imin];
897 for (i = 0; i < bins; i++)
903 //! Make an array of random integers (used for bootstrapping)
904 static void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine* rng)
906 gmx::UniformIntDistribution<int> dist(0, blockLength - 1);
908 int ipull, blockBase, nr, ipullRandom;
910 if (blockLength == 0)
915 for (ipull = 0; ipull < nPull; ipull++)
917 blockBase = (ipull / blockLength) * blockLength;
919 { /* make sure nothing bad happens in the last block */
920 nr = dist(*rng); // [0,blockLength-1]
921 ipullRandom = blockBase + nr;
922 } while (ipullRandom >= nPull);
923 if (ipullRandom < 0 || ipullRandom >= nPull)
926 "Ups, random iWin = %d, nPull = %d, nr = %d, "
927 "blockLength = %d, blockBase = %d\n",
934 randomArray[ipull] = ipullRandom;
938 /*! \brief Set pull group information of a synthetic histogram
940 * This is used when bootstapping new trajectories and thereby create new histogtrams,
941 * but it is not required if we bootstrap complete histograms.
943 static void copy_pullgrp_to_synthwindow(t_UmbrellaWindow* synthWindow, t_UmbrellaWindow* thisWindow, int pullid)
945 synthWindow->N[0] = thisWindow->N[pullid];
946 synthWindow->Histo[0] = thisWindow->Histo[pullid];
947 synthWindow->pos[0] = thisWindow->pos[pullid];
948 synthWindow->z[0] = thisWindow->z[pullid];
949 synthWindow->k[0] = thisWindow->k[pullid];
950 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
951 synthWindow->g[0] = thisWindow->g[pullid];
952 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
955 /*! \brief Calculate cumulative distribution function of of all histograms.
957 * This allow to create random number sequences
958 * which are distributed according to the histograms. Required to generate
959 * the "synthetic" histograms for the Bootstrap method
961 static void calc_cumulatives(t_UmbrellaWindow* window,
963 t_UmbrellaOptions* opt,
974 fn = gmx::Path::concatenateBeforeExtension(fnhist, "_cumul");
975 fp = xvgropen(fn.c_str(), "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
979 for (i = 0; i < nWindows; i++)
981 snew(window[i].cum, window[i].nPull);
982 for (j = 0; j < window[i].nPull; j++)
984 snew(window[i].cum[j], nbin + 1);
985 window[i].cum[j][0] = 0.;
986 for (k = 1; k <= nbin; k++)
988 window[i].cum[j][k] = window[i].cum[j][k - 1] + window[i].Histo[j][k - 1];
991 /* normalize CDFs. Ensure cum[nbin]==1 */
992 last = window[i].cum[j][nbin];
993 for (k = 0; k <= nbin; k++)
995 window[i].cum[j][k] /= last;
1000 printf("Cumulative distribution functions of all histograms created.\n");
1001 if (opt->bs_verbose)
1003 for (k = 0; k <= nbin; k++)
1005 fprintf(fp, "%g\t", opt->min + k * opt->dz);
1006 for (i = 0; i < nWindows; i++)
1008 for (j = 0; j < window[i].nPull; j++)
1010 fprintf(fp, "%g\t", window[i].cum[j][k]);
1015 printf("Wrote cumulative distribution functions to %s\n", fn.c_str());
1021 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1023 * This is used to generate a random sequence distributed according to a histogram
1025 static void searchCumulative(const double xx[], int n, double x, int* j)
1033 jm = (ju + jl) >> 1;
1047 else if (x == xx[n - 1])
1057 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1058 static void create_synthetic_histo(t_UmbrellaWindow* synthWindow,
1059 t_UmbrellaWindow* thisWindow,
1061 t_UmbrellaOptions* opt)
1063 int N, i, nbins, r_index, ibin;
1064 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1067 N = thisWindow->N[pullid];
1068 dt = thisWindow->dt;
1069 nbins = thisWindow->nBin;
1071 /* tau = autocorrelation time */
1072 if (opt->tauBootStrap > 0.0)
1074 tausteps = opt->tauBootStrap / dt;
1076 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1078 /* calc tausteps from g=1+2tausteps */
1079 g = thisWindow->g[pullid];
1080 tausteps = (g - 1) / 2;
1085 "When generating hypothetical trajectories from given umbrella histograms,\n"
1086 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1087 "cannot be predicted. You have 3 options:\n"
1088 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1089 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1091 " with option -iiact for all umbrella windows.\n"
1092 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1093 " Use option (3) only if you are sure what you're doing, you may severely\n"
1094 " underestimate the error if a too small ACT is given.\n");
1095 gmx_fatal(FARGS, "%s", errstr);
1098 synthWindow->N[0] = N;
1099 synthWindow->pos[0] = thisWindow->pos[pullid];
1100 synthWindow->z[0] = thisWindow->z[pullid];
1101 synthWindow->k[0] = thisWindow->k[pullid];
1102 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1103 synthWindow->g[0] = thisWindow->g[pullid];
1104 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1106 for (i = 0; i < nbins; i++)
1108 synthWindow->Histo[0][i] = 0.;
1111 if (opt->bsMethod == bsMethod_trajGauss)
1113 sig = thisWindow->sigma[pullid];
1114 mu = thisWindow->aver[pullid];
1117 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1119 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1121 z = a*x + sqrt(1-a^2)*y
1122 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1123 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1125 C(t) = exp(-t/tau) with tau=-1/ln(a)
1127 Then, use error function to turn the Gaussian random variable into a uniformly
1128 distributed one in [0,1]. Eventually, use cumulative distribution function of
1129 histogram to get random variables distributed according to histogram.
1130 Note: The ACT of the flat distribution and of the generated histogram is not
1131 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1133 a = std::exp(-1.0 / tausteps);
1134 ap = std::sqrt(1.0 - a * a);
1135 invsqrt2 = 1.0 / std::sqrt(2.0);
1137 /* init random sequence */
1138 x = opt->normalDistribution(opt->rng);
1140 if (opt->bsMethod == bsMethod_traj)
1142 /* bootstrap points from the umbrella histograms */
1143 for (i = 0; i < N; i++)
1145 y = opt->normalDistribution(opt->rng);
1147 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1148 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1150 r = 0.5 * (1 + std::erf(x * invsqrt2));
1151 searchCumulative(thisWindow->cum[pullid], nbins + 1, r, &r_index);
1152 synthWindow->Histo[0][r_index] += 1.;
1155 else if (opt->bsMethod == bsMethod_trajGauss)
1157 /* bootstrap points from a Gaussian with the same average and sigma
1158 as the respective umbrella histogram. The idea was, that -given
1159 limited sampling- the bootstrapped histograms are otherwise biased
1160 from the limited sampling of the US histos. However, bootstrapping from
1161 the Gaussian seems to yield a similar estimate. */
1165 y = opt->normalDistribution(opt->rng);
1168 ibin = static_cast<int>(std::floor((z - opt->min) / opt->dz));
1173 while ((ibin += nbins) < 0) {}
1175 else if (ibin >= nbins)
1177 while ((ibin -= nbins) >= nbins) {}
1181 if (ibin >= 0 && ibin < nbins)
1183 synthWindow->Histo[0][ibin] += 1.;
1190 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1194 /*! \brief Write all histograms to a file
1196 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1197 * sets of bootstrapped histograms.
1199 static void print_histograms(const char* fnhist,
1200 t_UmbrellaWindow* window,
1203 t_UmbrellaOptions* opt,
1206 std::string fn, title;
1212 fn = gmx::Path::concatenateBeforeExtension(fnhist, gmx::formatString("_bs%d", bs_index));
1213 title = gmx::formatString("Umbrella histograms. Bootstrap #%d", bs_index);
1217 fn = gmx_strdup(fnhist);
1218 title = gmx::formatString("Umbrella histograms");
1221 fp = xvgropen(fn.c_str(), title.c_str(), xlabel, "count", opt->oenv);
1224 /* Write histograms */
1225 for (l = 0; l < bins; ++l)
1227 fprintf(fp, "%e\t", (l + 0.5) * opt->dz + opt->min);
1228 for (i = 0; i < nWindows; ++i)
1230 for (j = 0; j < window[i].nPull; ++j)
1232 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1239 printf("Wrote %s\n", fn.c_str());
1242 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1243 static void setRandomBsWeights(t_UmbrellaWindow* synthwin, int nAllPull, t_UmbrellaOptions* opt)
1247 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1251 /* generate ordered random numbers between 0 and nAllPull */
1252 for (i = 0; i < nAllPull - 1; i++)
1254 r[i] = dist(opt->rng);
1256 std::sort(r, r + nAllPull - 1);
1257 r[nAllPull - 1] = 1.0 * nAllPull;
1259 synthwin[0].bsWeight[0] = r[0];
1260 for (i = 1; i < nAllPull; i++)
1262 synthwin[i].bsWeight[0] = r[i] - r[i - 1];
1265 /* avoid to have zero weight by adding a tiny value */
1266 for (i = 0; i < nAllPull; i++)
1268 if (synthwin[i].bsWeight[0] < 1e-5)
1270 synthwin[i].bsWeight[0] = 1e-5;
1277 //! The main bootstrapping routine
1278 static void do_bootstrapping(const char* fnres,
1284 t_UmbrellaWindow* window,
1286 t_UmbrellaOptions* opt)
1288 t_UmbrellaWindow* synthWindow;
1289 double * bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1290 int i, j, *randomArray = nullptr, winid, pullid, ib;
1291 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1293 gmx_bool bExact = FALSE;
1295 /* init random generator */
1296 if (opt->bsSeed == 0)
1298 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1300 opt->rng.seed(opt->bsSeed);
1302 snew(bsProfile, opt->bins);
1303 snew(bsProfiles_av, opt->bins);
1304 snew(bsProfiles_av2, opt->bins);
1306 /* Create array of all pull groups. Note that different windows
1307 may have different nr of pull groups
1308 First: Get total nr of pull groups */
1310 for (i = 0; i < nWindows; i++)
1312 nAllPull += window[i].nPull;
1314 snew(allPull_winId, nAllPull);
1315 snew(allPull_pullId, nAllPull);
1317 /* Setup one array of all pull groups */
1318 for (i = 0; i < nWindows; i++)
1320 for (j = 0; j < window[i].nPull; j++)
1322 allPull_winId[iAllPull] = i;
1323 allPull_pullId[iAllPull] = j;
1328 /* setup stuff for synthetic windows */
1329 snew(synthWindow, nAllPull);
1330 for (i = 0; i < nAllPull; i++)
1332 synthWindow[i].nPull = 1;
1333 synthWindow[i].nBin = opt->bins;
1334 snew(synthWindow[i].Histo, 1);
1335 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1337 snew(synthWindow[i].Histo[0], opt->bins);
1339 snew(synthWindow[i].N, 1);
1340 snew(synthWindow[i].pos, 1);
1341 snew(synthWindow[i].z, 1);
1342 snew(synthWindow[i].k, 1);
1343 snew(synthWindow[i].bContrib, 1);
1344 snew(synthWindow[i].g, 1);
1345 snew(synthWindow[i].bsWeight, 1);
1348 switch (opt->bsMethod)
1351 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1352 please_cite(stdout, "Hub2006");
1354 case bsMethod_BayesianHist:
1355 /* just copy all histogams into synthWindow array */
1356 for (i = 0; i < nAllPull; i++)
1358 winid = allPull_winId[i];
1359 pullid = allPull_pullId[i];
1360 copy_pullgrp_to_synthwindow(synthWindow + i, window + winid, pullid);
1364 case bsMethod_trajGauss: calc_cumulatives(window, nWindows, opt, fnhist, xlabel); break;
1365 default: gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1368 /* do bootstrapping */
1369 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1370 for (ib = 0; ib < opt->nBootStrap; ib++)
1372 printf(" *******************************************\n"
1373 " ******** Start bootstrap nr %d ************\n"
1374 " *******************************************\n",
1377 switch (opt->bsMethod)
1380 /* bootstrap complete histograms from given histograms */
1381 srenew(randomArray, nAllPull);
1382 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1383 for (i = 0; i < nAllPull; i++)
1385 winid = allPull_winId[randomArray[i]];
1386 pullid = allPull_pullId[randomArray[i]];
1387 copy_pullgrp_to_synthwindow(synthWindow + i, window + winid, pullid);
1390 case bsMethod_BayesianHist:
1391 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1392 setRandomBsWeights(synthWindow, nAllPull, opt);
1395 case bsMethod_trajGauss:
1396 /* create new histos from given histos, that is generate new hypothetical
1398 for (i = 0; i < nAllPull; i++)
1400 winid = allPull_winId[i];
1401 pullid = allPull_pullId[i];
1402 create_synthetic_histo(synthWindow + i, window + winid, pullid, opt);
1407 /* write histos in case of verbose output */
1408 if (opt->bs_verbose)
1410 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1417 std::memcpy(bsProfile, profile, opt->bins * sizeof(double)); /* use profile as guess */
1420 if ((i % opt->stepUpdateContrib) == 0)
1422 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1424 if (maxchange < opt->Tolerance)
1428 if (((i % opt->stepchange) == 0 || i == 1) && i != 0)
1430 printf("\t%4d) Maximum change %e\n", i, maxchange);
1432 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1434 } while ((maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance
1436 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1440 prof_normalization_and_unit(bsProfile, opt);
1443 /* symmetrize profile around z=0 */
1446 symmetrizeProfile(bsProfile, opt);
1449 /* save stuff to get average and stddev */
1450 for (i = 0; i < opt->bins; i++)
1453 bsProfiles_av[i] += tmp;
1454 bsProfiles_av2[i] += tmp * tmp;
1455 fprintf(fp, "%e\t%e\n", (i + 0.5) * opt->dz + opt->min, tmp);
1457 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1461 /* write average and stddev */
1462 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1463 if (output_env_get_print_xvgr_codes(opt->oenv))
1465 fprintf(fp, "@TYPE xydy\n");
1467 for (i = 0; i < opt->bins; i++)
1469 bsProfiles_av[i] /= opt->nBootStrap;
1470 bsProfiles_av2[i] /= opt->nBootStrap;
1471 tmp = bsProfiles_av2[i] - gmx::square(bsProfiles_av[i]);
1472 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1473 fprintf(fp, "%e\t%e\t%e\n", (i + 0.5) * opt->dz + opt->min, bsProfiles_av[i], stddev);
1476 printf("Wrote boot strap result to %s\n", fnres);
1479 //! Return type of input file based on file extension (xvg or tpr)
1480 static int whaminFileType(char* fn)
1483 len = std::strlen(fn);
1484 if (std::strcmp(fn + len - 3, "tpr") == 0)
1488 else if (std::strcmp(fn + len - 3, "xvg") == 0 || std::strcmp(fn + len - 6, "xvg.gz") == 0)
1490 return whamin_pullxf;
1495 "Unknown file type of %s. Should be tpr or xvg. Use GROMACS 2021 or earlier to "
1496 "read pdo files.\n",
1501 //! Read the files names in pullf/pullx-files.dat, tpr-files.dat
1502 static void read_wham_in(const char* fn, char*** filenamesRet, int* nfilesRet, t_UmbrellaOptions* opt)
1504 char **filename = nullptr, tmp[WHAM_MAXFILELEN + 2];
1505 int nread, sizenow, i, block = 1;
1508 fp = gmx_ffopen(fn, "r");
1511 while (fgets(tmp, sizeof(tmp), fp) != nullptr)
1513 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1515 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1517 if (nread >= sizenow)
1520 srenew(filename, sizenow);
1521 for (i = sizenow - block; i < sizenow; i++)
1523 snew(filename[i], WHAM_MAXFILELEN);
1526 /* remove newline if there is one */
1527 if (tmp[std::strlen(tmp) - 1] == '\n')
1529 tmp[std::strlen(tmp) - 1] = '\0';
1531 std::strcpy(filename[nread], tmp);
1534 printf("Found file %s in %s\n", filename[nread], fn);
1538 *filenamesRet = filename;
1542 //! translate 0/1 to N/Y to write pull dimensions
1543 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
1545 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
1546 static void read_tpr_header(const char* fn, t_UmbrellaHeader* header, t_UmbrellaOptions* opt, t_coordselection* coordsel)
1548 t_inputrec irInstance;
1549 t_inputrec* ir = &irInstance;
1551 static int first = 1;
1553 /* printf("Reading %s \n",fn); */
1554 read_tpx_state(fn, ir, &state, nullptr);
1558 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
1560 if (ir->pull->ncoord == 0)
1562 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
1565 /* Read overall pull info */
1566 header->npullcrds = ir->pull->ncoord;
1567 header->bPrintCOM = ir->pull->bPrintCOM;
1568 header->bPrintRefValue = ir->pull->bPrintRefValue;
1569 header->bPrintComp = ir->pull->bPrintComp;
1571 /* Read pull coordinates */
1572 snew(header->pcrd, header->npullcrds);
1573 for (int i = 0; i < ir->pull->ncoord; i++)
1575 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
1576 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
1577 header->pcrd[i].ngroup = ir->pull->coord[i].ngroup;
1578 /* Angle type coordinates are handled fully in degrees in gmx wham.
1579 * The pull "distance" appears with a power of -2 in the force constant,
1580 * so we need to multiply with the internal units (radians for angle)
1581 * to user units (degrees for an angle) with the same power.
1583 header->pcrd[i].k = ir->pull->coord[i].k
1584 / gmx::square(pull_conversion_factor_internal2userinput(ir->pull->coord[i]));
1585 header->pcrd[i].init_dist = ir->pull->coord[i].init;
1587 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
1588 header->pcrd[i].ndim =
1589 header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
1591 std::strcpy(header->pcrd[i].coord_unit, pull_coordinate_units(ir->pull->coord[i]));
1593 if (ir->efep != FreeEnergyPerturbationType::No && ir->pull->coord[i].k != ir->pull->coord[i].kB)
1596 "Seems like you did free-energy perturbation, and you perturbed the force "
1598 " This is not supported.\n");
1600 if (coordsel && (coordsel->n != ir->pull->ncoord))
1603 "Found %d pull coordinates in %s, but %d columns in the respective line\n"
1604 "coordinate selection file (option -is)\n",
1611 /* Check pull coords for consistency */
1612 PullGroupGeometry geom = PullGroupGeometry::Count;
1613 ivec thedim = { 0, 0, 0 };
1614 bool geometryIsSet = false;
1615 for (int i = 0; i < ir->pull->ncoord; i++)
1617 if (coordsel == nullptr || coordsel->bUse[i])
1619 if (header->pcrd[i].pull_type != PullingAlgorithm::Umbrella)
1622 "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only "
1623 "umbrella coodinates can enter WHAM.\n"
1624 "If you have umbrella and non-umbrella coordinates, you can select the "
1625 "umbrella coordinates with gmx wham -is\n",
1628 enumValueToString(header->pcrd[i].pull_type));
1632 geom = header->pcrd[i].geometry;
1633 copy_ivec(header->pcrd[i].dim, thedim);
1634 geometryIsSet = true;
1636 if (geom != header->pcrd[i].geometry)
1639 "%s: Your pull coordinates have different pull geometry (coordinate 1: "
1640 "%s, coordinate %d: %s)\n"
1641 "If you want to use only some pull coordinates in WHAM, please select "
1642 "them with option gmx wham -is\n",
1644 enumValueToString(geom),
1646 enumValueToString(header->pcrd[i].geometry));
1648 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY]
1649 || thedim[ZZ] != header->pcrd[i].dim[ZZ])
1652 "%s: Your pull coordinates have different pull dimensions (coordinate 1: "
1653 "%s %s %s, coordinate %d: %s %s %s)\n"
1654 "If you want to use only some pull coordinates in WHAM, please select "
1655 "them with option gmx wham -is\n",
1661 int2YN(header->pcrd[i].dim[XX]),
1662 int2YN(header->pcrd[i].dim[YY]),
1663 int2YN(header->pcrd[i].dim[ZZ]));
1665 if (header->pcrd[i].geometry == PullGroupGeometry::Cylinder)
1667 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
1671 "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
1672 "However, found dimensions [%s %s %s]\n",
1673 int2YN(header->pcrd[i].dim[XX]),
1674 int2YN(header->pcrd[i].dim[YY]),
1675 int2YN(header->pcrd[i].dim[ZZ]));
1678 if (header->pcrd[i].k <= 0.0)
1681 "%s: Pull coordinate %d has force constant of of %g.\n"
1682 "That doesn't seem to be an Umbrella tpr.\n",
1690 if (opt->verbose || first)
1692 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
1694 for (int i = 0; i < ir->pull->ncoord; i++)
1696 int lentmp = strlen(enumValueToString(header->pcrd[i].geometry));
1697 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
1701 "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d "
1702 "dimensions). Used: %%s\n",
1704 for (int i = 0; i < ir->pull->ncoord; i++)
1706 bool use = (coordsel == nullptr || coordsel->bUse[i]);
1708 enumValueToString(header->pcrd[i].geometry),
1710 header->pcrd[i].init_dist,
1711 int2YN(header->pcrd[i].dim[XX]),
1712 int2YN(header->pcrd[i].dim[YY]),
1713 int2YN(header->pcrd[i].dim[ZZ]),
1714 header->pcrd[i].ndim,
1715 use ? "Yes" : "No");
1716 printf("\tPull group coordinates of %d groups expected in pullx files.\n",
1717 ir->pull->bPrintCOM ? header->pcrd[i].ngroup : 0);
1719 printf("\tReference value of the coordinate%s expected in pullx files.\n",
1720 header->bPrintRefValue ? "" : " not");
1722 if (!opt->verbose && first)
1724 printf("\tUse option -v to see this output for all input tpr files\n\n");
1730 //! Read pullx.xvg or pullf.xvg
1731 static void read_pull_xf(const char* fn,
1732 t_UmbrellaHeader* header,
1733 t_UmbrellaWindow* window,
1734 t_UmbrellaOptions* opt,
1735 gmx_bool bGetMinMax,
1738 t_coordselection* coordsel)
1740 double ** y = nullptr, pos = 0., t, force, time0 = 0., dt;
1741 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
1742 int nColExpect, ntot, column;
1743 real min, max, minfound = 1e20, maxfound = -1e20;
1744 gmx_bool dt_ok, timeok;
1745 const char* quantity;
1746 const int blocklen = 4096;
1747 int* lennow = nullptr;
1748 static gmx_bool bFirst = TRUE;
1751 * Data columns in pull output:
1752 * - in force output pullf.xvg:
1753 * No reference columns, one column per pull coordinate
1755 * - in position output pullx.xvg:
1756 * * optionally, ndim columns for COMs of all groups (depending on on mdp options pull-print-com);
1757 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
1758 * be written separately into pullx file, but this is not supported and throws an error below;
1759 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
1762 if (header->bPrintComp && opt->bPullx)
1766 "gmx wham cannot read pullx files if the components of the coordinate was written\n"
1767 "(mdp option pull-print-components). Provide the pull force files instead (with "
1771 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
1772 snew(nColThisCrd, header->npullcrds);
1773 snew(nColCOMCrd, header->npullcrds);
1774 snew(nColRefCrd, header->npullcrds);
1778 /* pullf reading: simply one column per coordinate */
1779 for (g = 0; g < header->npullcrds; g++)
1788 /* pullx reading. Note explanation above. */
1789 for (g = 0; g < header->npullcrds; g++)
1791 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
1792 nColCOMCrd[g] = (header->bPrintCOM ? header->pcrd[g].ndim * header->pcrd[g].ngroup : 0);
1793 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
1797 nColExpect = 1; /* time column */
1798 for (g = 0; g < header->npullcrds; g++)
1800 nColExpect += nColThisCrd[g];
1803 /* read pullf or pullx. Could be optimized if min and max are given. */
1804 nt = read_xvg(fn, &y, &ny);
1806 /* Check consistency */
1807 quantity = opt->bPullx ? "position" : "force";
1810 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
1812 if (bFirst || opt->verbose)
1814 // Note the comment above about the pull file column format!
1815 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
1816 printf("\tColumn for time: 1\n");
1818 for (i = 0; i < header->npullcrds; i++)
1820 printf("\tColumn(s) with data for pull coordinate %d are\n", i + 1);
1821 if (nColCOMCrd[i] > 0)
1823 const int firstColumnForCOM = nextColumn;
1824 const int lastColumnForCOM = firstColumnForCOM + nColCOMCrd[i] - 1;
1825 const int columnForThisCoordinate = lastColumnForCOM + 1;
1826 printf("\t\treaction coordinate: %d\n"
1827 "\t\tcenter-of-mass of groups: %d through %d\n",
1828 columnForThisCoordinate,
1831 nextColumn = columnForThisCoordinate + 1;
1835 const int columnForThisCoordinate = nextColumn;
1836 printf("\t\treaction coordinate: %d\n"
1837 "\t\tcenter-of-mass of groups: No\n",
1838 columnForThisCoordinate);
1841 if (header->bPrintRefValue)
1843 const int columnForRefValue = nextColumn;
1844 printf("\t\treference position column: %d\n", columnForRefValue);
1849 printf("\t\treference position column: No\n");
1852 printf("\tFound %d times in %s\n", nt, fn);
1855 if (nColExpect != ny)
1858 "Expected %d columns (including time column) in %s, but found %d."
1859 " Maybe you confused options -if and -ix ?",
1873 window->dt = y[0][1] - y[0][0];
1875 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
1877 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
1880 /* Need to alocate memory and set up structure for windows */
1883 /* Use only groups selected with option -is file */
1884 if (header->npullcrds != coordsel->n)
1887 "tpr file contains %d pull groups, but expected %d from group selection "
1892 window->nPull = coordsel->nUse;
1896 window->nPull = header->npullcrds;
1899 window->nBin = bins;
1900 snew(window->Histo, window->nPull);
1901 snew(window->z, window->nPull);
1902 snew(window->k, window->nPull);
1903 snew(window->pos, window->nPull);
1904 snew(window->N, window->nPull);
1905 snew(window->Ntot, window->nPull);
1906 snew(window->g, window->nPull);
1907 snew(window->bsWeight, window->nPull);
1908 window->bContrib = nullptr;
1910 if (opt->bCalcTauInt)
1912 snew(window->ztime, window->nPull);
1916 window->ztime = nullptr;
1918 snew(lennow, window->nPull);
1920 for (g = 0; g < window->nPull; ++g)
1923 window->bsWeight[g] = 1.;
1925 window->Ntot[g] = 0;
1927 snew(window->Histo[g], bins);
1929 if (opt->bCalcTauInt)
1931 window->ztime[g] = nullptr;
1935 /* Copying umbrella center and force const is more involved since not
1936 all pull groups from header (tpr file) may be used in window variable */
1937 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
1939 if (coordsel && !coordsel->bUse[g])
1943 window->k[gUsed] = header->pcrd[g].k;
1944 window->pos[gUsed] = header->pcrd[g].init_dist;
1949 { /* only determine min and max */
1952 min = max = bins = 0; /* Get rid of warnings */
1956 for (i = 0; i < nt; i++)
1958 /* Do you want that time frame? */
1959 t = 1.0 / 1000 * (gmx::roundToInt64((y[0][i] * 1000))); /* round time to fs */
1961 /* get time step and get dstep from opt->dt */
1971 dstep = gmx::roundToInt(opt->dt / dt);
1979 window->dt = dt * dstep;
1983 dt_ok = (i % dstep == 0);
1984 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
1986 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
1987 t,opt->tmin, opt->tmax, dt_ok,timeok); */
1990 /* Note: if coordsel == NULL:
1991 * all groups in pullf/x file are stored in this window, and gUsed == g
1992 * if coordsel != NULL:
1993 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
1996 for (g = 0; g < header->npullcrds; ++g)
1998 /* was this group selected for application in WHAM? */
1999 if (coordsel && !coordsel->bUse[g])
2007 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2008 force = y[g + 1][i];
2009 pos = -force / header->pcrd[g].k + header->pcrd[g].init_dist;
2013 /* Pick the correct column index.
2014 Note that there is always exactly one displacement column.
2017 for (int j = 0; j < g; j++)
2019 column += nColThisCrd[j];
2024 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2038 if (gUsed >= window->nPull)
2041 "gUsed too large (%d, nPull=%d). This error should have been "
2047 if (opt->bCalcTauInt && !bGetMinMax)
2049 /* save time series for autocorrelation analysis */
2050 ntot = window->Ntot[gUsed];
2051 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2052 if (ntot >= lennow[gUsed])
2054 lennow[gUsed] += blocklen;
2055 srenew(window->ztime[gUsed], lennow[gUsed]);
2057 window->ztime[gUsed][ntot] = pos;
2060 ibin = static_cast<int>(std::floor((pos - min) / (max - min) * bins));
2065 while ((ibin += bins) < 0) {}
2067 else if (ibin >= bins)
2069 while ((ibin -= bins) >= bins) {}
2072 if (ibin >= 0 && ibin < bins)
2074 window->Histo[gUsed][ibin] += 1.;
2077 window->Ntot[gUsed]++;
2081 else if (t > opt->tmax)
2085 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2097 for (i = 0; i < ny; i++)
2103 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2104 static void read_tpr_pullxf_files(char** fnTprs,
2107 t_UmbrellaHeader* header,
2108 t_UmbrellaWindow* window,
2109 t_UmbrellaOptions* opt)
2112 real mintmp, maxtmp;
2114 printf("Reading %d tpr and pullx/pullf files\n", nfiles);
2116 /* min and max not given? */
2119 printf("Automatic determination of boundaries...\n");
2122 for (i = 0; i < nfiles; i++)
2124 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2126 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2128 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2129 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2132 "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",
2135 read_pull_xf(fnPull[i],
2142 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2143 if (maxtmp > opt->max)
2147 if (mintmp < opt->min)
2152 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2153 if (opt->bBoundsOnly)
2155 printf("Found option -boundsonly, now exiting.\n");
2159 /* store stepsize in profile */
2160 opt->dz = (opt->max - opt->min) / opt->bins;
2162 bool foundData = false;
2163 for (i = 0; i < nfiles; i++)
2165 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2167 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2169 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2170 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2173 FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2175 read_pull_xf(fnPull[i],
2182 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2183 if (window[i].Ntot[0] == 0)
2185 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2195 "No data points were found in pullf/pullx files. Maybe you need to specify the "
2199 for (i = 0; i < nfiles; i++)
2208 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2210 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2211 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2213 static void readIntegratedAutocorrelationTimes(t_UmbrellaWindow* window, int nwins, const char* fn)
2215 int nlines, ny, i, ig;
2218 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2219 nlines = read_xvg(fn, &iact, &ny);
2220 if (nlines != nwins)
2223 "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2228 for (i = 0; i < nlines; i++)
2230 if (window[i].nPull != ny)
2233 "You are providing autocorrelation times with option -iiact and the\n"
2234 "number of pull groups is different in different simulations. That is not\n"
2235 "supported yet. Sorry.\n");
2237 for (ig = 0; ig < window[i].nPull; ig++)
2239 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2240 window[i].g[ig] = 1 + 2 * iact[ig][i] / window[i].dt;
2242 if (iact[ig][i] <= 0.0)
2244 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2251 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2254 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2255 * that -in case of limited sampling- the ACT may be severely underestimated.
2256 * Note: the g=1+2tau are overwritten.
2257 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2260 static void smoothIact(t_UmbrellaWindow* window, int nwins, t_UmbrellaOptions* opt)
2263 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2265 /* only evaluate within +- 3sigma of the Gausian */
2266 siglim = 3.0 * opt->sigSmoothIact;
2267 siglim2 = gmx::square(siglim);
2268 /* pre-factor of Gaussian */
2269 gaufact = 1.0 / (std::sqrt(2 * M_PI) * opt->sigSmoothIact);
2270 invtwosig2 = 0.5 / gmx::square(opt->sigSmoothIact);
2272 for (i = 0; i < nwins; i++)
2274 snew(window[i].tausmooth, window[i].nPull);
2275 for (ig = 0; ig < window[i].nPull; ig++)
2279 pos = window[i].pos[ig];
2280 for (j = 0; j < nwins; j++)
2282 for (jg = 0; jg < window[j].nPull; jg++)
2284 dpos2 = gmx::square(window[j].pos[jg] - pos);
2285 if (dpos2 < siglim2)
2287 w = gaufact * std::exp(-dpos2 * invtwosig2);
2289 tausm += w * window[j].tau[jg];
2290 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2291 w,dpos2,pos,gaufact,invtwosig2); */
2296 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2298 window[i].tausmooth[ig] = tausm;
2302 window[i].tausmooth[ig] = window[i].tau[ig];
2304 window[i].g[ig] = 1 + 2 * tausm / window[i].dt;
2309 //! Stop integrating autoccorelation function when ACF drops under this value
2310 #define WHAM_AC_ZERO_LIMIT 0.05
2312 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2314 static void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow* window,
2316 t_UmbrellaOptions* opt,
2320 int i, ig, ncorr, ntot, j, k, *count, restart;
2321 real *corr, c0, dt, tmp;
2322 real *ztime, av, tausteps;
2323 FILE *fp, *fpcorr = nullptr;
2327 fpcorr = xvgropen("hist_autocorr.xvg",
2328 "Autocorrelation functions of umbrella windows",
2330 "autocorrelation function",
2335 for (i = 0; i < nwins; i++)
2338 "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...",
2339 100. * (i + 1) / nwins);
2341 ntot = window[i].Ntot[0];
2343 /* using half the maximum time as length of autocorrelation function */
2348 "Tryig to estimtate autocorrelation time from only %d"
2349 " points. Provide more pull data!",
2353 /* snew(corrSq,ncorr); */
2356 snew(window[i].tau, window[i].nPull);
2357 restart = gmx::roundToInt(opt->acTrestart / dt);
2363 for (ig = 0; ig < window[i].nPull; ig++)
2365 if (ntot != window[i].Ntot[ig])
2368 "Encountered different nr of frames in different pull groups.\n"
2369 "That should not happen. (%d and %d)\n",
2371 window[i].Ntot[ig]);
2373 ztime = window[i].ztime[ig];
2375 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2376 for (j = 0, av = 0; (j < ntot); j++)
2381 for (k = 0; (k < ncorr); k++)
2386 for (j = 0; (j < ntot); j += restart)
2388 for (k = 0; (k < ncorr) && (j + k < ntot); k++)
2390 tmp = (ztime[j] - av) * (ztime[j + k] - av);
2392 /* corrSq[k] += tmp*tmp; */
2396 /* divide by nr of frames for each time displacement */
2397 for (k = 0; (k < ncorr); k++)
2399 /* count probably = (ncorr-k+(restart-1))/restart; */
2400 corr[k] = corr[k] / count[k];
2401 /* variance of autocorrelation function */
2402 /* corrSq[k]=corrSq[k]/count[k]; */
2404 /* normalize such that corr[0] == 0 */
2406 for (k = 0; (k < ncorr); k++)
2409 /* corrSq[k]*=c0*c0; */
2412 /* write ACFs in verbose mode */
2415 for (k = 0; (k < ncorr); k++)
2417 fprintf(fpcorr, "%g %g\n", k * dt, corr[k]);
2419 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2422 /* esimate integrated correlation time, fitting is too unstable */
2423 tausteps = 0.5 * corr[0];
2424 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2425 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2427 tausteps += corr[j];
2430 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2431 Kumar et al, eq. 28 ff. */
2432 window[i].tau[ig] = tausteps * dt;
2433 window[i].g[ig] = 1 + 2 * tausteps;
2434 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2445 /* plot IACT along reaction coordinate */
2446 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2447 if (output_env_get_print_xvgr_codes(opt->oenv))
2449 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2450 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2451 for (i = 0; i < nwins; i++)
2453 fprintf(fp, "# %3d ", i);
2454 for (ig = 0; ig < window[i].nPull; ig++)
2456 fprintf(fp, " %11g", window[i].tau[ig]);
2461 for (i = 0; i < nwins; i++)
2463 for (ig = 0; ig < window[i].nPull; ig++)
2465 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2468 if (opt->sigSmoothIact > 0.0)
2470 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = "
2472 opt->sigSmoothIact);
2473 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2474 smoothIact(window, nwins, opt);
2475 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2476 if (output_env_get_print_xvgr_codes(opt->oenv))
2478 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2479 fprintf(fp, "@ s1 symbol color 2\n");
2481 for (i = 0; i < nwins; i++)
2483 for (ig = 0; ig < window[i].nPull; ig++)
2485 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2490 printf("Wrote %s\n", fn);
2494 * compute average and sigma of each umbrella histogram
2496 static void averageSigma(t_UmbrellaWindow* window, int nwins)
2499 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2501 for (i = 0; i < nwins; i++)
2503 snew(window[i].aver, window[i].nPull);
2504 snew(window[i].sigma, window[i].nPull);
2506 ntot = window[i].Ntot[0];
2507 for (ig = 0; ig < window[i].nPull; ig++)
2509 ztime = window[i].ztime[ig];
2510 for (k = 0, av = 0.; k < ntot; k++)
2515 for (k = 0, sum2 = 0.; k < ntot; k++)
2517 diff = ztime[k] - av;
2518 sum2 += diff * diff;
2520 sig = std::sqrt(sum2 / ntot);
2521 window[i].aver[ig] = av;
2523 /* Note: This estimate for sigma is biased from the limited sampling.
2524 Correct sigma by n/(n-1) where n = number of independent
2525 samples. Only possible if IACT is known.
2529 nSamplesIndep = window[i].N[ig] / (window[i].tau[ig] / window[i].dt);
2530 window[i].sigma[ig] = sig * nSamplesIndep / (nSamplesIndep - 1);
2534 window[i].sigma[ig] = sig;
2536 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2543 * Use histograms to compute average force on pull group.
2545 static void computeAverageForce(t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt)
2547 int i, j, bins = opt->bins, k;
2548 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2551 dz = (max - min) / bins;
2552 ztot = opt->max - min;
2553 ztot_half = ztot / 2;
2555 /* Compute average displacement from histograms */
2556 for (j = 0; j < nWindows; ++j)
2558 snew(window[j].forceAv, window[j].nPull);
2559 for (k = 0; k < window[j].nPull; ++k)
2563 for (i = 0; i < opt->bins; ++i)
2565 temp = (1.0 * i + 0.5) * dz + min;
2566 distance = temp - window[j].pos[k];
2568 { /* in cyclic wham: */
2569 if (distance > ztot_half) /* |distance| < ztot_half */
2573 else if (distance < -ztot_half)
2578 w = window[j].Histo[k][i] / window[j].g[k];
2579 displAv += w * distance;
2581 /* Are we near min or max? We are getting wrong forces from the histgrams since
2582 the histograms are zero outside [min,max). Therefore, assume that the position
2583 on the other side of the histomgram center is equally likely. */
2586 posmirrored = window[j].pos[k] - distance;
2587 if (posmirrored >= max || posmirrored < min)
2589 displAv += -w * distance;
2596 /* average force from average displacement */
2597 window[j].forceAv[k] = displAv * window[j].k[k];
2598 /* sigma from average square displacement */
2599 /* window[j].sigma [k] = sqrt(displAv2); */
2600 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2606 * Check if the complete reaction coordinate is covered by the histograms
2608 static void checkReactionCoordinateCovered(t_UmbrellaWindow* window, int nwins, t_UmbrellaOptions* opt)
2610 int i, ig, j, bins = opt->bins;
2612 real avcount = 0, z, relcount, *count;
2613 snew(count, opt->bins);
2615 for (j = 0; j < opt->bins; ++j)
2617 for (i = 0; i < nwins; i++)
2619 for (ig = 0; ig < window[i].nPull; ig++)
2621 count[j] += window[i].Histo[ig][j];
2624 avcount += 1.0 * count[j];
2627 for (j = 0; j < bins; ++j)
2629 relcount = count[j] / avcount;
2630 z = (j + 0.5) * opt->dz + opt->min;
2631 bBoundary = j < bins / 20 || (bins - j) > bins / 20;
2632 /* check for bins with no data */
2636 "\nWARNING, no data point in bin %d (z=%g) !\n"
2637 "You may not get a reasonable profile. Check your histograms!\n",
2641 /* and check for poor sampling */
2642 else if (relcount < 0.005 && !bBoundary)
2644 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2650 /*! \brief Compute initial potential by integrating the average force
2652 * This speeds up the convergence by roughly a factor of 2
2654 static void guessPotByIntegration(t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, const char* xlabel)
2656 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
2657 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
2660 dz = (opt->max - min) / bins;
2662 printf("Getting initial potential by integration.\n");
2664 /* Compute average displacement from histograms */
2665 computeAverageForce(window, nWindows, opt);
2667 /* Get force for each bin from all histograms in this bin, or, alternatively,
2668 if no histograms are inside this bin, from the closest histogram */
2671 for (j = 0; j < opt->bins; ++j)
2673 pos = (1.0 * j + 0.5) * dz + min;
2677 groupmin = winmin = 0;
2678 for (i = 0; i < nWindows; i++)
2680 for (ig = 0; ig < window[i].nPull; ig++)
2682 hispos = window[i].pos[ig];
2683 dist = std::abs(hispos - pos);
2684 /* average force within bin */
2688 fAv += window[i].forceAv[ig];
2690 /* at the same time, remember closest histogram */
2699 /* if no histogram found in this bin, use closest histogram */
2706 fAv = window[winmin].forceAv[groupmin];
2710 for (j = 1; j < opt->bins; ++j)
2712 pot[j] = pot[j - 1] - 0.5 * dz * (f[j - 1] + f[j]);
2715 /* cyclic wham: linearly correct possible offset */
2718 diff = (pot[bins - 1] - pot[0]) / (bins - 1);
2719 for (j = 1; j < opt->bins; ++j)
2726 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
2727 for (j = 0; j < opt->bins; ++j)
2729 fprintf(fp, "%g %g\n", (j + 0.5) * dz + opt->min, pot[j]);
2732 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
2735 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
2736 energy offsets which are usually determined by wham
2737 First: turn pot into probabilities:
2739 for (j = 0; j < opt->bins; ++j)
2741 pot[j] = std::exp(-pot[j] / (gmx::c_boltz * opt->Temperature));
2743 calc_z(pot, window, nWindows, opt, TRUE);
2749 //! Count number of words in an line
2750 static int wordcount(char* ptr)
2755 if (std::strlen(ptr) == 0)
2759 /* fprintf(stderr,"ptr='%s'\n",ptr); */
2761 for (i = 0; (ptr[i] != '\0'); i++)
2763 is[cur] = isspace(ptr[i]);
2764 if ((i > 0) && (is[cur] && !is[1 - cur]))
2773 /*! \brief Read input file for pull group selection (option -is)
2775 * TO DO: ptr=fgets(...) is never freed (small memory leak)
2777 static void readPullCoordSelection(t_UmbrellaOptions* opt, char** fnTpr, int nTpr)
2780 int i, iline, n, len = STRLEN, temp;
2781 char *ptr = nullptr, *tmpbuf = nullptr;
2782 char fmt[1024], fmtign[1024];
2783 int block = 1, sizenow;
2785 fp = gmx_ffopen(opt->fnCoordSel, "r");
2786 opt->coordsel = nullptr;
2791 while ((ptr = fgets3(fp, tmpbuf, &len)) != nullptr)
2796 if (iline >= sizenow)
2799 srenew(opt->coordsel, sizenow);
2801 opt->coordsel[iline].n = n;
2802 opt->coordsel[iline].nUse = 0;
2803 snew(opt->coordsel[iline].bUse, n);
2806 for (i = 0; i < n; i++)
2808 std::strcpy(fmt, fmtign);
2809 std::strcat(fmt, "%d");
2810 if (sscanf(ptr, fmt, &temp))
2812 opt->coordsel[iline].bUse[i] = (temp > 0);
2813 if (opt->coordsel[iline].bUse[i])
2815 opt->coordsel[iline].nUse++;
2818 std::strcat(fmtign, "%*s");
2822 opt->nCoordsel = iline;
2823 if (nTpr != opt->nCoordsel)
2825 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel, opt->fnCoordSel);
2828 printf("\nUse only these pull coordinates:\n");
2829 for (iline = 0; iline < nTpr; iline++)
2831 printf("%s (%d of %d coordinates):",
2833 opt->coordsel[iline].nUse,
2834 opt->coordsel[iline].n);
2835 for (i = 0; i < opt->coordsel[iline].n; i++)
2837 if (opt->coordsel[iline].bUse[i])
2839 printf(" %d", i + 1);
2850 #define WHAMBOOLXOR(a, b) (((!(a)) && (b)) || ((a) && (!(b))))
2852 //! Number of elements in fnm (used for command line parsing)
2853 #define NFILE asize(fnm)
2855 //! The main gmx wham routine
2856 int gmx_wham(int argc, char* argv[])
2858 const char* desc[] = {
2859 "[THISMODULE] is an analysis program that implements the Weighted",
2860 "Histogram Analysis Method (WHAM). It is intended to analyze",
2861 "output files generated by umbrella sampling simulations to ",
2862 "compute a potential of mean force (PMF).[PAR]",
2864 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
2865 "where the first pull coordinate(s) is/are umbrella pull coordinates",
2866 "and, if multiple coordinates need to be analyzed, all used the same",
2867 "geometry and dimensions. In most cases this is not an issue.[PAR]",
2868 "At present, three input modes are supported.",
2870 "* With option [TT]-it[tt], the user provides a file which contains the",
2871 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
2872 " AND, with option [TT]-ix[tt], a file which contains file names of",
2873 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
2874 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
2875 " first pullx, etc.",
2876 "* Same as the previous input mode, except that the user",
2877 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
2878 " From the pull force the position in the umbrella potential is",
2879 " computed. This does not work with tabulated umbrella potentials.",
2881 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If ",
2883 "some of the pull coordinates should be used, a pull coordinate selection file (option ",
2884 "[TT]-is[tt]) can ",
2885 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
2886 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr ",
2888 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. ",
2890 "If you have three tpr files, each containing 4 pull coordinates, but only pull ",
2891 "coordinates 1 and 2 should be ",
2892 "used, coordsel.dat looks like this::",
2898 "By default, the output files are::",
2900 " [TT]-o[tt] PMF output file",
2901 " [TT]-hist[tt] Histograms output file",
2903 "Always check whether the histograms sufficiently overlap.[PAR]",
2904 "The umbrella potential is assumed to be harmonic and the force constants are ",
2905 "read from the [REF].tpr[ref] files. If a non-harmonic umbrella force ",
2907 "a tabulated potential can be provided with [TT]-tab[tt].",
2912 "* [TT]-bins[tt] Number of bins used in analysis",
2913 "* [TT]-temp[tt] Temperature in the simulations",
2914 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
2915 "* [TT]-auto[tt] Automatic determination of boundaries",
2916 "* [TT]-min,-max[tt] Boundaries of the profile",
2918 "The data points that are used to compute the profile",
2919 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
2920 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
2921 "umbrella window.[PAR]",
2922 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
2923 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
2924 "With energy output, the energy in the first bin is defined to be zero. ",
2925 "If you want the free energy at a different ",
2926 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
2927 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
2928 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
2929 "[THISMODULE] will make use of the",
2930 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
2931 "reaction coordinate will assumed be be neighbors.[PAR]",
2932 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
2933 "which may be useful for, e.g. membranes.",
2938 "If available, the number of OpenMP threads used by gmx wham can be controlled by setting",
2939 "the [TT]OMP_NUM_THREADS[tt] environment variable.",
2944 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
2945 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
2946 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
2947 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
2948 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
2949 "Because the IACTs can be severely underestimated in case of limited ",
2950 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
2951 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
2952 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
2953 "integration of the ACFs while the ACFs are larger 0.05.",
2954 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
2955 "less robust) method such as fitting to a double exponential, you can ",
2956 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
2957 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
2958 "input file (pullx/pullf file) and one column per pull coordinate in the ",
2964 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
2965 "otherwise the statistical error may be substantially underestimated. ",
2966 "More background and examples for the bootstrap technique can be found in ",
2967 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
2968 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
2969 "Four bootstrapping methods are supported and ",
2970 "selected with [TT]-bs-method[tt].",
2972 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
2973 " data points, and the bootstrap is carried out by assigning random weights to the ",
2974 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
2975 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
2976 " statistical error is underestimated.",
2977 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
2978 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
2979 " (allowing duplication, i.e. sampling with replacement).",
2980 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
2981 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
2982 " divided into blocks and only histograms within each block are mixed. Note that ",
2983 " the histograms within each block must be representative for all possible histograms, ",
2984 " otherwise the statistical error is underestimated.",
2985 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
2986 " such that the generated data points are distributed according the given histograms ",
2987 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
2988 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
2989 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
2990 " Note that this method may severely underestimate the error in case of limited ",
2992 " that is if individual histograms do not represent the complete phase space at ",
2993 " the respective positions.",
2994 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
2995 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
2996 " and width of the umbrella histograms. That method yields similar error estimates ",
2997 " like method [TT]traj[tt].",
2999 "Bootstrapping output:",
3001 "* [TT]-bsres[tt] Average profile and standard deviations",
3002 "* [TT]-bsprof[tt] All bootstrapping profiles",
3004 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3005 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3009 const char* en_unit[] = { nullptr, "kJ", "kCal", "kT", nullptr };
3010 const char* en_unit_label[] = { "", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", nullptr };
3011 const char* en_bsMethod[] = { nullptr, "b-hist", "hist", "traj", "traj-gauss", nullptr };
3012 static t_UmbrellaOptions opt;
3015 { "-min", FALSE, etREAL, { &opt.min }, "Minimum coordinate in profile" },
3016 { "-max", FALSE, etREAL, { &opt.max }, "Maximum coordinate in profile" },
3017 { "-auto", FALSE, etBOOL, { &opt.bAuto }, "Determine min and max automatically" },
3018 { "-bins", FALSE, etINT, { &opt.bins }, "Number of bins in profile" },
3019 { "-temp", FALSE, etREAL, { &opt.Temperature }, "Temperature" },
3020 { "-tol", FALSE, etREAL, { &opt.Tolerance }, "Tolerance" },
3021 { "-v", FALSE, etBOOL, { &opt.verbose }, "Verbose mode" },
3022 { "-b", FALSE, etREAL, { &opt.tmin }, "First time to analyse (ps)" },
3023 { "-e", FALSE, etREAL, { &opt.tmax }, "Last time to analyse (ps)" },
3024 { "-dt", FALSE, etREAL, { &opt.dt }, "Analyse only every dt ps" },
3025 { "-histonly", FALSE, etBOOL, { &opt.bHistOnly }, "Write histograms and exit" },
3029 { &opt.bBoundsOnly },
3030 "Determine min and max and exit (with [TT]-auto[tt])" },
3031 { "-log", FALSE, etBOOL, { &opt.bLog }, "Calculate the log of the profile before printing" },
3032 { "-unit", FALSE, etENUM, { en_unit }, "Energy unit in case of log output" },
3037 "Define profile to 0.0 at this position (with [TT]-log[tt])" },
3042 "Create cyclic/periodic profile. Assumes min and max are the same point." },
3043 { "-sym", FALSE, etBOOL, { &opt.bSym }, "Symmetrize profile around z=0" },
3048 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)" },
3052 { &opt.bCalcTauInt },
3053 "Calculate integrated autocorrelation times and use in wham" },
3057 { &opt.sigSmoothIact },
3058 "Smooth autocorrelation times along reaction coordinate with Gaussian of this "
3059 "[GRK]sigma[grk]" },
3063 { &opt.acTrestart },
3064 "When computing autocorrelation functions, restart computing every .. (ps)" },
3068 { &opt.bAllowReduceIact },
3069 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3070 "during smoothing" },
3074 { &opt.nBootStrap },
3075 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3076 { "-bs-method", FALSE, etENUM, { en_bsMethod }, "Bootstrap method" },
3080 { &opt.tauBootStrap },
3081 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is "
3083 { "-bs-seed", FALSE, etINT, { &opt.bsSeed }, "Seed for bootstrapping. (-1 = use time)" },
3087 { &opt.histBootStrapBlockLength },
3088 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]." },
3092 { &opt.bs_verbose },
3093 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap." },
3097 { &opt.stepchange },
3098 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])" },
3102 { &opt.stepUpdateContrib },
3103 "HIDDENUpdate table with significan contributions to WHAM every ... iterations" },
3107 { efDAT, "-ix", "pullx-files", ffOPTRD }, /* wham input: pullf.xvg's and tprs */
3108 { efDAT, "-if", "pullf-files", ffOPTRD }, /* wham input: pullf.xvg's and tprs */
3109 { efDAT, "-it", "tpr-files", ffOPTRD }, /* wham input: tprs */
3110 { efDAT, "-is", "coordsel", ffOPTRD }, /* input: select pull coords to use */
3111 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3112 { efXVG, "-hist", "histo", ffWRITE }, /* output file for histograms */
3113 { efXVG, "-oiact", "iact", ffOPTWR }, /* writing integrated autocorrelation times */
3114 { efDAT, "-iiact", "iact-in", ffOPTRD }, /* reading integrated autocorrelation times */
3115 { efXVG, "-bsres", "bsResult", ffOPTWR }, /* average and errors of bootstrap analysis */
3116 { efXVG, "-bsprof", "bsProfs", ffOPTWR }, /* output file for bootstrap profiles */
3117 { efDAT, "-tab", "umb-pot", ffOPTRD }, /* Tabulated umbrella potential (if not harmonic) */
3120 int i, j, l, nfiles, nwins, nfiles2;
3121 t_UmbrellaHeader header;
3122 t_UmbrellaWindow* window = nullptr;
3123 double * profile, maxchange = 1e20;
3124 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3125 char ** fninTpr, **fninPull;
3127 FILE * histout, *profout;
3128 char xlabel[STRLEN], ylabel[256], title[256];
3131 opt.verbose = FALSE;
3132 opt.bHistOnly = FALSE;
3141 opt.coordsel = nullptr;
3143 /* bootstrapping stuff */
3145 opt.bsMethod = bsMethod_hist;
3146 opt.tauBootStrap = 0.0;
3148 opt.histBootStrapBlockLength = 8;
3149 opt.bs_verbose = FALSE;
3154 opt.Temperature = 298;
3155 opt.Tolerance = 1e-6;
3156 opt.bBoundsOnly = FALSE;
3158 opt.bCalcTauInt = FALSE;
3159 opt.sigSmoothIact = 0.0;
3160 opt.bAllowReduceIact = TRUE;
3161 opt.bInitPotByIntegration = TRUE;
3162 opt.acTrestart = 1.0;
3163 opt.stepchange = 100;
3164 opt.stepUpdateContrib = 100;
3166 if (!parse_common_args(
3167 &argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &opt.oenv))
3172 opt.unit = nenum(en_unit);
3173 opt.bsMethod = nenum(en_bsMethod);
3175 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3177 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3178 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3179 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3180 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3181 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3182 if (opt.bTab && opt.bPullf)
3185 "Force input does not work with tabulated potentials. "
3186 "Provide pullx.xvg files!");
3189 if (!WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3191 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3194 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3195 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3196 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3197 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3199 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3200 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3201 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3202 if ((bMinSet || bMaxSet) && bAutoSet)
3204 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3207 if ((bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3209 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3212 if (bMinSet && opt.bAuto)
3214 printf("Note: min and max given, switching off -auto.\n");
3218 if (opt.bTauIntGiven && opt.bCalcTauInt)
3221 "Either read (option -iiact) or calculate (option -ac) the\n"
3222 "the autocorrelation times. Not both.");
3225 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3228 "Either compute autocorrelation times (ACTs) (option -ac) or "
3229 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3231 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3234 "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3235 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3238 /* Reading gmx4/gmx5 pull output and tpr files */
3239 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3241 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3242 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3243 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3246 opt.bPullf ? "force" : "position",
3249 if (nfiles != nfiles2)
3251 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles, opt.fnTpr, nfiles2, fnPull);
3254 /* Read file that selects the pull group to be used */
3255 if (opt.fnCoordSel != nullptr)
3257 readPullCoordSelection(&opt, fninTpr, nfiles);
3260 window = initUmbrellaWindows(nfiles);
3261 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3263 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3264 We can therefore get the units for the xlabel from the first coordinate. */
3265 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3269 /* enforce equal weight for all histograms? */
3272 enforceEqualWeights(window, nwins);
3275 /* write histograms */
3276 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms", xlabel, "count", opt.oenv);
3277 for (l = 0; l < opt.bins; ++l)
3279 fprintf(histout, "%e\t", (l + 0.5) / opt.bins * (opt.max - opt.min) + opt.min);
3280 for (i = 0; i < nwins; ++i)
3282 for (j = 0; j < window[i].nPull; ++j)
3284 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3287 fprintf(histout, "\n");
3290 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3293 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3297 /* Using tabulated umbrella potential */
3300 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3303 /* Integrated autocorrelation times provided ? */
3304 if (opt.bTauIntGiven)
3306 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3309 /* Compute integrated autocorrelation times */
3310 if (opt.bCalcTauInt)
3312 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3315 /* calc average and sigma for each histogram
3316 (maybe required for bootstrapping. If not, this is fast anyhow) */
3317 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3319 averageSigma(window, nwins);
3322 /* Get initial potential by simple integration */
3323 if (opt.bInitPotByIntegration)
3325 guessPotByIntegration(window, nwins, &opt, xlabel);
3328 /* Check if complete reaction coordinate is covered */
3329 checkReactionCoordinateCovered(window, nwins, &opt);
3331 /* Calculate profile */
3332 snew(profile, opt.bins);
3340 if ((i % opt.stepUpdateContrib) == 0)
3342 setup_acc_wham(profile, window, nwins, &opt);
3344 if (maxchange < opt.Tolerance)
3347 /* if (opt.verbose) */
3348 printf("Switched to exact iteration in iteration %d\n", i);
3350 calc_profile(profile, window, nwins, &opt, bExact);
3351 if (((i % opt.stepchange) == 0 || i == 1) && i != 0)
3353 printf("\t%4d) Maximum change %e\n", i, maxchange);
3356 } while ((maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3357 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3359 /* calc error from Kumar's formula */
3360 /* Unclear how the error propagates along reaction coordinate, therefore
3362 /* calc_error_kumar(profile,window, nwins,&opt); */
3364 /* Write profile in energy units? */
3367 prof_normalization_and_unit(profile, &opt);
3368 std::strcpy(ylabel, en_unit_label[opt.unit]);
3369 std::strcpy(title, "Umbrella potential");
3373 std::strcpy(ylabel, "Density of states");
3374 std::strcpy(title, "Density of states");
3377 /* symmetrize profile around z=0? */
3380 symmetrizeProfile(profile, &opt);
3383 /* write profile or density of states */
3384 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3385 for (i = 0; i < opt.bins; ++i)
3387 fprintf(profout, "%e\t%e\n", (i + 0.5) / opt.bins * (opt.max - opt.min) + opt.min, profile[i]);
3390 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3392 /* Bootstrap Method */
3395 do_bootstrapping(opt2fn("-bsres", NFILE, fnm),
3396 opt2fn("-bsprof", NFILE, fnm),
3397 opt2fn("-hist", NFILE, fnm),
3407 freeUmbrellaWindows(window, nfiles);
3409 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3410 please_cite(stdout, "Hub2010");