2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
42 * \author Jochen Hub <jhub@gwdg.de>
58 #include "gromacs/commandline/pargs.h"
59 #include "gromacs/fileio/tpxio.h"
60 #include "gromacs/fileio/xvgr.h"
61 #include "gromacs/gmxana/gmx_ana.h"
62 #include "gromacs/math/functions.h"
63 #include "gromacs/math/units.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/mdtypes/inputrec.h"
66 #include "gromacs/mdtypes/md_enums.h"
67 #include "gromacs/mdtypes/pull_params.h"
68 #include "gromacs/mdtypes/state.h"
69 #include "gromacs/pulling/pull.h"
70 #include "gromacs/random/tabulatednormaldistribution.h"
71 #include "gromacs/random/threefry.h"
72 #include "gromacs/random/uniformintdistribution.h"
73 #include "gromacs/random/uniformrealdistribution.h"
74 #include "gromacs/utility/arraysize.h"
75 #include "gromacs/utility/cstringutil.h"
76 #include "gromacs/utility/exceptions.h"
77 #include "gromacs/utility/fatalerror.h"
78 #include "gromacs/utility/futil.h"
79 #include "gromacs/utility/gmxomp.h"
80 #include "gromacs/utility/path.h"
81 #include "gromacs/utility/pleasecite.h"
82 #include "gromacs/utility/smalloc.h"
84 //! longest file names allowed in input files
85 #define WHAM_MAXFILELEN 2048
88 * enum for energy units
99 * enum for type of input files (pdos, tpr, or pullf)
109 /*! \brief enum for bootstrapping method
111 * These bootstrap methods are supported:
112 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
113 * (bsMethod_BayesianHist)
114 * - bootstrap complete histograms (bsMethod_hist)
115 * - bootstrap trajectories from given umbrella histograms. This generates new
116 * "synthetic" histograms (bsMethod_traj)
117 * - bootstrap trajectories from Gaussian with mu/sigma computed from
118 * the respective histogram (bsMethod_trajGauss). This gives very similar
119 * results compared to bsMethod_traj.
121 * ********************************************************************
122 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
123 * JS Hub, BL de Groot, D van der Spoel
124 * g_wham - A free weighted histogram analysis implementation including
125 * robust error and autocorrelation estimates,
126 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
127 * ********************************************************************
132 bsMethod_BayesianHist,
138 //! Parameters of one pull coodinate
141 PullingAlgorithm pull_type; //!< such as constraint, umbrella, ...
142 PullGroupGeometry geometry; //!< such as distance, direction, cylinder
143 int ngroup; //!< the number of pull groups involved
144 ivec dim; //!< pull dimension with geometry distance
145 int ndim; //!< nr of pull_dim != 0
146 real k; //!< force constants in tpr file
147 real init_dist; //!< reference displacement
148 char coord_unit[256]; //!< unit of the displacement
151 //! Parameters of the umbrella potentials
155 * \name Using umbrella pull code since gromacs 4.x
158 int npullcrds; //!< nr of umbrella pull coordinates for reading
159 t_pullcoord* pcrd; //!< the pull coordinates
160 gmx_bool bPrintCOM; //!< COMs of pull groups writtn in pullx.xvg
161 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
162 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
166 * \name Using PDO files common until gromacs 3.x
174 char PullName[4][256];
176 double UmbCons[4][3];
180 //! Data in the umbrella histograms
183 int nPull; //!< nr of pull groups in this pdo or pullf/x file
184 double** Histo; //!< nPull histograms
185 double** cum; //!< nPull cumulative distribution functions
186 int nBin; //!< nr of bins. identical to opt->bins
187 double* k; //!< force constants for the nPull coords
188 double* pos; //!< umbrella positions for the nPull coords
189 double* z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
190 int* N; //!< nr of data points in nPull histograms.
191 int* Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
193 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
195 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
196 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
199 double* tau; //!< intetrated autocorrelation time (IACT)
200 double* tausmooth; //!< smoothed IACT
202 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
204 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
206 real** ztime; //!< input data z(t) as a function of time. Required to compute ACTs
208 /*! \brief average force estimated from average displacement, fAv=dzAv*k
210 * Used for integration to guess the potential.
213 real* aver; //!< average of histograms
214 real* sigma; //!< stddev of histograms
215 double* bsWeight; //!< for bootstrapping complete histograms with continuous weights
218 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
221 int n; //!< total nr of pull coords in this tpr file
222 int nUse; //!< nr of pull coords used
223 gmx_bool* bUse; //!< boolean array of size n. =1 if used, =0 if not
226 //! Parameters of WHAM
227 typedef struct UmbrellaOptions // NOLINT(clang-analyzer-optin.performance.Padding)
233 const char *fnTpr, *fnPullf, *fnCoordSel;
234 const char *fnPdo, *fnPullx; //!< file names of input
235 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
236 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
238 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
239 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
240 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
241 t_coordselection* coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
244 * \name Basic WHAM options
247 int bins; //!< nr of bins, min, max, and dz of profile
249 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
250 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
253 * \name Output control
256 gmx_bool bLog; //!< energy output (instead of probability) for profile
257 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
258 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
259 /*! \brief after wham, set prof to zero at this z-position.
260 * When bootstrapping, set zProf0 to a "stable" reference position.
263 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
265 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
266 gmx_bool bAuto; //!< determine min and max automatically but do not exit
268 gmx_bool verbose; //!< more noisy wham mode
269 int stepchange; //!< print maximum change in prof after how many interations
270 gmx_output_env_t* oenv; //!< xvgr options
273 * \name Autocorrelation stuff
276 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
277 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
278 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
279 real acTrestart; //!< when computing ACT, time between restarting points
281 /* \brief Enforce the same weight for each umbella window, that is
282 * calculate with the same number of data points for
283 * each window. That can be reasonable, if the histograms
284 * have different length, but due to autocorrelation,
285 * a longer simulation should not have larger weightin wham.
291 * \name Bootstrapping stuff
294 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
296 /* \brief bootstrap method
298 * if == bsMethod_hist, consider complete histograms as independent
299 * data points and, hence, only mix complete histograms.
300 * if == bsMethod_BayesianHist, consider complete histograms
301 * as independent data points, but assign random weights
302 * to the histograms during the bootstrapping ("Bayesian bootstrap")
303 * In case of long correlations (e.g., inside a channel), these
304 * will yield a more realistic error.
305 * if == bsMethod_traj(Gauss), generate synthetic histograms
307 * histogram by generating an autocorrelated random sequence
308 * that is distributed according to the respective given
309 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
310 * (instead of from the umbrella histogram) to generate a new
315 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
318 /* \brief when mixing histograms, mix only histograms withing blocks
319 long the reaction coordinate xi. Avoids gaps along xi. */
320 int histBootStrapBlockLength;
322 int bsSeed; //!< random seed for bootstrapping
324 /* \brief Write cumulative distribution functions (CDFs) of histograms
325 and write the generated histograms for each bootstrap */
329 * \name tabulated umbrella potential stuff
333 double * tabX, *tabY, tabMin, tabMax, tabDz;
336 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
337 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
340 //! Make an umbrella window (may contain several histograms)
341 static t_UmbrellaWindow* initUmbrellaWindows(int nwin)
343 t_UmbrellaWindow* win;
346 for (i = 0; i < nwin; i++)
348 win[i].Histo = win[i].cum = nullptr;
349 win[i].k = win[i].pos = win[i].z = nullptr;
350 win[i].N = win[i].Ntot = nullptr;
351 win[i].g = win[i].tau = win[i].tausmooth = nullptr;
352 win[i].bContrib = nullptr;
353 win[i].ztime = nullptr;
354 win[i].forceAv = nullptr;
355 win[i].aver = win[i].sigma = nullptr;
356 win[i].bsWeight = nullptr;
361 //! Delete an umbrella window (may contain several histograms)
362 static void freeUmbrellaWindows(t_UmbrellaWindow* win, int nwin)
365 for (i = 0; i < nwin; i++)
369 for (j = 0; j < win[i].nPull; j++)
371 sfree(win[i].Histo[j]);
376 for (j = 0; j < win[i].nPull; j++)
378 sfree(win[i].cum[j]);
383 for (j = 0; j < win[i].nPull; j++)
385 sfree(win[i].bContrib[j]);
397 sfree(win[i].tausmooth);
398 sfree(win[i].bContrib);
400 sfree(win[i].forceAv);
403 sfree(win[i].bsWeight);
409 * Read and setup tabulated umbrella potential
411 static void setup_tab(const char* fn, t_UmbrellaOptions* opt)
416 printf("Setting up tabulated potential from file %s\n", fn);
417 nl = read_xvg(fn, &y, &ny);
421 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
423 opt->tabMin = y[0][0];
424 opt->tabMax = y[0][nl - 1];
425 opt->tabDz = (opt->tabMax - opt->tabMin) / (nl - 1);
429 "The tabulated potential in %s must be provided in \n"
430 "ascending z-direction",
433 for (i = 0; i < nl - 1; i++)
435 if (std::abs(y[0][i + 1] - y[0][i] - opt->tabDz) > opt->tabDz / 1e6)
437 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", fn);
442 for (i = 0; i < nl; i++)
444 opt->tabX[i] = y[0][i];
445 opt->tabY[i] = y[1][i];
447 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
453 //! Read the header of an PDO file (position, force const, nr of groups)
454 static void read_pdo_header(FILE* file, t_UmbrellaHeader* header, t_UmbrellaOptions* opt)
457 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
459 std::istringstream ist;
462 if (fgets(line, 2048, file) == nullptr)
464 gmx_fatal(FARGS, "Error reading header from pdo file\n");
467 ist >> Buffer0 >> Buffer1 >> Buffer2;
468 if (std::strcmp(Buffer1, "UMBRELLA") != 0)
471 "This does not appear to be a valid pdo file. Found %s, expected %s\n"
472 "(Found in first line: `%s')\n",
477 if (std::strcmp(Buffer2, "3.0") != 0)
479 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
483 if (fgets(line, 2048, file) == nullptr)
485 gmx_fatal(FARGS, "Error reading header from pdo file\n");
488 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
489 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
491 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
492 if (header->nDim != 1)
494 gmx_fatal(FARGS, "Currently only supports one dimension");
498 if (fgets(line, 2048, file) == nullptr)
500 gmx_fatal(FARGS, "Error reading header from pdo file\n");
503 ist >> Buffer0 >> Buffer1 >> header->nSkip;
506 if (fgets(line, 2048, file) == nullptr)
508 gmx_fatal(FARGS, "Error reading header from pdo file\n");
511 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
514 if (fgets(line, 2048, file) == nullptr)
516 gmx_fatal(FARGS, "Error reading header from pdo file\n");
519 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
523 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip, header->Reference);
526 for (i = 0; i < header->nPull; ++i)
528 if (fgets(line, 2048, file) == nullptr)
530 gmx_fatal(FARGS, "Error reading header from pdo file\n");
533 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
534 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
535 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
539 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
542 header->UmbPos[i][0],
543 header->UmbCons[i][0]);
547 if (fgets(line, 2048, file) == nullptr)
549 gmx_fatal(FARGS, "Cannot read from file\n");
553 if (std::strcmp(Buffer3, "#####") != 0)
555 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
560 static char* fgets3(FILE* fp, char ptr[], int* len)
565 if (fgets(ptr, *len - 1, fp) == nullptr)
570 while ((std::strchr(ptr, '\n') == nullptr) && (!feof(fp)))
572 /* This line is longer than len characters, let's increase len! */
576 if (fgets(p - 1, STRLEN, fp) == nullptr)
581 slen = std::strlen(ptr);
582 if (ptr[slen - 1] == '\n')
584 ptr[slen - 1] = '\0';
590 /*! \brief Read the data columns of and PDO file.
592 * TO DO: Get rid of the scanf function to avoid the clang warning.
593 * At the moment, this warning is avoided by hiding the format string
594 * the variable fmtlf.
596 static void read_pdo_data(FILE* file,
597 t_UmbrellaHeader* header,
599 t_UmbrellaWindow* win,
600 t_UmbrellaOptions* opt,
605 int i, inttemp, bins, count, ntot;
606 real minval, maxval, minfound = 1e20, maxfound = -1e20;
607 double temp, time, time0 = 0, dt;
609 t_UmbrellaWindow* window = nullptr;
610 gmx_bool timeok, dt_ok = true;
611 char * tmpbuf = nullptr, fmt[256], fmtign[256], fmtlf[5] = "%lf";
612 int len = STRLEN, dstep = 1;
613 const int blocklen = 4096;
614 int* lennow = nullptr;
622 window = win + fileno;
623 /* Need to alocate memory and set up structure */
624 window->nPull = header->nPull;
627 snew(window->Histo, window->nPull);
628 snew(window->z, window->nPull);
629 snew(window->k, window->nPull);
630 snew(window->pos, window->nPull);
631 snew(window->N, window->nPull);
632 snew(window->Ntot, window->nPull);
633 snew(window->g, window->nPull);
634 snew(window->bsWeight, window->nPull);
636 window->bContrib = nullptr;
638 if (opt->bCalcTauInt)
640 snew(window->ztime, window->nPull);
644 window->ztime = nullptr;
646 snew(lennow, window->nPull);
648 for (i = 0; i < window->nPull; ++i)
651 window->bsWeight[i] = 1.;
652 snew(window->Histo[i], bins);
653 window->k[i] = header->UmbCons[i][0];
654 window->pos[i] = header->UmbPos[i][0];
658 if (opt->bCalcTauInt)
660 window->ztime[i] = nullptr;
664 /* Done with setup */
670 minval = maxval = bins = 0; /* Get rid of warnings */
675 while ((ptr = fgets3(file, tmpbuf, &len)) != nullptr)
679 if (ptr[0] == '#' || std::strlen(ptr) < 2)
684 /* Initiate format string */
686 std::strcat(fmtign, "%*s");
688 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
689 /* Round time to fs */
690 time = 1.0 / 1000 * (gmx::roundToInt64(time * 1000));
692 /* get time step of pdo file */
702 dstep = gmx::roundToInt(opt->dt / dt);
710 window->dt = dt * dstep;
715 dt_ok = ((count - 1) % dstep == 0);
716 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
718 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
719 time,opt->tmin, opt->tmax, dt_ok,timeok); */
723 for (i = 0; i < header->nPull; ++i)
725 std::strcpy(fmt, fmtign);
726 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
727 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
728 if (sscanf(ptr, fmt, &temp))
730 temp += header->UmbPos[i][0];
744 if (opt->bCalcTauInt)
746 /* save time series for autocorrelation analysis */
747 ntot = window->Ntot[i];
748 if (ntot >= lennow[i])
750 lennow[i] += blocklen;
751 srenew(window->ztime[i], lennow[i]);
753 window->ztime[i][ntot] = temp;
757 temp /= (maxval - minval);
759 temp = std::floor(temp);
761 inttemp = static_cast<int>(temp);
768 else if (inttemp >= bins)
774 if (inttemp >= 0 && inttemp < bins)
776 window->Histo[i][inttemp] += 1.;
784 if (time > opt->tmax)
788 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
804 /*! \brief Set identical weights for all histograms
806 * Normally, the weight is given by the number data points in each
807 * histogram, together with the autocorrelation time. This can be overwritten
808 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
809 * an appropriate autocorrelation times instead of using this function.
811 static void enforceEqualWeights(t_UmbrellaWindow* window, int nWindows)
813 int i, k, j, NEnforced;
816 NEnforced = window[0].Ntot[0];
817 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
818 "non-weighted histogram analysis method. Ndata = %d\n",
820 /* enforce all histograms to have the same weight as the very first histogram */
822 for (j = 0; j < nWindows; ++j)
824 for (k = 0; k < window[j].nPull; ++k)
826 ratio = 1.0 * NEnforced / window[j].Ntot[k];
827 for (i = 0; i < window[0].nBin; ++i)
829 window[j].Histo[k][i] *= ratio;
831 window[j].N[k] = gmx::roundToInt(ratio * window[j].N[k]);
836 /*! \brief Simple linear interpolation between two given tabulated points
838 static double tabulated_pot(double dist, t_UmbrellaOptions* opt)
841 double pl, pu, dz, dp;
843 jl = static_cast<int>(std::floor((dist - opt->tabMin) / opt->tabDz));
845 if (jl < 0 || ju >= opt->tabNbins)
848 "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
849 "Provide an extended table.",
856 dz = dist - opt->tabX[jl];
857 dp = (pu - pl) * dz / opt->tabDz;
863 * Check which bins substiantially contribute (accelerates WHAM)
865 * Don't worry, that routine does not mean we compute the PMF in limited precision.
866 * After rapid convergence (using only substiantal contributions), we always switch to
869 static void setup_acc_wham(const double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt)
871 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
872 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
873 gmx_bool bAnyContrib;
874 static int bFirst = 1;
875 static double wham_contrib_lim;
879 for (i = 0; i < nWindows; ++i)
881 nGrptot += window[i].nPull;
883 wham_contrib_lim = opt->Tolerance / nGrptot;
886 ztot = opt->max - opt->min;
887 ztot_half = ztot / 2;
889 for (i = 0; i < nWindows; ++i)
891 if (!window[i].bContrib)
893 snew(window[i].bContrib, window[i].nPull);
895 for (j = 0; j < window[i].nPull; ++j)
897 if (!window[i].bContrib[j])
899 snew(window[i].bContrib[j], opt->bins);
902 for (k = 0; k < opt->bins; ++k)
904 temp = (1.0 * k + 0.5) * dz + min;
905 distance = temp - window[i].pos[j]; /* distance to umbrella center */
907 { /* in cyclic wham: */
908 if (distance > ztot_half) /* |distance| < ztot_half */
912 else if (distance < -ztot_half)
917 /* Note: there are two contributions to bin k in the wham equations:
918 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
919 ii) exp(- U/(BOLTZ*opt->Temperature))
920 where U is the umbrella potential
921 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
926 U = 0.5 * window[i].k[j] * gmx::square(distance); /* harmonic potential assumed. */
930 U = tabulated_pot(distance, opt); /* Use tabulated potential */
932 contrib1 = profile[k] * std::exp(-U / (BOLTZ * opt->Temperature));
933 contrib2 = window[i].N[j] * std::exp(-U / (BOLTZ * opt->Temperature) + window[i].z[j]);
934 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
935 bAnyContrib = bAnyContrib || window[i].bContrib[j][k];
936 if (window[i].bContrib[j][k])
942 /* If this histo is far outside min and max all bContrib may be FALSE,
943 causing a floating point exception later on. To avoid that, switch
947 for (k = 0; k < opt->bins; ++k)
949 window[i].bContrib[j][k] = TRUE;
956 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
957 "Evaluating only %d of %d expressions.\n\n",
965 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n", nContrib, nTot);
970 //! Compute the PMF (one of the two main WHAM routines)
971 static void calc_profile(double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, gmx_bool bExact)
973 double ztot_half, ztot, min = opt->min, dz = opt->dz;
975 ztot = opt->max - opt->min;
976 ztot_half = ztot / 2;
982 int nthreads = gmx_omp_get_max_threads();
983 int thread_id = gmx_omp_get_thread_num();
985 int i0 = thread_id * opt->bins / nthreads;
986 int i1 = std::min(opt->bins, ((thread_id + 1) * opt->bins) / nthreads);
988 for (i = i0; i < i1; ++i)
991 double num, denom, invg, temp = 0, distance, U = 0;
993 for (j = 0; j < nWindows; ++j)
995 for (k = 0; k < window[j].nPull; ++k)
997 invg = 1.0 / window[j].g[k] * window[j].bsWeight[k];
998 temp = (1.0 * i + 0.5) * dz + min;
999 num += invg * window[j].Histo[k][i];
1001 if (!(bExact || window[j].bContrib[k][i]))
1005 distance = temp - window[j].pos[k]; /* distance to umbrella center */
1007 { /* in cyclic wham: */
1008 if (distance > ztot_half) /* |distance| < ztot_half */
1012 else if (distance < -ztot_half)
1020 U = 0.5 * window[j].k[k] * gmx::square(distance); /* harmonic potential assumed. */
1024 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1026 denom += invg * window[j].N[k]
1027 * std::exp(-U / (BOLTZ * opt->Temperature) + window[j].z[k]);
1030 profile[i] = num / denom;
1033 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1037 //! Compute the free energy offsets z (one of the two main WHAM routines)
1038 static double calc_z(const double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, gmx_bool bExact)
1040 double min = opt->min, dz = opt->dz, ztot_half, ztot;
1041 double maxglob = -1e20;
1043 ztot = opt->max - opt->min;
1044 ztot_half = ztot / 2;
1046 #pragma omp parallel
1050 int nthreads = gmx_omp_get_max_threads();
1051 int thread_id = gmx_omp_get_thread_num();
1053 int i0 = thread_id * nWindows / nthreads;
1054 int i1 = std::min(nWindows, ((thread_id + 1) * nWindows) / nthreads);
1055 double maxloc = -1e20;
1057 for (i = i0; i < i1; ++i)
1059 double total = 0, temp, distance, U = 0;
1062 for (j = 0; j < window[i].nPull; ++j)
1065 for (k = 0; k < window[i].nBin; ++k)
1067 if (!(bExact || window[i].bContrib[j][k]))
1071 temp = (1.0 * k + 0.5) * dz + min;
1072 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1074 { /* in cyclic wham: */
1075 if (distance > ztot_half) /* |distance| < ztot_half */
1079 else if (distance < -ztot_half)
1087 U = 0.5 * window[i].k[j] * gmx::square(distance); /* harmonic potential assumed. */
1091 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1093 total += profile[k] * std::exp(-U / (BOLTZ * opt->Temperature));
1095 /* Avoid floating point exception if window is far outside min and max */
1098 total = -std::log(total);
1104 temp = std::abs(total - window[i].z[j]);
1109 window[i].z[j] = total;
1112 /* Now get maximum maxloc from the threads and put in maxglob */
1113 if (maxloc > maxglob)
1115 #pragma omp critical
1117 if (maxloc > maxglob)
1124 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1130 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1131 static void symmetrizeProfile(double* profile, t_UmbrellaOptions* opt)
1133 int i, j, bins = opt->bins;
1134 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1137 if (min > 0. || max < 0.)
1139 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n", opt->min, opt->max);
1144 for (i = 0; i < bins; i++)
1146 z = min + (i + 0.5) * dz;
1148 /* bin left of zsym */
1149 j = gmx::roundToInt((zsym - min) / dz) - 1;
1150 if (j >= 0 && (j + 1) < bins)
1152 /* interpolate profile linearly between bins j and j+1 */
1153 z1 = min + (j + 0.5) * dz;
1155 profsym = profile[j] + (profile[j + 1] - profile[j]) / dz * deltaz;
1156 /* average between left and right */
1157 prof2[i] = 0.5 * (profsym + profile[i]);
1161 prof2[i] = profile[i];
1165 std::memcpy(profile, prof2, bins * sizeof(double));
1169 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1170 static void prof_normalization_and_unit(double* profile, t_UmbrellaOptions* opt)
1173 double unit_factor = 1., diff;
1177 /* Not log? Nothing to do! */
1183 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1184 if (opt->unit == en_kT)
1188 else if (opt->unit == en_kJ)
1190 unit_factor = BOLTZ * opt->Temperature;
1192 else if (opt->unit == en_kCal)
1194 unit_factor = (BOLTZ / CAL2JOULE) * opt->Temperature;
1198 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1201 for (i = 0; i < bins; i++)
1203 if (profile[i] > 0.0)
1205 profile[i] = -std::log(profile[i]) * unit_factor;
1209 /* shift to zero at z=opt->zProf0 */
1210 if (!opt->bProf0Set)
1216 /* Get bin with shortest distance to opt->zProf0
1217 (-0.5 from bin position and +0.5 from rounding cancel) */
1218 imin = static_cast<int>((opt->zProf0 - opt->min) / opt->dz);
1223 else if (imin >= bins)
1227 diff = profile[imin];
1231 for (i = 0; i < bins; i++)
1237 //! Make an array of random integers (used for bootstrapping)
1238 static void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine* rng)
1240 gmx::UniformIntDistribution<int> dist(0, blockLength - 1);
1242 int ipull, blockBase, nr, ipullRandom;
1244 if (blockLength == 0)
1246 blockLength = nPull;
1249 for (ipull = 0; ipull < nPull; ipull++)
1251 blockBase = (ipull / blockLength) * blockLength;
1253 { /* make sure nothing bad happens in the last block */
1254 nr = dist(*rng); // [0,blockLength-1]
1255 ipullRandom = blockBase + nr;
1256 } while (ipullRandom >= nPull);
1257 if (ipullRandom < 0 || ipullRandom >= nPull)
1260 "Ups, random iWin = %d, nPull = %d, nr = %d, "
1261 "blockLength = %d, blockBase = %d\n",
1268 randomArray[ipull] = ipullRandom;
1272 /*! \brief Set pull group information of a synthetic histogram
1274 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1275 * but it is not required if we bootstrap complete histograms.
1277 static void copy_pullgrp_to_synthwindow(t_UmbrellaWindow* synthWindow, t_UmbrellaWindow* thisWindow, int pullid)
1279 synthWindow->N[0] = thisWindow->N[pullid];
1280 synthWindow->Histo[0] = thisWindow->Histo[pullid];
1281 synthWindow->pos[0] = thisWindow->pos[pullid];
1282 synthWindow->z[0] = thisWindow->z[pullid];
1283 synthWindow->k[0] = thisWindow->k[pullid];
1284 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1285 synthWindow->g[0] = thisWindow->g[pullid];
1286 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1289 /*! \brief Calculate cumulative distribution function of of all histograms.
1291 * This allow to create random number sequences
1292 * which are distributed according to the histograms. Required to generate
1293 * the "synthetic" histograms for the Bootstrap method
1295 static void calc_cumulatives(t_UmbrellaWindow* window,
1297 t_UmbrellaOptions* opt,
1306 if (opt->bs_verbose)
1308 fn = gmx::Path::concatenateBeforeExtension(fnhist, "_cumul");
1309 fp = xvgropen(fn.c_str(), "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1313 for (i = 0; i < nWindows; i++)
1315 snew(window[i].cum, window[i].nPull);
1316 for (j = 0; j < window[i].nPull; j++)
1318 snew(window[i].cum[j], nbin + 1);
1319 window[i].cum[j][0] = 0.;
1320 for (k = 1; k <= nbin; k++)
1322 window[i].cum[j][k] = window[i].cum[j][k - 1] + window[i].Histo[j][k - 1];
1325 /* normalize CDFs. Ensure cum[nbin]==1 */
1326 last = window[i].cum[j][nbin];
1327 for (k = 0; k <= nbin; k++)
1329 window[i].cum[j][k] /= last;
1334 printf("Cumulative distribution functions of all histograms created.\n");
1335 if (opt->bs_verbose)
1337 for (k = 0; k <= nbin; k++)
1339 fprintf(fp, "%g\t", opt->min + k * opt->dz);
1340 for (i = 0; i < nWindows; i++)
1342 for (j = 0; j < window[i].nPull; j++)
1344 fprintf(fp, "%g\t", window[i].cum[j][k]);
1349 printf("Wrote cumulative distribution functions to %s\n", fn.c_str());
1355 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1357 * This is used to generate a random sequence distributed according to a histogram
1359 static void searchCumulative(const double xx[], int n, double x, int* j)
1367 jm = (ju + jl) >> 1;
1381 else if (x == xx[n - 1])
1391 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1392 static void create_synthetic_histo(t_UmbrellaWindow* synthWindow,
1393 t_UmbrellaWindow* thisWindow,
1395 t_UmbrellaOptions* opt)
1397 int N, i, nbins, r_index, ibin;
1398 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1401 N = thisWindow->N[pullid];
1402 dt = thisWindow->dt;
1403 nbins = thisWindow->nBin;
1405 /* tau = autocorrelation time */
1406 if (opt->tauBootStrap > 0.0)
1408 tausteps = opt->tauBootStrap / dt;
1410 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1412 /* calc tausteps from g=1+2tausteps */
1413 g = thisWindow->g[pullid];
1414 tausteps = (g - 1) / 2;
1419 "When generating hypothetical trajectories from given umbrella histograms,\n"
1420 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1421 "cannot be predicted. You have 3 options:\n"
1422 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1423 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1425 " with option -iiact for all umbrella windows.\n"
1426 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1427 " Use option (3) only if you are sure what you're doing, you may severely\n"
1428 " underestimate the error if a too small ACT is given.\n");
1429 gmx_fatal(FARGS, "%s", errstr);
1432 synthWindow->N[0] = N;
1433 synthWindow->pos[0] = thisWindow->pos[pullid];
1434 synthWindow->z[0] = thisWindow->z[pullid];
1435 synthWindow->k[0] = thisWindow->k[pullid];
1436 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1437 synthWindow->g[0] = thisWindow->g[pullid];
1438 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1440 for (i = 0; i < nbins; i++)
1442 synthWindow->Histo[0][i] = 0.;
1445 if (opt->bsMethod == bsMethod_trajGauss)
1447 sig = thisWindow->sigma[pullid];
1448 mu = thisWindow->aver[pullid];
1451 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1453 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1455 z = a*x + sqrt(1-a^2)*y
1456 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1457 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1459 C(t) = exp(-t/tau) with tau=-1/ln(a)
1461 Then, use error function to turn the Gaussian random variable into a uniformly
1462 distributed one in [0,1]. Eventually, use cumulative distribution function of
1463 histogram to get random variables distributed according to histogram.
1464 Note: The ACT of the flat distribution and of the generated histogram is not
1465 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1467 a = std::exp(-1.0 / tausteps);
1468 ap = std::sqrt(1.0 - a * a);
1469 invsqrt2 = 1.0 / std::sqrt(2.0);
1471 /* init random sequence */
1472 x = opt->normalDistribution(opt->rng);
1474 if (opt->bsMethod == bsMethod_traj)
1476 /* bootstrap points from the umbrella histograms */
1477 for (i = 0; i < N; i++)
1479 y = opt->normalDistribution(opt->rng);
1481 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1482 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1484 r = 0.5 * (1 + std::erf(x * invsqrt2));
1485 searchCumulative(thisWindow->cum[pullid], nbins + 1, r, &r_index);
1486 synthWindow->Histo[0][r_index] += 1.;
1489 else if (opt->bsMethod == bsMethod_trajGauss)
1491 /* bootstrap points from a Gaussian with the same average and sigma
1492 as the respective umbrella histogram. The idea was, that -given
1493 limited sampling- the bootstrapped histograms are otherwise biased
1494 from the limited sampling of the US histos. However, bootstrapping from
1495 the Gaussian seems to yield a similar estimate. */
1499 y = opt->normalDistribution(opt->rng);
1502 ibin = static_cast<int>(std::floor((z - opt->min) / opt->dz));
1507 while ((ibin += nbins) < 0) {}
1509 else if (ibin >= nbins)
1511 while ((ibin -= nbins) >= nbins) {}
1515 if (ibin >= 0 && ibin < nbins)
1517 synthWindow->Histo[0][ibin] += 1.;
1524 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1528 /*! \brief Write all histograms to a file
1530 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1531 * sets of bootstrapped histograms.
1533 static void print_histograms(const char* fnhist,
1534 t_UmbrellaWindow* window,
1537 t_UmbrellaOptions* opt,
1540 std::string fn, title;
1546 fn = gmx::Path::concatenateBeforeExtension(fnhist, gmx::formatString("_bs%d", bs_index));
1547 title = gmx::formatString("Umbrella histograms. Bootstrap #%d", bs_index);
1551 fn = gmx_strdup(fnhist);
1552 title = gmx::formatString("Umbrella histograms");
1555 fp = xvgropen(fn.c_str(), title.c_str(), xlabel, "count", opt->oenv);
1558 /* Write histograms */
1559 for (l = 0; l < bins; ++l)
1561 fprintf(fp, "%e\t", (l + 0.5) * opt->dz + opt->min);
1562 for (i = 0; i < nWindows; ++i)
1564 for (j = 0; j < window[i].nPull; ++j)
1566 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1573 printf("Wrote %s\n", fn.c_str());
1576 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1577 static void setRandomBsWeights(t_UmbrellaWindow* synthwin, int nAllPull, t_UmbrellaOptions* opt)
1581 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1585 /* generate ordered random numbers between 0 and nAllPull */
1586 for (i = 0; i < nAllPull - 1; i++)
1588 r[i] = dist(opt->rng);
1590 std::sort(r, r + nAllPull - 1);
1591 r[nAllPull - 1] = 1.0 * nAllPull;
1593 synthwin[0].bsWeight[0] = r[0];
1594 for (i = 1; i < nAllPull; i++)
1596 synthwin[i].bsWeight[0] = r[i] - r[i - 1];
1599 /* avoid to have zero weight by adding a tiny value */
1600 for (i = 0; i < nAllPull; i++)
1602 if (synthwin[i].bsWeight[0] < 1e-5)
1604 synthwin[i].bsWeight[0] = 1e-5;
1611 //! The main bootstrapping routine
1612 static void do_bootstrapping(const char* fnres,
1618 t_UmbrellaWindow* window,
1620 t_UmbrellaOptions* opt)
1622 t_UmbrellaWindow* synthWindow;
1623 double * bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1624 int i, j, *randomArray = nullptr, winid, pullid, ib;
1625 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1627 gmx_bool bExact = FALSE;
1629 /* init random generator */
1630 if (opt->bsSeed == 0)
1632 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1634 opt->rng.seed(opt->bsSeed);
1636 snew(bsProfile, opt->bins);
1637 snew(bsProfiles_av, opt->bins);
1638 snew(bsProfiles_av2, opt->bins);
1640 /* Create array of all pull groups. Note that different windows
1641 may have different nr of pull groups
1642 First: Get total nr of pull groups */
1644 for (i = 0; i < nWindows; i++)
1646 nAllPull += window[i].nPull;
1648 snew(allPull_winId, nAllPull);
1649 snew(allPull_pullId, nAllPull);
1651 /* Setup one array of all pull groups */
1652 for (i = 0; i < nWindows; i++)
1654 for (j = 0; j < window[i].nPull; j++)
1656 allPull_winId[iAllPull] = i;
1657 allPull_pullId[iAllPull] = j;
1662 /* setup stuff for synthetic windows */
1663 snew(synthWindow, nAllPull);
1664 for (i = 0; i < nAllPull; i++)
1666 synthWindow[i].nPull = 1;
1667 synthWindow[i].nBin = opt->bins;
1668 snew(synthWindow[i].Histo, 1);
1669 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1671 snew(synthWindow[i].Histo[0], opt->bins);
1673 snew(synthWindow[i].N, 1);
1674 snew(synthWindow[i].pos, 1);
1675 snew(synthWindow[i].z, 1);
1676 snew(synthWindow[i].k, 1);
1677 snew(synthWindow[i].bContrib, 1);
1678 snew(synthWindow[i].g, 1);
1679 snew(synthWindow[i].bsWeight, 1);
1682 switch (opt->bsMethod)
1685 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1686 please_cite(stdout, "Hub2006");
1688 case bsMethod_BayesianHist:
1689 /* just copy all histogams into synthWindow array */
1690 for (i = 0; i < nAllPull; i++)
1692 winid = allPull_winId[i];
1693 pullid = allPull_pullId[i];
1694 copy_pullgrp_to_synthwindow(synthWindow + i, window + winid, pullid);
1698 case bsMethod_trajGauss: calc_cumulatives(window, nWindows, opt, fnhist, xlabel); break;
1699 default: gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1702 /* do bootstrapping */
1703 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1704 for (ib = 0; ib < opt->nBootStrap; ib++)
1706 printf(" *******************************************\n"
1707 " ******** Start bootstrap nr %d ************\n"
1708 " *******************************************\n",
1711 switch (opt->bsMethod)
1714 /* bootstrap complete histograms from given histograms */
1715 srenew(randomArray, nAllPull);
1716 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1717 for (i = 0; i < nAllPull; i++)
1719 winid = allPull_winId[randomArray[i]];
1720 pullid = allPull_pullId[randomArray[i]];
1721 copy_pullgrp_to_synthwindow(synthWindow + i, window + winid, pullid);
1724 case bsMethod_BayesianHist:
1725 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1726 setRandomBsWeights(synthWindow, nAllPull, opt);
1729 case bsMethod_trajGauss:
1730 /* create new histos from given histos, that is generate new hypothetical
1732 for (i = 0; i < nAllPull; i++)
1734 winid = allPull_winId[i];
1735 pullid = allPull_pullId[i];
1736 create_synthetic_histo(synthWindow + i, window + winid, pullid, opt);
1741 /* write histos in case of verbose output */
1742 if (opt->bs_verbose)
1744 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1751 std::memcpy(bsProfile, profile, opt->bins * sizeof(double)); /* use profile as guess */
1754 if ((i % opt->stepUpdateContrib) == 0)
1756 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1758 if (maxchange < opt->Tolerance)
1762 if (((i % opt->stepchange) == 0 || i == 1) && i != 0)
1764 printf("\t%4d) Maximum change %e\n", i, maxchange);
1766 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1768 } while ((maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance
1770 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1774 prof_normalization_and_unit(bsProfile, opt);
1777 /* symmetrize profile around z=0 */
1780 symmetrizeProfile(bsProfile, opt);
1783 /* save stuff to get average and stddev */
1784 for (i = 0; i < opt->bins; i++)
1787 bsProfiles_av[i] += tmp;
1788 bsProfiles_av2[i] += tmp * tmp;
1789 fprintf(fp, "%e\t%e\n", (i + 0.5) * opt->dz + opt->min, tmp);
1791 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1795 /* write average and stddev */
1796 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1797 if (output_env_get_print_xvgr_codes(opt->oenv))
1799 fprintf(fp, "@TYPE xydy\n");
1801 for (i = 0; i < opt->bins; i++)
1803 bsProfiles_av[i] /= opt->nBootStrap;
1804 bsProfiles_av2[i] /= opt->nBootStrap;
1805 tmp = bsProfiles_av2[i] - gmx::square(bsProfiles_av[i]);
1806 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1807 fprintf(fp, "%e\t%e\t%e\n", (i + 0.5) * opt->dz + opt->min, bsProfiles_av[i], stddev);
1810 printf("Wrote boot strap result to %s\n", fnres);
1813 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1814 static int whaminFileType(char* fn)
1817 len = std::strlen(fn);
1818 if (std::strcmp(fn + len - 3, "tpr") == 0)
1822 else if (std::strcmp(fn + len - 3, "xvg") == 0 || std::strcmp(fn + len - 6, "xvg.gz") == 0)
1824 return whamin_pullxf;
1826 else if (std::strcmp(fn + len - 3, "pdo") == 0 || std::strcmp(fn + len - 6, "pdo.gz") == 0)
1832 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1836 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1837 static void read_wham_in(const char* fn, char*** filenamesRet, int* nfilesRet, t_UmbrellaOptions* opt)
1839 char **filename = nullptr, tmp[WHAM_MAXFILELEN + 2];
1840 int nread, sizenow, i, block = 1;
1843 fp = gmx_ffopen(fn, "r");
1846 while (fgets(tmp, sizeof(tmp), fp) != nullptr)
1848 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1850 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1852 if (nread >= sizenow)
1855 srenew(filename, sizenow);
1856 for (i = sizenow - block; i < sizenow; i++)
1858 snew(filename[i], WHAM_MAXFILELEN);
1861 /* remove newline if there is one */
1862 if (tmp[std::strlen(tmp) - 1] == '\n')
1864 tmp[std::strlen(tmp) - 1] = '\0';
1866 std::strcpy(filename[nread], tmp);
1869 printf("Found file %s in %s\n", filename[nread], fn);
1873 *filenamesRet = filename;
1877 //! Open a file or a pipe to a gzipped file
1878 static FILE* open_pdo_pipe(const char* fn, t_UmbrellaOptions* opt, gmx_bool* bPipeOpen)
1880 char Buffer[2048], gunzip[1024], *Path = nullptr;
1881 FILE* pipe = nullptr;
1882 static gmx_bool bFirst = true;
1884 /* gzipped pdo file? */
1885 if ((std::strcmp(fn + std::strlen(fn) - 3, ".gz") == 0))
1887 /* search gunzip executable */
1888 if (!(Path = getenv("GMX_PATH_GZIP")))
1890 if (gmx_fexist("/bin/gunzip"))
1892 sprintf(gunzip, "%s", "/bin/gunzip");
1894 else if (gmx_fexist("/usr/bin/gunzip"))
1896 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1901 "Cannot find executable gunzip in /bin or /usr/bin.\n"
1902 "You may want to define the path to gunzip "
1903 "with the environment variable GMX_PATH_GZIP.");
1908 sprintf(gunzip, "%s/gunzip", Path);
1909 if (!gmx_fexist(gunzip))
1912 "Cannot find executable %s. Please define the path to gunzip"
1913 " in the environmental varialbe GMX_PATH_GZIP.",
1919 printf("Using gunzip executable %s\n", gunzip);
1922 if (!gmx_fexist(fn))
1924 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1926 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1929 printf("Executing command '%s'\n", Buffer);
1932 if ((pipe = popen(Buffer, "r")) == nullptr)
1934 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1937 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1943 pipe = gmx_ffopen(fn, "r");
1950 //! Close file or pipe
1951 static void pdo_close_file(FILE* fp)
1960 //! Reading all pdo files
1961 static void read_pdo_files(char** fn, int nfiles, t_UmbrellaHeader* header, t_UmbrellaWindow* window, t_UmbrellaOptions* opt)
1964 real mintmp, maxtmp, done = 0.;
1967 /* char Buffer0[1000]; */
1971 gmx_fatal(FARGS, "No files found. Hick.");
1974 /* if min and max are not given, get min and max from the input files */
1977 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1980 for (i = 0; i < nfiles; ++i)
1982 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1983 /*fgets(Buffer0,999,file);
1984 fprintf(stderr,"First line '%s'\n",Buffer0); */
1985 done = 100.0 * (i + 1) / nfiles;
1986 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done);
1992 read_pdo_header(file, header, opt);
1993 /* here only determine min and max of this window */
1994 read_pdo_data(file, header, i, nullptr, opt, TRUE, &mintmp, &maxtmp);
1995 if (maxtmp > opt->max)
1999 if (mintmp < opt->min)
2005 pdo_close_file(file);
2013 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2014 if (opt->bBoundsOnly)
2016 printf("Found option -boundsonly, now exiting.\n");
2020 /* store stepsize in profile */
2021 opt->dz = (opt->max - opt->min) / opt->bins;
2023 /* Having min and max, we read in all files */
2024 /* Loop over all files */
2025 for (i = 0; i < nfiles; ++i)
2027 done = 100.0 * (i + 1) / nfiles;
2028 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done);
2034 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2035 read_pdo_header(file, header, opt);
2036 /* load data into window */
2037 read_pdo_data(file, header, i, window, opt, FALSE, nullptr, nullptr);
2038 if ((window + i)->Ntot[0] == 0)
2040 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2044 pdo_close_file(file);
2052 for (i = 0; i < nfiles; ++i)
2059 //! translate 0/1 to N/Y to write pull dimensions
2060 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2062 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2063 static void read_tpr_header(const char* fn, t_UmbrellaHeader* header, t_UmbrellaOptions* opt, t_coordselection* coordsel)
2065 t_inputrec irInstance;
2066 t_inputrec* ir = &irInstance;
2068 static int first = 1;
2070 /* printf("Reading %s \n",fn); */
2071 read_tpx_state(fn, ir, &state, nullptr);
2075 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2077 if (ir->pull->ncoord == 0)
2079 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
2082 /* Read overall pull info */
2083 header->npullcrds = ir->pull->ncoord;
2084 header->bPrintCOM = ir->pull->bPrintCOM;
2085 header->bPrintRefValue = ir->pull->bPrintRefValue;
2086 header->bPrintComp = ir->pull->bPrintComp;
2088 /* Read pull coordinates */
2089 snew(header->pcrd, header->npullcrds);
2090 for (int i = 0; i < ir->pull->ncoord; i++)
2092 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
2093 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
2094 header->pcrd[i].ngroup = ir->pull->coord[i].ngroup;
2095 /* Angle type coordinates are handled fully in degrees in gmx wham.
2096 * The pull "distance" appears with a power of -2 in the force constant,
2097 * so we need to multiply with the internal units (radians for angle)
2098 * to user units (degrees for an angle) with the same power.
2101 ir->pull->coord[i].k
2102 / gmx::square(pull_conversion_factor_internal2userinput(&ir->pull->coord[i]));
2103 header->pcrd[i].init_dist = ir->pull->coord[i].init;
2105 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
2106 header->pcrd[i].ndim =
2107 header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
2109 std::strcpy(header->pcrd[i].coord_unit, pull_coordinate_units(&ir->pull->coord[i]));
2111 if (ir->efep != FreeEnergyPerturbationType::No && ir->pull->coord[i].k != ir->pull->coord[i].kB)
2114 "Seems like you did free-energy perturbation, and you perturbed the force "
2116 " This is not supported.\n");
2118 if (coordsel && (coordsel->n != ir->pull->ncoord))
2121 "Found %d pull coordinates in %s, but %d columns in the respective line\n"
2122 "coordinate selection file (option -is)\n",
2129 /* Check pull coords for consistency */
2130 PullGroupGeometry geom = PullGroupGeometry::Count;
2131 ivec thedim = { 0, 0, 0 };
2132 bool geometryIsSet = false;
2133 for (int i = 0; i < ir->pull->ncoord; i++)
2135 if (coordsel == nullptr || coordsel->bUse[i])
2137 if (header->pcrd[i].pull_type != PullingAlgorithm::Umbrella)
2140 "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only "
2141 "umbrella coodinates can enter WHAM.\n"
2142 "If you have umrella and non-umbrella coordinates, you can select the "
2143 "umbrella coordinates with gmx wham -is\n",
2146 enumValueToString(header->pcrd[i].pull_type));
2150 geom = header->pcrd[i].geometry;
2151 copy_ivec(header->pcrd[i].dim, thedim);
2152 geometryIsSet = true;
2154 if (geom != header->pcrd[i].geometry)
2157 "%s: Your pull coordinates have different pull geometry (coordinate 1: "
2158 "%s, coordinate %d: %s)\n"
2159 "If you want to use only some pull coordinates in WHAM, please select "
2160 "them with option gmx wham -is\n",
2162 enumValueToString(geom),
2164 enumValueToString(header->pcrd[i].geometry));
2166 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY]
2167 || thedim[ZZ] != header->pcrd[i].dim[ZZ])
2170 "%s: Your pull coordinates have different pull dimensions (coordinate 1: "
2171 "%s %s %s, coordinate %d: %s %s %s)\n"
2172 "If you want to use only some pull coordinates in WHAM, please select "
2173 "them with option gmx wham -is\n",
2179 int2YN(header->pcrd[i].dim[XX]),
2180 int2YN(header->pcrd[i].dim[YY]),
2181 int2YN(header->pcrd[i].dim[ZZ]));
2183 if (header->pcrd[i].geometry == PullGroupGeometry::Cylinder)
2185 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
2189 "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2190 "However, found dimensions [%s %s %s]\n",
2191 int2YN(header->pcrd[i].dim[XX]),
2192 int2YN(header->pcrd[i].dim[YY]),
2193 int2YN(header->pcrd[i].dim[ZZ]));
2196 if (header->pcrd[i].k <= 0.0)
2199 "%s: Pull coordinate %d has force constant of of %g.\n"
2200 "That doesn't seem to be an Umbrella tpr.\n",
2208 if (opt->verbose || first)
2210 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
2212 for (int i = 0; i < ir->pull->ncoord; i++)
2214 int lentmp = strlen(enumValueToString(header->pcrd[i].geometry));
2215 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
2219 "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d "
2220 "dimensions). Used: %%s\n",
2222 for (int i = 0; i < ir->pull->ncoord; i++)
2224 bool use = (coordsel == nullptr || coordsel->bUse[i]);
2226 enumValueToString(header->pcrd[i].geometry),
2228 header->pcrd[i].init_dist,
2229 int2YN(header->pcrd[i].dim[XX]),
2230 int2YN(header->pcrd[i].dim[YY]),
2231 int2YN(header->pcrd[i].dim[ZZ]),
2232 header->pcrd[i].ndim,
2233 use ? "Yes" : "No");
2234 printf("\tPull group coordinates of %d groups expected in pullx files.\n",
2235 ir->pull->bPrintCOM ? header->pcrd[i].ngroup : 0);
2237 printf("\tReference value of the coordinate%s expected in pullx files.\n",
2238 header->bPrintRefValue ? "" : " not");
2240 if (!opt->verbose && first)
2242 printf("\tUse option -v to see this output for all input tpr files\n\n");
2248 //! Read pullx.xvg or pullf.xvg
2249 static void read_pull_xf(const char* fn,
2250 t_UmbrellaHeader* header,
2251 t_UmbrellaWindow* window,
2252 t_UmbrellaOptions* opt,
2253 gmx_bool bGetMinMax,
2256 t_coordselection* coordsel)
2258 double ** y = nullptr, pos = 0., t, force, time0 = 0., dt;
2259 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
2260 int nColExpect, ntot, column;
2261 real min, max, minfound = 1e20, maxfound = -1e20;
2262 gmx_bool dt_ok, timeok;
2263 const char* quantity;
2264 const int blocklen = 4096;
2265 int* lennow = nullptr;
2266 static gmx_bool bFirst = TRUE;
2269 * Data columns in pull output:
2270 * - in force output pullf.xvg:
2271 * No reference columns, one column per pull coordinate
2273 * - in position output pullx.xvg:
2274 * * optionally, ndim columns for COMs of all groups (depending on on mdp options pull-print-com);
2275 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
2276 * be written separately into pullx file, but this is not supported and throws an error below;
2277 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
2280 if (header->bPrintComp && opt->bPullx)
2284 "gmx wham cannot read pullx files if the components of the coordinate was written\n"
2285 "(mdp option pull-print-components). Provide the pull force files instead (with "
2289 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
2290 snew(nColThisCrd, header->npullcrds);
2291 snew(nColCOMCrd, header->npullcrds);
2292 snew(nColRefCrd, header->npullcrds);
2296 /* pullf reading: simply one column per coordinate */
2297 for (g = 0; g < header->npullcrds; g++)
2306 /* pullx reading. Note explanation above. */
2307 for (g = 0; g < header->npullcrds; g++)
2309 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
2310 nColCOMCrd[g] = (header->bPrintCOM ? header->pcrd[g].ndim * header->pcrd[g].ngroup : 0);
2311 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
2315 nColExpect = 1; /* time column */
2316 for (g = 0; g < header->npullcrds; g++)
2318 nColExpect += nColThisCrd[g];
2321 /* read pullf or pullx. Could be optimized if min and max are given. */
2322 nt = read_xvg(fn, &y, &ny);
2324 /* Check consistency */
2325 quantity = opt->bPullx ? "position" : "force";
2328 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2330 if (bFirst || opt->verbose)
2332 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
2333 for (i = 0; i < header->npullcrds; i++)
2335 printf("\tColumns for pull coordinate %d\n", i + 1);
2336 printf("\t\treaction coordinate: %d\n"
2337 "\t\tcenter-of-mass of groups: %d\n"
2338 "\t\treference position column: %s\n",
2341 (header->bPrintRefValue ? "Yes" : "No"));
2343 printf("\tFound %d times in %s\n", nt, fn);
2346 if (nColExpect != ny)
2349 "Expected %d columns (including time column) in %s, but found %d."
2350 " Maybe you confused options -if and -ix ?",
2364 window->dt = y[0][1] - y[0][0];
2366 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2368 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2371 /* Need to alocate memory and set up structure for windows */
2374 /* Use only groups selected with option -is file */
2375 if (header->npullcrds != coordsel->n)
2378 "tpr file contains %d pull groups, but expected %d from group selection "
2383 window->nPull = coordsel->nUse;
2387 window->nPull = header->npullcrds;
2390 window->nBin = bins;
2391 snew(window->Histo, window->nPull);
2392 snew(window->z, window->nPull);
2393 snew(window->k, window->nPull);
2394 snew(window->pos, window->nPull);
2395 snew(window->N, window->nPull);
2396 snew(window->Ntot, window->nPull);
2397 snew(window->g, window->nPull);
2398 snew(window->bsWeight, window->nPull);
2399 window->bContrib = nullptr;
2401 if (opt->bCalcTauInt)
2403 snew(window->ztime, window->nPull);
2407 window->ztime = nullptr;
2409 snew(lennow, window->nPull);
2411 for (g = 0; g < window->nPull; ++g)
2414 window->bsWeight[g] = 1.;
2416 window->Ntot[g] = 0;
2418 snew(window->Histo[g], bins);
2420 if (opt->bCalcTauInt)
2422 window->ztime[g] = nullptr;
2426 /* Copying umbrella center and force const is more involved since not
2427 all pull groups from header (tpr file) may be used in window variable */
2428 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2430 if (coordsel && !coordsel->bUse[g])
2434 window->k[gUsed] = header->pcrd[g].k;
2435 window->pos[gUsed] = header->pcrd[g].init_dist;
2440 { /* only determine min and max */
2443 min = max = bins = 0; /* Get rid of warnings */
2447 for (i = 0; i < nt; i++)
2449 /* Do you want that time frame? */
2450 t = 1.0 / 1000 * (gmx::roundToInt64((y[0][i] * 1000))); /* round time to fs */
2452 /* get time step of pdo file and get dstep from opt->dt */
2462 dstep = gmx::roundToInt(opt->dt / dt);
2470 window->dt = dt * dstep;
2474 dt_ok = (i % dstep == 0);
2475 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2477 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2478 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2481 /* Note: if coordsel == NULL:
2482 * all groups in pullf/x file are stored in this window, and gUsed == g
2483 * if coordsel != NULL:
2484 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2487 for (g = 0; g < header->npullcrds; ++g)
2489 /* was this group selected for application in WHAM? */
2490 if (coordsel && !coordsel->bUse[g])
2498 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2499 force = y[g + 1][i];
2500 pos = -force / header->pcrd[g].k + header->pcrd[g].init_dist;
2504 /* Pick the correct column index.
2505 Note that there is always exactly one displacement column.
2508 for (int j = 0; j < g; j++)
2510 column += nColThisCrd[j];
2515 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2529 if (gUsed >= window->nPull)
2532 "gUsed too large (%d, nPull=%d). This error should have been "
2538 if (opt->bCalcTauInt && !bGetMinMax)
2540 /* save time series for autocorrelation analysis */
2541 ntot = window->Ntot[gUsed];
2542 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2543 if (ntot >= lennow[gUsed])
2545 lennow[gUsed] += blocklen;
2546 srenew(window->ztime[gUsed], lennow[gUsed]);
2548 window->ztime[gUsed][ntot] = pos;
2551 ibin = static_cast<int>(std::floor((pos - min) / (max - min) * bins));
2556 while ((ibin += bins) < 0) {}
2558 else if (ibin >= bins)
2560 while ((ibin -= bins) >= bins) {}
2563 if (ibin >= 0 && ibin < bins)
2565 window->Histo[gUsed][ibin] += 1.;
2568 window->Ntot[gUsed]++;
2572 else if (t > opt->tmax)
2576 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2588 for (i = 0; i < ny; i++)
2594 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2595 static void read_tpr_pullxf_files(char** fnTprs,
2598 t_UmbrellaHeader* header,
2599 t_UmbrellaWindow* window,
2600 t_UmbrellaOptions* opt)
2603 real mintmp, maxtmp;
2605 printf("Reading %d tpr and pullf files\n", nfiles);
2607 /* min and max not given? */
2610 printf("Automatic determination of boundaries...\n");
2613 for (i = 0; i < nfiles; i++)
2615 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2617 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2619 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2620 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2623 "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",
2626 read_pull_xf(fnPull[i],
2633 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2634 if (maxtmp > opt->max)
2638 if (mintmp < opt->min)
2643 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2644 if (opt->bBoundsOnly)
2646 printf("Found option -boundsonly, now exiting.\n");
2650 /* store stepsize in profile */
2651 opt->dz = (opt->max - opt->min) / opt->bins;
2653 bool foundData = false;
2654 for (i = 0; i < nfiles; i++)
2656 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2658 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2660 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2661 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2664 FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2666 read_pull_xf(fnPull[i],
2673 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2674 if (window[i].Ntot[0] == 0)
2676 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2686 "No data points were found in pullf/pullx files. Maybe you need to specify the "
2690 for (i = 0; i < nfiles; i++)
2699 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2701 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2702 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2704 static void readIntegratedAutocorrelationTimes(t_UmbrellaWindow* window, int nwins, const char* fn)
2706 int nlines, ny, i, ig;
2709 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2710 nlines = read_xvg(fn, &iact, &ny);
2711 if (nlines != nwins)
2714 "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2719 for (i = 0; i < nlines; i++)
2721 if (window[i].nPull != ny)
2724 "You are providing autocorrelation times with option -iiact and the\n"
2725 "number of pull groups is different in different simulations. That is not\n"
2726 "supported yet. Sorry.\n");
2728 for (ig = 0; ig < window[i].nPull; ig++)
2730 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2731 window[i].g[ig] = 1 + 2 * iact[ig][i] / window[i].dt;
2733 if (iact[ig][i] <= 0.0)
2735 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2742 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2745 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2746 * that -in case of limited sampling- the ACT may be severely underestimated.
2747 * Note: the g=1+2tau are overwritten.
2748 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2751 static void smoothIact(t_UmbrellaWindow* window, int nwins, t_UmbrellaOptions* opt)
2754 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2756 /* only evaluate within +- 3sigma of the Gausian */
2757 siglim = 3.0 * opt->sigSmoothIact;
2758 siglim2 = gmx::square(siglim);
2759 /* pre-factor of Gaussian */
2760 gaufact = 1.0 / (std::sqrt(2 * M_PI) * opt->sigSmoothIact);
2761 invtwosig2 = 0.5 / gmx::square(opt->sigSmoothIact);
2763 for (i = 0; i < nwins; i++)
2765 snew(window[i].tausmooth, window[i].nPull);
2766 for (ig = 0; ig < window[i].nPull; ig++)
2770 pos = window[i].pos[ig];
2771 for (j = 0; j < nwins; j++)
2773 for (jg = 0; jg < window[j].nPull; jg++)
2775 dpos2 = gmx::square(window[j].pos[jg] - pos);
2776 if (dpos2 < siglim2)
2778 w = gaufact * std::exp(-dpos2 * invtwosig2);
2780 tausm += w * window[j].tau[jg];
2781 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2782 w,dpos2,pos,gaufact,invtwosig2); */
2787 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2789 window[i].tausmooth[ig] = tausm;
2793 window[i].tausmooth[ig] = window[i].tau[ig];
2795 window[i].g[ig] = 1 + 2 * tausm / window[i].dt;
2800 //! Stop integrating autoccorelation function when ACF drops under this value
2801 #define WHAM_AC_ZERO_LIMIT 0.05
2803 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2805 static void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow* window,
2807 t_UmbrellaOptions* opt,
2811 int i, ig, ncorr, ntot, j, k, *count, restart;
2812 real *corr, c0, dt, tmp;
2813 real *ztime, av, tausteps;
2814 FILE *fp, *fpcorr = nullptr;
2818 fpcorr = xvgropen("hist_autocorr.xvg",
2819 "Autocorrelation functions of umbrella windows",
2821 "autocorrelation function",
2826 for (i = 0; i < nwins; i++)
2829 "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...",
2830 100. * (i + 1) / nwins);
2832 ntot = window[i].Ntot[0];
2834 /* using half the maximum time as length of autocorrelation function */
2839 "Tryig to estimtate autocorrelation time from only %d"
2840 " points. Provide more pull data!",
2844 /* snew(corrSq,ncorr); */
2847 snew(window[i].tau, window[i].nPull);
2848 restart = gmx::roundToInt(opt->acTrestart / dt);
2854 for (ig = 0; ig < window[i].nPull; ig++)
2856 if (ntot != window[i].Ntot[ig])
2859 "Encountered different nr of frames in different pull groups.\n"
2860 "That should not happen. (%d and %d)\n",
2862 window[i].Ntot[ig]);
2864 ztime = window[i].ztime[ig];
2866 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2867 for (j = 0, av = 0; (j < ntot); j++)
2872 for (k = 0; (k < ncorr); k++)
2877 for (j = 0; (j < ntot); j += restart)
2879 for (k = 0; (k < ncorr) && (j + k < ntot); k++)
2881 tmp = (ztime[j] - av) * (ztime[j + k] - av);
2883 /* corrSq[k] += tmp*tmp; */
2887 /* divide by nr of frames for each time displacement */
2888 for (k = 0; (k < ncorr); k++)
2890 /* count probably = (ncorr-k+(restart-1))/restart; */
2891 corr[k] = corr[k] / count[k];
2892 /* variance of autocorrelation function */
2893 /* corrSq[k]=corrSq[k]/count[k]; */
2895 /* normalize such that corr[0] == 0 */
2897 for (k = 0; (k < ncorr); k++)
2900 /* corrSq[k]*=c0*c0; */
2903 /* write ACFs in verbose mode */
2906 for (k = 0; (k < ncorr); k++)
2908 fprintf(fpcorr, "%g %g\n", k * dt, corr[k]);
2910 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2913 /* esimate integrated correlation time, fitting is too unstable */
2914 tausteps = 0.5 * corr[0];
2915 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2916 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2918 tausteps += corr[j];
2921 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2922 Kumar et al, eq. 28 ff. */
2923 window[i].tau[ig] = tausteps * dt;
2924 window[i].g[ig] = 1 + 2 * tausteps;
2925 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2936 /* plot IACT along reaction coordinate */
2937 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2938 if (output_env_get_print_xvgr_codes(opt->oenv))
2940 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2941 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2942 for (i = 0; i < nwins; i++)
2944 fprintf(fp, "# %3d ", i);
2945 for (ig = 0; ig < window[i].nPull; ig++)
2947 fprintf(fp, " %11g", window[i].tau[ig]);
2952 for (i = 0; i < nwins; i++)
2954 for (ig = 0; ig < window[i].nPull; ig++)
2956 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2959 if (opt->sigSmoothIact > 0.0)
2961 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = "
2963 opt->sigSmoothIact);
2964 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2965 smoothIact(window, nwins, opt);
2966 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2967 if (output_env_get_print_xvgr_codes(opt->oenv))
2969 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2970 fprintf(fp, "@ s1 symbol color 2\n");
2972 for (i = 0; i < nwins; i++)
2974 for (ig = 0; ig < window[i].nPull; ig++)
2976 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2981 printf("Wrote %s\n", fn);
2985 * compute average and sigma of each umbrella histogram
2987 static void averageSigma(t_UmbrellaWindow* window, int nwins)
2990 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2992 for (i = 0; i < nwins; i++)
2994 snew(window[i].aver, window[i].nPull);
2995 snew(window[i].sigma, window[i].nPull);
2997 ntot = window[i].Ntot[0];
2998 for (ig = 0; ig < window[i].nPull; ig++)
3000 ztime = window[i].ztime[ig];
3001 for (k = 0, av = 0.; k < ntot; k++)
3006 for (k = 0, sum2 = 0.; k < ntot; k++)
3008 diff = ztime[k] - av;
3009 sum2 += diff * diff;
3011 sig = std::sqrt(sum2 / ntot);
3012 window[i].aver[ig] = av;
3014 /* Note: This estimate for sigma is biased from the limited sampling.
3015 Correct sigma by n/(n-1) where n = number of independent
3016 samples. Only possible if IACT is known.
3020 nSamplesIndep = window[i].N[ig] / (window[i].tau[ig] / window[i].dt);
3021 window[i].sigma[ig] = sig * nSamplesIndep / (nSamplesIndep - 1);
3025 window[i].sigma[ig] = sig;
3027 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
3034 * Use histograms to compute average force on pull group.
3036 static void computeAverageForce(t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt)
3038 int i, j, bins = opt->bins, k;
3039 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
3042 dz = (max - min) / bins;
3043 ztot = opt->max - min;
3044 ztot_half = ztot / 2;
3046 /* Compute average displacement from histograms */
3047 for (j = 0; j < nWindows; ++j)
3049 snew(window[j].forceAv, window[j].nPull);
3050 for (k = 0; k < window[j].nPull; ++k)
3054 for (i = 0; i < opt->bins; ++i)
3056 temp = (1.0 * i + 0.5) * dz + min;
3057 distance = temp - window[j].pos[k];
3059 { /* in cyclic wham: */
3060 if (distance > ztot_half) /* |distance| < ztot_half */
3064 else if (distance < -ztot_half)
3069 w = window[j].Histo[k][i] / window[j].g[k];
3070 displAv += w * distance;
3072 /* Are we near min or max? We are getting wrong forces from the histgrams since
3073 the histograms are zero outside [min,max). Therefore, assume that the position
3074 on the other side of the histomgram center is equally likely. */
3077 posmirrored = window[j].pos[k] - distance;
3078 if (posmirrored >= max || posmirrored < min)
3080 displAv += -w * distance;
3087 /* average force from average displacement */
3088 window[j].forceAv[k] = displAv * window[j].k[k];
3089 /* sigma from average square displacement */
3090 /* window[j].sigma [k] = sqrt(displAv2); */
3091 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
3097 * Check if the complete reaction coordinate is covered by the histograms
3099 static void checkReactionCoordinateCovered(t_UmbrellaWindow* window, int nwins, t_UmbrellaOptions* opt)
3101 int i, ig, j, bins = opt->bins;
3103 real avcount = 0, z, relcount, *count;
3104 snew(count, opt->bins);
3106 for (j = 0; j < opt->bins; ++j)
3108 for (i = 0; i < nwins; i++)
3110 for (ig = 0; ig < window[i].nPull; ig++)
3112 count[j] += window[i].Histo[ig][j];
3115 avcount += 1.0 * count[j];
3118 for (j = 0; j < bins; ++j)
3120 relcount = count[j] / avcount;
3121 z = (j + 0.5) * opt->dz + opt->min;
3122 bBoundary = j < bins / 20 || (bins - j) > bins / 20;
3123 /* check for bins with no data */
3127 "\nWARNING, no data point in bin %d (z=%g) !\n"
3128 "You may not get a reasonable profile. Check your histograms!\n",
3132 /* and check for poor sampling */
3133 else if (relcount < 0.005 && !bBoundary)
3135 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3141 /*! \brief Compute initial potential by integrating the average force
3143 * This speeds up the convergence by roughly a factor of 2
3145 static void guessPotByIntegration(t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, const char* xlabel)
3147 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3148 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3151 dz = (opt->max - min) / bins;
3153 printf("Getting initial potential by integration.\n");
3155 /* Compute average displacement from histograms */
3156 computeAverageForce(window, nWindows, opt);
3158 /* Get force for each bin from all histograms in this bin, or, alternatively,
3159 if no histograms are inside this bin, from the closest histogram */
3162 for (j = 0; j < opt->bins; ++j)
3164 pos = (1.0 * j + 0.5) * dz + min;
3168 groupmin = winmin = 0;
3169 for (i = 0; i < nWindows; i++)
3171 for (ig = 0; ig < window[i].nPull; ig++)
3173 hispos = window[i].pos[ig];
3174 dist = std::abs(hispos - pos);
3175 /* average force within bin */
3179 fAv += window[i].forceAv[ig];
3181 /* at the same time, remember closest histogram */
3190 /* if no histogram found in this bin, use closest histogram */
3197 fAv = window[winmin].forceAv[groupmin];
3201 for (j = 1; j < opt->bins; ++j)
3203 pot[j] = pot[j - 1] - 0.5 * dz * (f[j - 1] + f[j]);
3206 /* cyclic wham: linearly correct possible offset */
3209 diff = (pot[bins - 1] - pot[0]) / (bins - 1);
3210 for (j = 1; j < opt->bins; ++j)
3217 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3218 for (j = 0; j < opt->bins; ++j)
3220 fprintf(fp, "%g %g\n", (j + 0.5) * dz + opt->min, pot[j]);
3223 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3226 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3227 energy offsets which are usually determined by wham
3228 First: turn pot into probabilities:
3230 for (j = 0; j < opt->bins; ++j)
3232 pot[j] = std::exp(-pot[j] / (BOLTZ * opt->Temperature));
3234 calc_z(pot, window, nWindows, opt, TRUE);
3240 //! Count number of words in an line
3241 static int wordcount(char* ptr)
3246 if (std::strlen(ptr) == 0)
3250 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3252 for (i = 0; (ptr[i] != '\0'); i++)
3254 is[cur] = isspace(ptr[i]);
3255 if ((i > 0) && (is[cur] && !is[1 - cur]))
3264 /*! \brief Read input file for pull group selection (option -is)
3266 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3268 static void readPullCoordSelection(t_UmbrellaOptions* opt, char** fnTpr, int nTpr)
3271 int i, iline, n, len = STRLEN, temp;
3272 char *ptr = nullptr, *tmpbuf = nullptr;
3273 char fmt[1024], fmtign[1024];
3274 int block = 1, sizenow;
3276 fp = gmx_ffopen(opt->fnCoordSel, "r");
3277 opt->coordsel = nullptr;
3282 while ((ptr = fgets3(fp, tmpbuf, &len)) != nullptr)
3287 if (iline >= sizenow)
3290 srenew(opt->coordsel, sizenow);
3292 opt->coordsel[iline].n = n;
3293 opt->coordsel[iline].nUse = 0;
3294 snew(opt->coordsel[iline].bUse, n);
3297 for (i = 0; i < n; i++)
3299 std::strcpy(fmt, fmtign);
3300 std::strcat(fmt, "%d");
3301 if (sscanf(ptr, fmt, &temp))
3303 opt->coordsel[iline].bUse[i] = (temp > 0);
3304 if (opt->coordsel[iline].bUse[i])
3306 opt->coordsel[iline].nUse++;
3309 std::strcat(fmtign, "%*s");
3313 opt->nCoordsel = iline;
3314 if (nTpr != opt->nCoordsel)
3316 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel, opt->fnCoordSel);
3319 printf("\nUse only these pull coordinates:\n");
3320 for (iline = 0; iline < nTpr; iline++)
3322 printf("%s (%d of %d coordinates):",
3324 opt->coordsel[iline].nUse,
3325 opt->coordsel[iline].n);
3326 for (i = 0; i < opt->coordsel[iline].n; i++)
3328 if (opt->coordsel[iline].bUse[i])
3330 printf(" %d", i + 1);
3341 #define WHAMBOOLXOR(a, b) (((!(a)) && (b)) || ((a) && (!(b))))
3343 //! Number of elements in fnm (used for command line parsing)
3344 #define NFILE asize(fnm)
3346 //! The main gmx wham routine
3347 int gmx_wham(int argc, char* argv[])
3349 const char* desc[] = {
3350 "[THISMODULE] is an analysis program that implements the Weighted",
3351 "Histogram Analysis Method (WHAM). It is intended to analyze",
3352 "output files generated by umbrella sampling simulations to ",
3353 "compute a potential of mean force (PMF).[PAR]",
3355 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3356 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3357 "and, if multiple coordinates need to be analyzed, all used the same",
3358 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3359 "At present, three input modes are supported.",
3361 "* With option [TT]-it[tt], the user provides a file which contains the",
3362 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3363 " AND, with option [TT]-ix[tt], a file which contains file names of",
3364 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3365 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3366 " first pullx, etc.",
3367 "* Same as the previous input mode, except that the user",
3368 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3369 " From the pull force the position in the umbrella potential is",
3370 " computed. This does not work with tabulated umbrella potentials.",
3371 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] ",
3373 " the GROMACS 3.3 umbrella output files. If you have some unusual",
3374 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3375 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file ",
3376 " header must be similar to the following::",
3379 " # Component selection: 0 0 1",
3381 " # Ref. Group 'TestAtom'",
3382 " # Nr. of pull groups 2",
3383 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3384 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3387 " The number of pull groups, umbrella positions, force constants, and names ",
3388 " may (of course) differ. Following the header, a time column and ",
3389 " a data column for each pull group follows (i.e. the displacement",
3390 " with respect to the umbrella center). Up to four pull groups are possible ",
3391 " per [REF].pdo[ref] file at present.[PAR]",
3392 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If ",
3394 "some of the pull coordinates should be used, a pull coordinate selection file (option ",
3395 "[TT]-is[tt]) can ",
3396 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3397 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr ",
3399 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. ",
3401 "If you have three tpr files, each containing 4 pull coordinates, but only pull ",
3402 "coordinates 1 and 2 should be ",
3403 "used, coordsel.dat looks like this::",
3409 "By default, the output files are::",
3411 " [TT]-o[tt] PMF output file",
3412 " [TT]-hist[tt] Histograms output file",
3414 "Always check whether the histograms sufficiently overlap.[PAR]",
3415 "The umbrella potential is assumed to be harmonic and the force constants are ",
3416 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force ",
3418 "a tabulated potential can be provided with [TT]-tab[tt].",
3423 "* [TT]-bins[tt] Number of bins used in analysis",
3424 "* [TT]-temp[tt] Temperature in the simulations",
3425 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3426 "* [TT]-auto[tt] Automatic determination of boundaries",
3427 "* [TT]-min,-max[tt] Boundaries of the profile",
3429 "The data points that are used to compute the profile",
3430 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3431 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3432 "umbrella window.[PAR]",
3433 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3434 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3435 "With energy output, the energy in the first bin is defined to be zero. ",
3436 "If you want the free energy at a different ",
3437 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3438 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3439 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3440 "[THISMODULE] will make use of the",
3441 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3442 "reaction coordinate will assumed be be neighbors.[PAR]",
3443 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3444 "which may be useful for, e.g. membranes.",
3449 "If available, the number of OpenMP threads used by gmx wham can be controlled by setting",
3450 "the [TT]OMP_NUM_THREADS[tt] environment variable.",
3455 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3456 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3457 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3458 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3459 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3460 "Because the IACTs can be severely underestimated in case of limited ",
3461 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3462 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3463 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3464 "integration of the ACFs while the ACFs are larger 0.05.",
3465 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3466 "less robust) method such as fitting to a double exponential, you can ",
3467 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3468 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3469 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull coordinate in the ",
3475 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3476 "otherwise the statistical error may be substantially underestimated. ",
3477 "More background and examples for the bootstrap technique can be found in ",
3478 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3479 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3480 "Four bootstrapping methods are supported and ",
3481 "selected with [TT]-bs-method[tt].",
3483 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3484 " data points, and the bootstrap is carried out by assigning random weights to the ",
3485 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3486 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3487 " statistical error is underestimated.",
3488 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3489 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3490 " (allowing duplication, i.e. sampling with replacement).",
3491 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3492 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3493 " divided into blocks and only histograms within each block are mixed. Note that ",
3494 " the histograms within each block must be representative for all possible histograms, ",
3495 " otherwise the statistical error is underestimated.",
3496 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3497 " such that the generated data points are distributed according the given histograms ",
3498 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3499 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3500 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3501 " Note that this method may severely underestimate the error in case of limited ",
3503 " that is if individual histograms do not represent the complete phase space at ",
3504 " the respective positions.",
3505 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3506 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3507 " and width of the umbrella histograms. That method yields similar error estimates ",
3508 " like method [TT]traj[tt].",
3510 "Bootstrapping output:",
3512 "* [TT]-bsres[tt] Average profile and standard deviations",
3513 "* [TT]-bsprof[tt] All bootstrapping profiles",
3515 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3516 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3520 const char* en_unit[] = { nullptr, "kJ", "kCal", "kT", nullptr };
3521 const char* en_unit_label[] = { "", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", nullptr };
3522 const char* en_bsMethod[] = { nullptr, "b-hist", "hist", "traj", "traj-gauss", nullptr };
3523 static t_UmbrellaOptions opt;
3526 { "-min", FALSE, etREAL, { &opt.min }, "Minimum coordinate in profile" },
3527 { "-max", FALSE, etREAL, { &opt.max }, "Maximum coordinate in profile" },
3528 { "-auto", FALSE, etBOOL, { &opt.bAuto }, "Determine min and max automatically" },
3529 { "-bins", FALSE, etINT, { &opt.bins }, "Number of bins in profile" },
3530 { "-temp", FALSE, etREAL, { &opt.Temperature }, "Temperature" },
3531 { "-tol", FALSE, etREAL, { &opt.Tolerance }, "Tolerance" },
3532 { "-v", FALSE, etBOOL, { &opt.verbose }, "Verbose mode" },
3533 { "-b", FALSE, etREAL, { &opt.tmin }, "First time to analyse (ps)" },
3534 { "-e", FALSE, etREAL, { &opt.tmax }, "Last time to analyse (ps)" },
3535 { "-dt", FALSE, etREAL, { &opt.dt }, "Analyse only every dt ps" },
3536 { "-histonly", FALSE, etBOOL, { &opt.bHistOnly }, "Write histograms and exit" },
3540 { &opt.bBoundsOnly },
3541 "Determine min and max and exit (with [TT]-auto[tt])" },
3542 { "-log", FALSE, etBOOL, { &opt.bLog }, "Calculate the log of the profile before printing" },
3543 { "-unit", FALSE, etENUM, { en_unit }, "Energy unit in case of log output" },
3548 "Define profile to 0.0 at this position (with [TT]-log[tt])" },
3553 "Create cyclic/periodic profile. Assumes min and max are the same point." },
3554 { "-sym", FALSE, etBOOL, { &opt.bSym }, "Symmetrize profile around z=0" },
3559 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)" },
3563 { &opt.bCalcTauInt },
3564 "Calculate integrated autocorrelation times and use in wham" },
3568 { &opt.sigSmoothIact },
3569 "Smooth autocorrelation times along reaction coordinate with Gaussian of this "
3570 "[GRK]sigma[grk]" },
3574 { &opt.acTrestart },
3575 "When computing autocorrelation functions, restart computing every .. (ps)" },
3579 { &opt.bAllowReduceIact },
3580 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3581 "during smoothing" },
3585 { &opt.nBootStrap },
3586 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3587 { "-bs-method", FALSE, etENUM, { en_bsMethod }, "Bootstrap method" },
3591 { &opt.tauBootStrap },
3592 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is "
3594 { "-bs-seed", FALSE, etINT, { &opt.bsSeed }, "Seed for bootstrapping. (-1 = use time)" },
3598 { &opt.histBootStrapBlockLength },
3599 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]." },
3603 { &opt.bs_verbose },
3604 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap." },
3608 { &opt.stepchange },
3609 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])" },
3613 { &opt.stepUpdateContrib },
3614 "HIDDENUpdate table with significan contributions to WHAM every ... iterations" },
3618 { efDAT, "-ix", "pullx-files", ffOPTRD }, /* wham input: pullf.xvg's and tprs */
3619 { efDAT, "-if", "pullf-files", ffOPTRD }, /* wham input: pullf.xvg's and tprs */
3620 { efDAT, "-it", "tpr-files", ffOPTRD }, /* wham input: tprs */
3621 { efDAT, "-ip", "pdo-files", ffOPTRD }, /* wham input: pdo files (gmx3 style) */
3622 { efDAT, "-is", "coordsel", ffOPTRD }, /* input: select pull coords to use */
3623 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3624 { efXVG, "-hist", "histo", ffWRITE }, /* output file for histograms */
3625 { efXVG, "-oiact", "iact", ffOPTWR }, /* writing integrated autocorrelation times */
3626 { efDAT, "-iiact", "iact-in", ffOPTRD }, /* reading integrated autocorrelation times */
3627 { efXVG, "-bsres", "bsResult", ffOPTWR }, /* average and errors of bootstrap analysis */
3628 { efXVG, "-bsprof", "bsProfs", ffOPTWR }, /* output file for bootstrap profiles */
3629 { efDAT, "-tab", "umb-pot", ffOPTRD }, /* Tabulated umbrella potential (if not harmonic) */
3632 int i, j, l, nfiles, nwins, nfiles2;
3633 t_UmbrellaHeader header;
3634 t_UmbrellaWindow* window = nullptr;
3635 double * profile, maxchange = 1e20;
3636 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3637 char ** fninTpr, **fninPull, **fninPdo;
3639 FILE * histout, *profout;
3640 char xlabel[STRLEN], ylabel[256], title[256];
3643 opt.verbose = FALSE;
3644 opt.bHistOnly = FALSE;
3653 opt.coordsel = nullptr;
3655 /* bootstrapping stuff */
3657 opt.bsMethod = bsMethod_hist;
3658 opt.tauBootStrap = 0.0;
3660 opt.histBootStrapBlockLength = 8;
3661 opt.bs_verbose = FALSE;
3666 opt.Temperature = 298;
3667 opt.Tolerance = 1e-6;
3668 opt.bBoundsOnly = FALSE;
3670 opt.bCalcTauInt = FALSE;
3671 opt.sigSmoothIact = 0.0;
3672 opt.bAllowReduceIact = TRUE;
3673 opt.bInitPotByIntegration = TRUE;
3674 opt.acTrestart = 1.0;
3675 opt.stepchange = 100;
3676 opt.stepUpdateContrib = 100;
3678 if (!parse_common_args(
3679 &argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &opt.oenv))
3684 opt.unit = nenum(en_unit);
3685 opt.bsMethod = nenum(en_bsMethod);
3687 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3689 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3690 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3691 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3692 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3693 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3694 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3695 if (opt.bTab && opt.bPullf)
3698 "Force input does not work with tabulated potentials. "
3699 "Provide pullx.xvg or pdo files!");
3702 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3704 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3706 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3709 "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3710 "\n\n Check gmx wham -h !");
3713 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3714 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3715 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3716 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3717 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3719 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3720 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3721 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3722 if ((bMinSet || bMaxSet) && bAutoSet)
3724 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3727 if ((bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3729 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3732 if (bMinSet && opt.bAuto)
3734 printf("Note: min and max given, switching off -auto.\n");
3738 if (opt.bTauIntGiven && opt.bCalcTauInt)
3741 "Either read (option -iiact) or calculate (option -ac) the\n"
3742 "the autocorrelation times. Not both.");
3745 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3748 "Either compute autocorrelation times (ACTs) (option -ac) or "
3749 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3751 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3754 "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3755 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3758 /* Reading gmx4/gmx5 pull output and tpr files */
3759 if (opt.bTpr || opt.bPullf || opt.bPullx)
3761 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3763 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3764 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3765 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3768 opt.bPullf ? "force" : "position",
3771 if (nfiles != nfiles2)
3773 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles, opt.fnTpr, nfiles2, fnPull);
3776 /* Read file that selects the pull group to be used */
3777 if (opt.fnCoordSel != nullptr)
3779 readPullCoordSelection(&opt, fninTpr, nfiles);
3782 window = initUmbrellaWindows(nfiles);
3783 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3786 { /* reading pdo files */
3787 if (opt.fnCoordSel != nullptr)
3790 "Reading a -is file is not supported with PDO input files.\n"
3791 "Use awk or a similar tool to pick the required pull groups from your PDO "
3794 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3795 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3796 window = initUmbrellaWindows(nfiles);
3797 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3800 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3801 We can therefore get the units for the xlabel from the first coordinate. */
3802 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3806 /* enforce equal weight for all histograms? */
3809 enforceEqualWeights(window, nwins);
3812 /* write histograms */
3813 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms", xlabel, "count", opt.oenv);
3814 for (l = 0; l < opt.bins; ++l)
3816 fprintf(histout, "%e\t", (l + 0.5) / opt.bins * (opt.max - opt.min) + opt.min);
3817 for (i = 0; i < nwins; ++i)
3819 for (j = 0; j < window[i].nPull; ++j)
3821 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3824 fprintf(histout, "\n");
3827 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3830 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3834 /* Using tabulated umbrella potential */
3837 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3840 /* Integrated autocorrelation times provided ? */
3841 if (opt.bTauIntGiven)
3843 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3846 /* Compute integrated autocorrelation times */
3847 if (opt.bCalcTauInt)
3849 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3852 /* calc average and sigma for each histogram
3853 (maybe required for bootstrapping. If not, this is fast anyhow) */
3854 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3856 averageSigma(window, nwins);
3859 /* Get initial potential by simple integration */
3860 if (opt.bInitPotByIntegration)
3862 guessPotByIntegration(window, nwins, &opt, xlabel);
3865 /* Check if complete reaction coordinate is covered */
3866 checkReactionCoordinateCovered(window, nwins, &opt);
3868 /* Calculate profile */
3869 snew(profile, opt.bins);
3877 if ((i % opt.stepUpdateContrib) == 0)
3879 setup_acc_wham(profile, window, nwins, &opt);
3881 if (maxchange < opt.Tolerance)
3884 /* if (opt.verbose) */
3885 printf("Switched to exact iteration in iteration %d\n", i);
3887 calc_profile(profile, window, nwins, &opt, bExact);
3888 if (((i % opt.stepchange) == 0 || i == 1) && i != 0)
3890 printf("\t%4d) Maximum change %e\n", i, maxchange);
3893 } while ((maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3894 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3896 /* calc error from Kumar's formula */
3897 /* Unclear how the error propagates along reaction coordinate, therefore
3899 /* calc_error_kumar(profile,window, nwins,&opt); */
3901 /* Write profile in energy units? */
3904 prof_normalization_and_unit(profile, &opt);
3905 std::strcpy(ylabel, en_unit_label[opt.unit]);
3906 std::strcpy(title, "Umbrella potential");
3910 std::strcpy(ylabel, "Density of states");
3911 std::strcpy(title, "Density of states");
3914 /* symmetrize profile around z=0? */
3917 symmetrizeProfile(profile, &opt);
3920 /* write profile or density of states */
3921 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3922 for (i = 0; i < opt.bins; ++i)
3924 fprintf(profout, "%e\t%e\n", (i + 0.5) / opt.bins * (opt.max - opt.min) + opt.min, profile[i]);
3927 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3929 /* Bootstrap Method */
3932 do_bootstrapping(opt2fn("-bsres", NFILE, fnm),
3933 opt2fn("-bsprof", NFILE, fnm),
3934 opt2fn("-hist", NFILE, fnm),
3944 freeUmbrellaWindows(window, nfiles);
3946 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3947 please_cite(stdout, "Hub2010");