2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
57 #include "gromacs/commandline/pargs.h"
58 #include "gromacs/fileio/tpxio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxana/gmx_ana.h"
61 #include "gromacs/math/functions.h"
62 #include "gromacs/math/units.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/pull-params.h"
67 #include "gromacs/mdtypes/state.h"
68 #include "gromacs/pulling/pull.h"
69 #include "gromacs/random/tabulatednormaldistribution.h"
70 #include "gromacs/random/threefry.h"
71 #include "gromacs/random/uniformintdistribution.h"
72 #include "gromacs/random/uniformrealdistribution.h"
73 #include "gromacs/utility/arraysize.h"
74 #include "gromacs/utility/cstringutil.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/futil.h"
78 #include "gromacs/utility/gmxomp.h"
79 #include "gromacs/utility/pleasecite.h"
80 #include "gromacs/utility/smalloc.h"
82 //! longest file names allowed in input files
83 #define WHAM_MAXFILELEN 2048
86 * enum for energy units
89 enSel, en_kJ, en_kCal, en_kT, enNr
92 * enum for type of input files (pdos, tpr, or pullf)
95 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
98 /*! \brief enum for bootstrapping method
100 * These bootstrap methods are supported:
101 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
102 * (bsMethod_BayesianHist)
103 * - bootstrap complete histograms (bsMethod_hist)
104 * - bootstrap trajectories from given umbrella histograms. This generates new
105 * "synthetic" histograms (bsMethod_traj)
106 * - bootstrap trajectories from Gaussian with mu/sigma computed from
107 * the respective histogram (bsMethod_trajGauss). This gives very similar
108 * results compared to bsMethod_traj.
110 * ********************************************************************
111 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
112 * JS Hub, BL de Groot, D van der Spoel
113 * g_wham - A free weighted histogram analysis implementation including
114 * robust error and autocorrelation estimates,
115 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
116 * ********************************************************************
119 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
120 bsMethod_traj, bsMethod_trajGauss
123 //! Parameters of one pull coodinate
126 int pull_type; //!< such as constraint, umbrella, ...
127 int geometry; //!< such as distance, direction, cylinder
128 int ngroup; //!< the number of pull groups involved
129 ivec dim; //!< pull dimension with geometry distance
130 int ndim; //!< nr of pull_dim != 0
131 real k; //!< force constants in tpr file
132 real init_dist; //!< reference displacement
133 char coord_unit[256]; //!< unit of the displacement
136 //! Parameters of the umbrella potentials
140 * \name Using umbrella pull code since gromacs 4.x
143 int npullcrds; //!< nr of umbrella pull coordinates for reading
144 t_pullcoord *pcrd; //!< the pull coordinates
145 gmx_bool bPrintCOM; //!< COMs of pull groups writtn in pullx.xvg
146 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
147 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
151 * \name Using PDO files common until gromacs 3.x
159 char PullName[4][256];
161 double UmbCons[4][3];
165 //! Data in the umbrella histograms
168 int nPull; //!< nr of pull groups in this pdo or pullf/x file
169 double **Histo; //!< nPull histograms
170 double **cum; //!< nPull cumulative distribution functions
171 int nBin; //!< nr of bins. identical to opt->bins
172 double *k; //!< force constants for the nPull coords
173 double *pos; //!< umbrella positions for the nPull coords
174 double *z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
175 int *N; //!< nr of data points in nPull histograms.
176 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
178 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
180 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
181 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
184 double *tau; //!< intetrated autocorrelation time (IACT)
185 double *tausmooth; //!< smoothed IACT
187 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
189 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
191 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
193 /*! \brief average force estimated from average displacement, fAv=dzAv*k
195 * Used for integration to guess the potential.
198 real *aver; //!< average of histograms
199 real *sigma; //!< stddev of histograms
200 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
203 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
206 int n; //!< total nr of pull coords in this tpr file
207 int nUse; //!< nr of pull coords used
208 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
211 //! Parameters of WHAM
212 typedef struct // NOLINT(clang-analyzer-optin.performance.Padding)
218 const char *fnTpr, *fnPullf, *fnCoordSel;
219 const char *fnPdo, *fnPullx; //!< file names of input
220 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
221 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
223 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
224 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
225 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
226 t_coordselection *coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
229 * \name Basic WHAM options
232 int bins; //!< nr of bins, min, max, and dz of profile
234 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
235 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
238 * \name Output control
241 gmx_bool bLog; //!< energy output (instead of probability) for profile
242 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
243 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
244 /*! \brief after wham, set prof to zero at this z-position.
245 * When bootstrapping, set zProf0 to a "stable" reference position.
248 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
250 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
251 gmx_bool bAuto; //!< determine min and max automatically but do not exit
253 gmx_bool verbose; //!< more noisy wham mode
254 int stepchange; //!< print maximum change in prof after how many interations
255 gmx_output_env_t *oenv; //!< xvgr options
258 * \name Autocorrelation stuff
261 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
262 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
263 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
264 real acTrestart; //!< when computing ACT, time between restarting points
266 /* \brief Enforce the same weight for each umbella window, that is
267 * calculate with the same number of data points for
268 * each window. That can be reasonable, if the histograms
269 * have different length, but due to autocorrelation,
270 * a longer simulation should not have larger weightin wham.
276 * \name Bootstrapping stuff
279 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
281 /* \brief bootstrap method
283 * if == bsMethod_hist, consider complete histograms as independent
284 * data points and, hence, only mix complete histograms.
285 * if == bsMethod_BayesianHist, consider complete histograms
286 * as independent data points, but assign random weights
287 * to the histograms during the bootstrapping ("Bayesian bootstrap")
288 * In case of long correlations (e.g., inside a channel), these
289 * will yield a more realistic error.
290 * if == bsMethod_traj(Gauss), generate synthetic histograms
292 * histogram by generating an autocorrelated random sequence
293 * that is distributed according to the respective given
294 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
295 * (instead of from the umbrella histogram) to generate a new
300 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
303 /* \brief when mixing histograms, mix only histograms withing blocks
304 long the reaction coordinate xi. Avoids gaps along xi. */
305 int histBootStrapBlockLength;
307 int bsSeed; //!< random seed for bootstrapping
309 /* \brief Write cumulative distribution functions (CDFs) of histograms
310 and write the generated histograms for each bootstrap */
314 * \name tabulated umbrella potential stuff
318 double *tabX, *tabY, tabMin, tabMax, tabDz;
321 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
322 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
325 //! Make an umbrella window (may contain several histograms)
326 static t_UmbrellaWindow * initUmbrellaWindows(int nwin)
328 t_UmbrellaWindow *win;
331 for (i = 0; i < nwin; i++)
333 win[i].Histo = win[i].cum = nullptr;
334 win[i].k = win[i].pos = win[i].z = nullptr;
335 win[i].N = win[i].Ntot = nullptr;
336 win[i].g = win[i].tau = win[i].tausmooth = nullptr;
337 win[i].bContrib = nullptr;
338 win[i].ztime = nullptr;
339 win[i].forceAv = nullptr;
340 win[i].aver = win[i].sigma = nullptr;
341 win[i].bsWeight = nullptr;
346 //! Delete an umbrella window (may contain several histograms)
347 static void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
350 for (i = 0; i < nwin; i++)
354 for (j = 0; j < win[i].nPull; j++)
356 sfree(win[i].Histo[j]);
361 for (j = 0; j < win[i].nPull; j++)
363 sfree(win[i].cum[j]);
368 for (j = 0; j < win[i].nPull; j++)
370 sfree(win[i].bContrib[j]);
382 sfree(win[i].tausmooth);
383 sfree(win[i].bContrib);
385 sfree(win[i].forceAv);
388 sfree(win[i].bsWeight);
394 * Read and setup tabulated umbrella potential
396 static void setup_tab(const char *fn, t_UmbrellaOptions *opt)
401 printf("Setting up tabulated potential from file %s\n", fn);
402 nl = read_xvg(fn, &y, &ny);
406 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
408 opt->tabMin = y[0][0];
409 opt->tabMax = y[0][nl-1];
410 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
413 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
414 "ascending z-direction", fn);
416 for (i = 0; i < nl-1; i++)
418 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
420 gmx_fatal(FARGS, "z-values in %d are not equally spaced.\n", ny, fn);
425 for (i = 0; i < nl; i++)
427 opt->tabX[i] = y[0][i];
428 opt->tabY[i] = y[1][i];
430 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
431 opt->tabMin, opt->tabMax, opt->tabDz);
434 //! Read the header of an PDO file (position, force const, nr of groups)
435 static void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
438 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
440 std::istringstream ist;
443 if (fgets(line, 2048, file) == nullptr)
445 gmx_fatal(FARGS, "Error reading header from pdo file\n");
448 ist >> Buffer0 >> Buffer1 >> Buffer2;
449 if (std::strcmp(Buffer1, "UMBRELLA") != 0)
451 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
452 "(Found in first line: `%s')\n",
453 Buffer1, "UMBRELLA", line);
455 if (std::strcmp(Buffer2, "3.0") != 0)
457 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
461 if (fgets(line, 2048, file) == nullptr)
463 gmx_fatal(FARGS, "Error reading header from pdo file\n");
466 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
467 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
469 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
470 if (header->nDim != 1)
472 gmx_fatal(FARGS, "Currently only supports one dimension");
476 if (fgets(line, 2048, file) == nullptr)
478 gmx_fatal(FARGS, "Error reading header from pdo file\n");
481 ist >> Buffer0 >> Buffer1 >> header->nSkip;
484 if (fgets(line, 2048, file) == nullptr)
486 gmx_fatal(FARGS, "Error reading header from pdo file\n");
489 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
492 if (fgets(line, 2048, file) == nullptr)
494 gmx_fatal(FARGS, "Error reading header from pdo file\n");
497 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
501 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
505 for (i = 0; i < header->nPull; ++i)
507 if (fgets(line, 2048, file) == nullptr)
509 gmx_fatal(FARGS, "Error reading header from pdo file\n");
512 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
513 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
514 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
518 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
519 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
523 if (fgets(line, 2048, file) == nullptr)
525 gmx_fatal(FARGS, "Cannot read from file\n");
529 if (std::strcmp(Buffer3, "#####") != 0)
531 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
536 static char *fgets3(FILE *fp, char ptr[], int *len)
541 if (fgets(ptr, *len-1, fp) == nullptr)
546 while ((std::strchr(ptr, '\n') == nullptr) && (!feof(fp)))
548 /* This line is longer than len characters, let's increase len! */
552 if (fgets(p-1, STRLEN, fp) == nullptr)
557 slen = std::strlen(ptr);
558 if (ptr[slen-1] == '\n')
566 /*! \brief Read the data columns of and PDO file.
568 * TO DO: Get rid of the scanf function to avoid the clang warning.
569 * At the moment, this warning is avoided by hiding the format string
570 * the variable fmtlf.
572 static void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
573 int fileno, t_UmbrellaWindow * win,
574 t_UmbrellaOptions *opt,
575 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
577 int i, inttemp, bins, count, ntot;
578 real minval, maxval, minfound = 1e20, maxfound = -1e20;
579 double temp, time, time0 = 0, dt;
581 t_UmbrellaWindow * window = nullptr;
582 gmx_bool timeok, dt_ok = 1;
583 char *tmpbuf = nullptr, fmt[256], fmtign[256], fmtlf[5] = "%lf";
584 int len = STRLEN, dstep = 1;
585 const int blocklen = 4096;
586 int *lennow = nullptr;
595 /* Need to alocate memory and set up structure */
596 window->nPull = header->nPull;
599 snew(window->Histo, window->nPull);
600 snew(window->z, window->nPull);
601 snew(window->k, window->nPull);
602 snew(window->pos, window->nPull);
603 snew(window->N, window->nPull);
604 snew(window->Ntot, window->nPull);
605 snew(window->g, window->nPull);
606 snew(window->bsWeight, window->nPull);
608 window->bContrib = nullptr;
610 if (opt->bCalcTauInt)
612 snew(window->ztime, window->nPull);
616 window->ztime = nullptr;
618 snew(lennow, window->nPull);
620 for (i = 0; i < window->nPull; ++i)
623 window->bsWeight[i] = 1.;
624 snew(window->Histo[i], bins);
625 window->k[i] = header->UmbCons[i][0];
626 window->pos[i] = header->UmbPos[i][0];
630 if (opt->bCalcTauInt)
632 window->ztime[i] = nullptr;
636 /* Done with setup */
642 minval = maxval = bins = 0; /* Get rid of warnings */
647 while ( (ptr = fgets3(file, tmpbuf, &len)) != nullptr)
651 if (ptr[0] == '#' || std::strlen(ptr) < 2)
656 /* Initiate format string */
658 std::strcat(fmtign, "%*s");
660 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
661 /* Round time to fs */
662 time = 1.0/1000*( static_cast<gmx_int64_t> (time*1000+0.5) );
664 /* get time step of pdo file */
674 dstep = static_cast<int>(opt->dt/dt+0.5);
682 window->dt = dt*dstep;
687 dt_ok = ((count-1)%dstep == 0);
688 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
690 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
691 time,opt->tmin, opt->tmax, dt_ok,timeok); */
695 for (i = 0; i < header->nPull; ++i)
697 std::strcpy(fmt, fmtign);
698 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
699 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
700 if (sscanf(ptr, fmt, &temp))
702 temp += header->UmbPos[i][0];
716 if (opt->bCalcTauInt)
718 /* save time series for autocorrelation analysis */
719 ntot = window->Ntot[i];
720 if (ntot >= lennow[i])
722 lennow[i] += blocklen;
723 srenew(window->ztime[i], lennow[i]);
725 window->ztime[i][ntot] = temp;
729 temp /= (maxval-minval);
731 temp = std::floor(temp);
733 inttemp = static_cast<int> (temp);
740 else if (inttemp >= bins)
746 if (inttemp >= 0 && inttemp < bins)
748 window->Histo[i][inttemp] += 1.;
756 if (time > opt->tmax)
760 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
776 /*! \brief Set identical weights for all histograms
778 * Normally, the weight is given by the number data points in each
779 * histogram, together with the autocorrelation time. This can be overwritten
780 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
781 * an appropriate autocorrelation times instead of using this function.
783 static void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
785 int i, k, j, NEnforced;
788 NEnforced = window[0].Ntot[0];
789 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
790 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
791 /* enforce all histograms to have the same weight as the very first histogram */
793 for (j = 0; j < nWindows; ++j)
795 for (k = 0; k < window[j].nPull; ++k)
797 ratio = 1.0*NEnforced/window[j].Ntot[k];
798 for (i = 0; i < window[0].nBin; ++i)
800 window[j].Histo[k][i] *= ratio;
802 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
807 /*! \brief Simple linear interpolation between two given tabulated points
809 static double tabulated_pot(double dist, t_UmbrellaOptions *opt)
812 double pl, pu, dz, dp;
814 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
816 if (jl < 0 || ju >= opt->tabNbins)
818 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
819 "Provide an extended table.", dist, jl, ju);
823 dz = dist-opt->tabX[jl];
824 dp = (pu-pl)*dz/opt->tabDz;
830 * Check which bins substiantially contribute (accelerates WHAM)
832 * Don't worry, that routine does not mean we compute the PMF in limited precision.
833 * After rapid convergence (using only substiantal contributions), we always switch to
836 static void setup_acc_wham(const double *profile, t_UmbrellaWindow * window, int nWindows,
837 t_UmbrellaOptions *opt)
839 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
840 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
841 gmx_bool bAnyContrib;
842 static int bFirst = 1;
843 static double wham_contrib_lim;
847 for (i = 0; i < nWindows; ++i)
849 nGrptot += window[i].nPull;
851 wham_contrib_lim = opt->Tolerance/nGrptot;
854 ztot = opt->max-opt->min;
857 for (i = 0; i < nWindows; ++i)
859 if (!window[i].bContrib)
861 snew(window[i].bContrib, window[i].nPull);
863 for (j = 0; j < window[i].nPull; ++j)
865 if (!window[i].bContrib[j])
867 snew(window[i].bContrib[j], opt->bins);
870 for (k = 0; k < opt->bins; ++k)
872 temp = (1.0*k+0.5)*dz+min;
873 distance = temp - window[i].pos[j]; /* distance to umbrella center */
875 { /* in cyclic wham: */
876 if (distance > ztot_half) /* |distance| < ztot_half */
880 else if (distance < -ztot_half)
885 /* Note: there are two contributions to bin k in the wham equations:
886 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
887 ii) exp(- U/(BOLTZ*opt->Temperature))
888 where U is the umbrella potential
889 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
894 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
898 U = tabulated_pot(distance, opt); /* Use tabulated potential */
900 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
901 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
902 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
903 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
904 if (window[i].bContrib[j][k])
910 /* If this histo is far outside min and max all bContrib may be FALSE,
911 causing a floating point exception later on. To avoid that, switch
915 for (k = 0; k < opt->bins; ++k)
917 window[i].bContrib[j][k] = TRUE;
924 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
925 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
930 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
936 //! Compute the PMF (one of the two main WHAM routines)
937 static void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
938 t_UmbrellaOptions *opt, gmx_bool bExact)
940 double ztot_half, ztot, min = opt->min, dz = opt->dz;
942 ztot = opt->max-opt->min;
949 int nthreads = gmx_omp_get_max_threads();
950 int thread_id = gmx_omp_get_thread_num();
952 int i0 = thread_id*opt->bins/nthreads;
953 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
955 for (i = i0; i < i1; ++i)
958 double num, denom, invg, temp = 0, distance, U = 0;
960 for (j = 0; j < nWindows; ++j)
962 for (k = 0; k < window[j].nPull; ++k)
964 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
965 temp = (1.0*i+0.5)*dz+min;
966 num += invg*window[j].Histo[k][i];
968 if (!(bExact || window[j].bContrib[k][i]))
972 distance = temp - window[j].pos[k]; /* distance to umbrella center */
974 { /* in cyclic wham: */
975 if (distance > ztot_half) /* |distance| < ztot_half */
979 else if (distance < -ztot_half)
987 U = 0.5*window[j].k[k]*gmx::square(distance); /* harmonic potential assumed. */
991 U = tabulated_pot(distance, opt); /* Use tabulated potential */
993 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
996 profile[i] = num/denom;
999 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1003 //! Compute the free energy offsets z (one of the two main WHAM routines)
1004 static double calc_z(const double * profile, t_UmbrellaWindow * window, int nWindows,
1005 t_UmbrellaOptions *opt, gmx_bool bExact)
1007 double min = opt->min, dz = opt->dz, ztot_half, ztot;
1008 double maxglob = -1e20;
1010 ztot = opt->max-opt->min;
1013 #pragma omp parallel
1017 int nthreads = gmx_omp_get_max_threads();
1018 int thread_id = gmx_omp_get_thread_num();
1020 int i0 = thread_id*nWindows/nthreads;
1021 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1022 double maxloc = -1e20;
1024 for (i = i0; i < i1; ++i)
1026 double total = 0, temp, distance, U = 0;
1029 for (j = 0; j < window[i].nPull; ++j)
1032 for (k = 0; k < window[i].nBin; ++k)
1034 if (!(bExact || window[i].bContrib[j][k]))
1038 temp = (1.0*k+0.5)*dz+min;
1039 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1041 { /* in cyclic wham: */
1042 if (distance > ztot_half) /* |distance| < ztot_half */
1046 else if (distance < -ztot_half)
1054 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
1058 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1060 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1062 /* Avoid floating point exception if window is far outside min and max */
1065 total = -std::log(total);
1071 temp = std::abs(total - window[i].z[j]);
1076 window[i].z[j] = total;
1079 /* Now get maximum maxloc from the threads and put in maxglob */
1080 if (maxloc > maxglob)
1082 #pragma omp critical
1084 if (maxloc > maxglob)
1091 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1097 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1098 static void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1100 int i, j, bins = opt->bins;
1101 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1104 if (min > 0. || max < 0.)
1106 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1107 opt->min, opt->max);
1112 for (i = 0; i < bins; i++)
1116 /* bin left of zsym */
1117 j = static_cast<int> (std::floor((zsym-min)/dz-0.5));
1118 if (j >= 0 && (j+1) < bins)
1120 /* interpolate profile linearly between bins j and j+1 */
1121 z1 = min+(j+0.5)*dz;
1123 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1124 /* average between left and right */
1125 prof2[i] = 0.5*(profsym+profile[i]);
1129 prof2[i] = profile[i];
1133 std::memcpy(profile, prof2, bins*sizeof(double));
1137 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1138 static void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1141 double unit_factor = 1., diff;
1145 /* Not log? Nothing to do! */
1151 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1152 if (opt->unit == en_kT)
1156 else if (opt->unit == en_kJ)
1158 unit_factor = BOLTZ*opt->Temperature;
1160 else if (opt->unit == en_kCal)
1162 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1166 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1169 for (i = 0; i < bins; i++)
1171 if (profile[i] > 0.0)
1173 profile[i] = -std::log(profile[i])*unit_factor;
1177 /* shift to zero at z=opt->zProf0 */
1178 if (!opt->bProf0Set)
1184 /* Get bin with shortest distance to opt->zProf0
1185 (-0.5 from bin position and +0.5 from rounding cancel) */
1186 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1191 else if (imin >= bins)
1195 diff = profile[imin];
1199 for (i = 0; i < bins; i++)
1205 //! Make an array of random integers (used for bootstrapping)
1206 static void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine * rng)
1208 gmx::UniformIntDistribution<int> dist(0, blockLength-1);
1210 int ipull, blockBase, nr, ipullRandom;
1212 if (blockLength == 0)
1214 blockLength = nPull;
1217 for (ipull = 0; ipull < nPull; ipull++)
1219 blockBase = (ipull/blockLength)*blockLength;
1221 { /* make sure nothing bad happens in the last block */
1222 nr = dist(*rng); // [0,blockLength-1]
1223 ipullRandom = blockBase + nr;
1225 while (ipullRandom >= nPull);
1226 if (ipullRandom < 0 || ipullRandom >= nPull)
1228 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1229 "blockLength = %d, blockBase = %d\n",
1230 ipullRandom, nPull, nr, blockLength, blockBase);
1232 randomArray[ipull] = ipullRandom;
1234 /*for (ipull=0; ipull<nPull; ipull++)
1235 printf("%d ",randomArray[ipull]); printf("\n"); */
1238 /*! \brief Set pull group information of a synthetic histogram
1240 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1241 * but it is not required if we bootstrap complete histograms.
1243 static void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1244 t_UmbrellaWindow *thisWindow, int pullid)
1246 synthWindow->N [0] = thisWindow->N [pullid];
1247 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1248 synthWindow->pos [0] = thisWindow->pos [pullid];
1249 synthWindow->z [0] = thisWindow->z [pullid];
1250 synthWindow->k [0] = thisWindow->k [pullid];
1251 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1252 synthWindow->g [0] = thisWindow->g [pullid];
1253 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1256 /*! \brief Calculate cumulative distribution function of of all histograms.
1258 * This allow to create random number sequences
1259 * which are distributed according to the histograms. Required to generate
1260 * the "synthetic" histograms for the Bootstrap method
1262 static void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1263 t_UmbrellaOptions *opt, const char *fnhist, const char *xlabel)
1267 char *fn = nullptr, *buf = nullptr;
1270 if (opt->bs_verbose)
1272 snew(fn, std::strlen(fnhist)+10);
1273 snew(buf, std::strlen(fnhist)+10);
1274 sprintf(fn, "%s_cumul.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4));
1275 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1279 for (i = 0; i < nWindows; i++)
1281 snew(window[i].cum, window[i].nPull);
1282 for (j = 0; j < window[i].nPull; j++)
1284 snew(window[i].cum[j], nbin+1);
1285 window[i].cum[j][0] = 0.;
1286 for (k = 1; k <= nbin; k++)
1288 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1291 /* normalize CDFs. Ensure cum[nbin]==1 */
1292 last = window[i].cum[j][nbin];
1293 for (k = 0; k <= nbin; k++)
1295 window[i].cum[j][k] /= last;
1300 printf("Cumulative distribution functions of all histograms created.\n");
1301 if (opt->bs_verbose)
1303 for (k = 0; k <= nbin; k++)
1305 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1306 for (i = 0; i < nWindows; i++)
1308 for (j = 0; j < window[i].nPull; j++)
1310 fprintf(fp, "%g\t", window[i].cum[j][k]);
1315 printf("Wrote cumulative distribution functions to %s\n", fn);
1323 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1325 * This is used to generate a random sequence distributed according to a histogram
1327 static void searchCumulative(const double xx[], int n, double x, int *j)
1349 else if (x == xx[n-1])
1359 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1360 static void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1361 int pullid, t_UmbrellaOptions *opt)
1363 int N, i, nbins, r_index, ibin;
1364 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1367 N = thisWindow->N[pullid];
1368 dt = thisWindow->dt;
1369 nbins = thisWindow->nBin;
1371 /* tau = autocorrelation time */
1372 if (opt->tauBootStrap > 0.0)
1374 tausteps = opt->tauBootStrap/dt;
1376 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1378 /* calc tausteps from g=1+2tausteps */
1379 g = thisWindow->g[pullid];
1385 "When generating hypothetical trajectories from given umbrella histograms,\n"
1386 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1387 "cannot be predicted. You have 3 options:\n"
1388 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1389 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1391 " with option -iiact for all umbrella windows.\n"
1392 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1393 " Use option (3) only if you are sure what you're doing, you may severely\n"
1394 " underestimate the error if a too small ACT is given.\n");
1395 gmx_fatal(FARGS, "%s", errstr);
1398 synthWindow->N [0] = N;
1399 synthWindow->pos [0] = thisWindow->pos[pullid];
1400 synthWindow->z [0] = thisWindow->z[pullid];
1401 synthWindow->k [0] = thisWindow->k[pullid];
1402 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1403 synthWindow->g [0] = thisWindow->g [pullid];
1404 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1406 for (i = 0; i < nbins; i++)
1408 synthWindow->Histo[0][i] = 0.;
1411 if (opt->bsMethod == bsMethod_trajGauss)
1413 sig = thisWindow->sigma [pullid];
1414 mu = thisWindow->aver [pullid];
1417 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1419 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1421 z = a*x + sqrt(1-a^2)*y
1422 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1423 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1425 C(t) = exp(-t/tau) with tau=-1/ln(a)
1427 Then, use error function to turn the Gaussian random variable into a uniformly
1428 distributed one in [0,1]. Eventually, use cumulative distribution function of
1429 histogram to get random variables distributed according to histogram.
1430 Note: The ACT of the flat distribution and of the generated histogram is not
1431 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1433 a = std::exp(-1.0/tausteps);
1434 ap = std::sqrt(1.0-a*a);
1435 invsqrt2 = 1.0/std::sqrt(2.0);
1437 /* init random sequence */
1438 x = opt->normalDistribution(opt->rng);
1440 if (opt->bsMethod == bsMethod_traj)
1442 /* bootstrap points from the umbrella histograms */
1443 for (i = 0; i < N; i++)
1445 y = opt->normalDistribution(opt->rng);
1447 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1448 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1450 r = 0.5*(1+std::erf(x*invsqrt2));
1451 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1452 synthWindow->Histo[0][r_index] += 1.;
1455 else if (opt->bsMethod == bsMethod_trajGauss)
1457 /* bootstrap points from a Gaussian with the same average and sigma
1458 as the respective umbrella histogram. The idea was, that -given
1459 limited sampling- the bootstrapped histograms are otherwise biased
1460 from the limited sampling of the US histos. However, bootstrapping from
1461 the Gaussian seems to yield a similar estimate. */
1465 y = opt->normalDistribution(opt->rng);
1468 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1473 while ( (ibin += nbins) < 0)
1478 else if (ibin >= nbins)
1480 while ( (ibin -= nbins) >= nbins)
1487 if (ibin >= 0 && ibin < nbins)
1489 synthWindow->Histo[0][ibin] += 1.;
1496 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1500 /*! \brief Write all histograms to a file
1502 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1503 * sets of bootstrapped histograms.
1505 static void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1506 int bs_index, t_UmbrellaOptions *opt, const char *xlabel)
1508 char *fn = nullptr, *buf = nullptr, title[256];
1514 snew(fn, std::strlen(fnhist)+10);
1515 snew(buf, std::strlen(fnhist)+1);
1516 sprintf(fn, "%s_bs%d.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4), bs_index);
1517 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1521 fn = gmx_strdup(fnhist);
1522 std::strcpy(title, "Umbrella histograms");
1525 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1528 /* Write histograms */
1529 for (l = 0; l < bins; ++l)
1531 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1532 for (i = 0; i < nWindows; ++i)
1534 for (j = 0; j < window[i].nPull; ++j)
1536 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1543 printf("Wrote %s\n", fn);
1551 //! Used for qsort to sort random numbers
1552 static int func_wham_is_larger(const void *a, const void *b)
1571 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1572 static void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1576 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1580 /* generate ordered random numbers between 0 and nAllPull */
1581 for (i = 0; i < nAllPull-1; i++)
1583 r[i] = dist(opt->rng);
1585 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1586 r[nAllPull-1] = 1.0*nAllPull;
1588 synthwin[0].bsWeight[0] = r[0];
1589 for (i = 1; i < nAllPull; i++)
1591 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1594 /* avoid to have zero weight by adding a tiny value */
1595 for (i = 0; i < nAllPull; i++)
1597 if (synthwin[i].bsWeight[0] < 1e-5)
1599 synthwin[i].bsWeight[0] = 1e-5;
1606 //! The main bootstrapping routine
1607 static void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1608 const char *xlabel, char* ylabel, double *profile,
1609 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1611 t_UmbrellaWindow * synthWindow;
1612 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1613 int i, j, *randomArray = nullptr, winid, pullid, ib;
1614 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1616 gmx_bool bExact = FALSE;
1618 /* init random generator */
1619 if (opt->bsSeed == 0)
1621 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1623 opt->rng.seed(opt->bsSeed);
1625 snew(bsProfile, opt->bins);
1626 snew(bsProfiles_av, opt->bins);
1627 snew(bsProfiles_av2, opt->bins);
1629 /* Create array of all pull groups. Note that different windows
1630 may have different nr of pull groups
1631 First: Get total nr of pull groups */
1633 for (i = 0; i < nWindows; i++)
1635 nAllPull += window[i].nPull;
1637 snew(allPull_winId, nAllPull);
1638 snew(allPull_pullId, nAllPull);
1640 /* Setup one array of all pull groups */
1641 for (i = 0; i < nWindows; i++)
1643 for (j = 0; j < window[i].nPull; j++)
1645 allPull_winId[iAllPull] = i;
1646 allPull_pullId[iAllPull] = j;
1651 /* setup stuff for synthetic windows */
1652 snew(synthWindow, nAllPull);
1653 for (i = 0; i < nAllPull; i++)
1655 synthWindow[i].nPull = 1;
1656 synthWindow[i].nBin = opt->bins;
1657 snew(synthWindow[i].Histo, 1);
1658 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1660 snew(synthWindow[i].Histo[0], opt->bins);
1662 snew(synthWindow[i].N, 1);
1663 snew(synthWindow[i].pos, 1);
1664 snew(synthWindow[i].z, 1);
1665 snew(synthWindow[i].k, 1);
1666 snew(synthWindow[i].bContrib, 1);
1667 snew(synthWindow[i].g, 1);
1668 snew(synthWindow[i].bsWeight, 1);
1671 switch (opt->bsMethod)
1674 snew(randomArray, nAllPull);
1675 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1676 please_cite(stdout, "Hub2006");
1678 case bsMethod_BayesianHist:
1679 /* just copy all histogams into synthWindow array */
1680 for (i = 0; i < nAllPull; i++)
1682 winid = allPull_winId [i];
1683 pullid = allPull_pullId[i];
1684 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1688 case bsMethod_trajGauss:
1689 calc_cumulatives(window, nWindows, opt, fnhist, xlabel);
1692 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1695 /* do bootstrapping */
1696 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1697 for (ib = 0; ib < opt->nBootStrap; ib++)
1699 printf(" *******************************************\n"
1700 " ******** Start bootstrap nr %d ************\n"
1701 " *******************************************\n", ib+1);
1703 switch (opt->bsMethod)
1706 /* bootstrap complete histograms from given histograms */
1707 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1708 for (i = 0; i < nAllPull; i++)
1710 winid = allPull_winId [randomArray[i]];
1711 pullid = allPull_pullId[randomArray[i]];
1712 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1715 case bsMethod_BayesianHist:
1716 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1717 setRandomBsWeights(synthWindow, nAllPull, opt);
1720 case bsMethod_trajGauss:
1721 /* create new histos from given histos, that is generate new hypothetical
1723 for (i = 0; i < nAllPull; i++)
1725 winid = allPull_winId[i];
1726 pullid = allPull_pullId[i];
1727 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1732 /* write histos in case of verbose output */
1733 if (opt->bs_verbose)
1735 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1742 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1745 if ( (i%opt->stepUpdateContrib) == 0)
1747 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1749 if (maxchange < opt->Tolerance)
1753 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1755 printf("\t%4d) Maximum change %e\n", i, maxchange);
1757 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1760 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1761 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1765 prof_normalization_and_unit(bsProfile, opt);
1768 /* symmetrize profile around z=0 */
1771 symmetrizeProfile(bsProfile, opt);
1774 /* save stuff to get average and stddev */
1775 for (i = 0; i < opt->bins; i++)
1778 bsProfiles_av[i] += tmp;
1779 bsProfiles_av2[i] += tmp*tmp;
1780 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1782 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1786 /* write average and stddev */
1787 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1788 if (output_env_get_print_xvgr_codes(opt->oenv))
1790 fprintf(fp, "@TYPE xydy\n");
1792 for (i = 0; i < opt->bins; i++)
1794 bsProfiles_av [i] /= opt->nBootStrap;
1795 bsProfiles_av2[i] /= opt->nBootStrap;
1796 tmp = bsProfiles_av2[i]-gmx::square(bsProfiles_av[i]);
1797 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1798 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1801 printf("Wrote boot strap result to %s\n", fnres);
1804 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1805 static int whaminFileType(char *fn)
1808 len = std::strlen(fn);
1809 if (std::strcmp(fn+len-3, "tpr") == 0)
1813 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1815 return whamin_pullxf;
1817 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1823 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1827 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1828 static void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1829 t_UmbrellaOptions *opt)
1831 char **filename = nullptr, tmp[WHAM_MAXFILELEN+2];
1832 int nread, sizenow, i, block = 1;
1835 fp = gmx_ffopen(fn, "r");
1838 while (fgets(tmp, sizeof(tmp), fp) != nullptr)
1840 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1842 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1844 if (nread >= sizenow)
1847 srenew(filename, sizenow);
1848 for (i = sizenow-block; i < sizenow; i++)
1850 snew(filename[i], WHAM_MAXFILELEN);
1853 /* remove newline if there is one */
1854 if (tmp[std::strlen(tmp)-1] == '\n')
1856 tmp[std::strlen(tmp)-1] = '\0';
1858 std::strcpy(filename[nread], tmp);
1861 printf("Found file %s in %s\n", filename[nread], fn);
1865 *filenamesRet = filename;
1869 //! Open a file or a pipe to a gzipped file
1870 static FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1872 char Buffer[1024], gunzip[1024], *Path = nullptr;
1873 FILE *pipe = nullptr;
1874 static gmx_bool bFirst = 1;
1876 /* gzipped pdo file? */
1877 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1879 /* search gunzip executable */
1880 if (!(Path = getenv("GMX_PATH_GZIP")))
1882 if (gmx_fexist("/bin/gunzip"))
1884 sprintf(gunzip, "%s", "/bin/gunzip");
1886 else if (gmx_fexist("/usr/bin/gunzip"))
1888 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1892 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1893 "You may want to define the path to gunzip "
1894 "with the environment variable GMX_PATH_GZIP.", gunzip);
1899 sprintf(gunzip, "%s/gunzip", Path);
1900 if (!gmx_fexist(gunzip))
1902 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1903 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1908 printf("Using gunzip executable %s\n", gunzip);
1911 if (!gmx_fexist(fn))
1913 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1915 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1918 printf("Executing command '%s'\n", Buffer);
1921 if ((pipe = popen(Buffer, "r")) == nullptr)
1923 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1926 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1932 pipe = gmx_ffopen(fn, "r");
1939 //! Close file or pipe
1940 static void pdo_close_file(FILE *fp)
1949 //! Reading all pdo files
1950 static void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1951 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1954 real mintmp, maxtmp, done = 0.;
1957 /* char Buffer0[1000]; */
1961 gmx_fatal(FARGS, "No files found. Hick.");
1964 /* if min and max are not given, get min and max from the input files */
1967 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1970 for (i = 0; i < nfiles; ++i)
1972 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1973 /*fgets(Buffer0,999,file);
1974 fprintf(stderr,"First line '%s'\n",Buffer0); */
1975 done = 100.0*(i+1)/nfiles;
1976 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1981 read_pdo_header(file, header, opt);
1982 /* here only determine min and max of this window */
1983 read_pdo_data(file, header, i, nullptr, opt, TRUE, &mintmp, &maxtmp);
1984 if (maxtmp > opt->max)
1988 if (mintmp < opt->min)
1994 pdo_close_file(file);
2002 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2003 if (opt->bBoundsOnly)
2005 printf("Found option -boundsonly, now exiting.\n");
2009 /* store stepsize in profile */
2010 opt->dz = (opt->max-opt->min)/opt->bins;
2012 /* Having min and max, we read in all files */
2013 /* Loop over all files */
2014 for (i = 0; i < nfiles; ++i)
2016 done = 100.0*(i+1)/nfiles;
2017 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2022 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2023 read_pdo_header(file, header, opt);
2024 /* load data into window */
2025 read_pdo_data(file, header, i, window, opt, FALSE, nullptr, nullptr);
2026 if ((window+i)->Ntot[0] == 0)
2028 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2032 pdo_close_file(file);
2040 for (i = 0; i < nfiles; ++i)
2047 //! translate 0/1 to N/Y to write pull dimensions
2048 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2050 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2051 static void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt, t_coordselection *coordsel)
2053 t_inputrec irInstance;
2054 t_inputrec *ir = &irInstance;
2056 static int first = 1;
2058 /* printf("Reading %s \n",fn); */
2059 read_tpx_state(fn, ir, &state, nullptr);
2063 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2065 if (ir->pull->ncoord == 0)
2067 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
2070 /* Read overall pull info */
2071 header->npullcrds = ir->pull->ncoord;
2072 header->bPrintCOM = ir->pull->bPrintCOM;
2073 header->bPrintRefValue = ir->pull->bPrintRefValue;
2074 header->bPrintComp = ir->pull->bPrintComp;
2076 /* Read pull coordinates */
2077 snew(header->pcrd, header->npullcrds);
2078 for (int i = 0; i < ir->pull->ncoord; i++)
2080 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
2081 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
2082 header->pcrd[i].ngroup = ir->pull->coord[i].ngroup;
2083 header->pcrd[i].k = ir->pull->coord[i].k;
2084 header->pcrd[i].init_dist = ir->pull->coord[i].init;
2086 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
2087 header->pcrd[i].ndim = header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
2089 std::strcpy(header->pcrd[i].coord_unit,
2090 pull_coordinate_units(&ir->pull->coord[i]));
2092 if (ir->efep != efepNO && ir->pull->coord[i].k != ir->pull->coord[i].kB)
2094 gmx_fatal(FARGS, "Seems like you did free-energy perturbation, and you perturbed the force constant."
2095 " This is not supported.\n");
2097 if (coordsel && (coordsel->n != ir->pull->ncoord))
2099 gmx_fatal(FARGS, "Found %d pull coordinates in %s, but %d columns in the respective line\n"
2100 "coordinate selection file (option -is)\n", ir->pull->ncoord, fn, coordsel->n);
2104 /* Check pull coords for consistency */
2106 ivec thedim = { 0, 0, 0 };
2107 bool geometryIsSet = false;
2108 for (int i = 0; i < ir->pull->ncoord; i++)
2110 if (coordsel == nullptr || coordsel->bUse[i])
2112 if (header->pcrd[i].pull_type != epullUMBRELLA)
2114 gmx_fatal(FARGS, "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only umbrella coodinates can enter WHAM.\n"
2115 "If you have umrella and non-umbrella coordinates, you can select the umbrella coordinates with gmx wham -is\n",
2116 fn, i+1, epull_names[header->pcrd[i].pull_type]);
2120 geom = header->pcrd[i].geometry;
2121 copy_ivec(header->pcrd[i].dim, thedim);
2122 geometryIsSet = true;
2124 if (geom != header->pcrd[i].geometry)
2126 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull geometry (coordinate 1: %s, coordinate %d: %s)\n"
2127 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2128 fn, epullg_names[geom], i+1, epullg_names[header->pcrd[i].geometry]);
2130 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY] || thedim[ZZ] != header->pcrd[i].dim[ZZ])
2132 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull dimensions (coordinate 1: %s %s %s, coordinate %d: %s %s %s)\n"
2133 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2134 fn, int2YN(thedim[XX]), int2YN(thedim[YY]), int2YN(thedim[ZZ]), i+1,
2135 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]));
2137 if (header->pcrd[i].geometry == epullgCYL)
2139 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
2141 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2142 "However, found dimensions [%s %s %s]\n",
2143 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2144 int2YN(header->pcrd[i].dim[ZZ]));
2147 if (header->pcrd[i].k <= 0.0)
2149 gmx_fatal(FARGS, "%s: Pull coordinate %d has force constant of of %g in %s.\n"
2150 "That doesn't seem to be an Umbrella tpr.\n",
2151 fn, i+1, header->pcrd[i].k);
2156 if (opt->verbose || first)
2158 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
2160 for (int i = 0; i < ir->pull->ncoord; i++)
2162 int lentmp = strlen(epullg_names[header->pcrd[i].geometry]);
2163 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
2166 sprintf(fmt, "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d dimensions). Used: %%s\n",
2168 for (int i = 0; i < ir->pull->ncoord; i++)
2170 bool use = (coordsel == nullptr || coordsel->bUse[i]);
2172 epullg_names[header->pcrd[i].geometry], header->pcrd[i].k, header->pcrd[i].init_dist,
2173 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]),
2174 header->pcrd[i].ndim, use ? "Yes" : "No");
2175 printf("\tPull group coordinates of %d groups expected in pullx files.\n", ir->pull->bPrintCOM ? header->pcrd[i].ngroup : 0);
2177 printf("\tReference value of the coordinate%s expected in pullx files.\n",
2178 header->bPrintRefValue ? "" : " not");
2180 if (!opt->verbose && first)
2182 printf("\tUse option -v to see this output for all input tpr files\n\n");
2188 //! Read pullx.xvg or pullf.xvg
2189 static void read_pull_xf(const char *fn, t_UmbrellaHeader * header,
2190 t_UmbrellaWindow * window,
2191 t_UmbrellaOptions *opt,
2192 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2193 t_coordselection *coordsel)
2195 double **y = nullptr, pos = 0., t, force, time0 = 0., dt;
2196 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
2197 int nColExpect, ntot, column;
2198 real min, max, minfound = 1e20, maxfound = -1e20;
2199 gmx_bool dt_ok, timeok;
2200 const char *quantity;
2201 const int blocklen = 4096;
2202 int *lennow = nullptr;
2203 static gmx_bool bFirst = TRUE;
2206 * Data columns in pull output:
2207 * - in force output pullf.xvg:
2208 * No reference columns, one column per pull coordinate
2210 * - in position output pullx.xvg:
2211 * * optionally, ndim columns for COMs of all groups (depending on on mdp options pull-print-com);
2212 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
2213 * be written separately into pullx file, but this is not supported and throws an error below;
2214 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
2217 if (header->bPrintComp && opt->bPullx)
2219 gmx_fatal(FARGS, "gmx wham cannot read pullx files if the components of the coordinate was written\n"
2220 "(mdp option pull-print-components). Provide the pull force files instead (with option -if).\n");
2223 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
2224 snew(nColThisCrd, header->npullcrds);
2225 snew(nColCOMCrd, header->npullcrds);
2226 snew(nColRefCrd, header->npullcrds);
2228 if (opt->bPullx == FALSE)
2230 /* pullf reading: simply one column per coordinate */
2231 for (g = 0; g < header->npullcrds; g++)
2240 /* pullx reading. Note explanation above. */
2241 for (g = 0; g < header->npullcrds; g++)
2243 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
2244 nColCOMCrd[g] = (header->bPrintCOM ? header->pcrd[g].ndim*header->pcrd[g].ngroup : 0);
2245 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
2249 nColExpect = 1; /* time column */
2250 for (g = 0; g < header->npullcrds; g++)
2252 nColExpect += nColThisCrd[g];
2255 /* read pullf or pullx. Could be optimized if min and max are given. */
2256 nt = read_xvg(fn, &y, &ny);
2258 /* Check consistency */
2259 quantity = opt->bPullx ? "position" : "force";
2262 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2264 if (bFirst || opt->verbose)
2266 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
2267 for (i = 0; i < header->npullcrds; i++)
2269 printf("\tColumns for pull coordinate %d\n", i+1);
2270 printf("\t\treaction coordinate: %d\n"
2271 "\t\tcenter-of-mass of groups: %d\n"
2272 "\t\treference position column: %s\n",
2273 1, nColCOMCrd[i], (header->bPrintRefValue ? "Yes" : "No"));
2275 printf("\tFound %d times in %s\n", nt, fn);
2278 if (nColExpect != ny)
2280 gmx_fatal(FARGS, "Expected %d columns (including time column) in %s, but found %d."
2281 " Maybe you confused options -if and -ix ?", nColExpect, fn, ny);
2292 window->dt = y[0][1]-y[0][0];
2294 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2296 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2299 /* Need to alocate memory and set up structure for windows */
2302 /* Use only groups selected with option -is file */
2303 if (header->npullcrds != coordsel->n)
2305 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2306 header->npullcrds, coordsel->n);
2308 window->nPull = coordsel->nUse;
2312 window->nPull = header->npullcrds;
2315 window->nBin = bins;
2316 snew(window->Histo, window->nPull);
2317 snew(window->z, window->nPull);
2318 snew(window->k, window->nPull);
2319 snew(window->pos, window->nPull);
2320 snew(window->N, window->nPull);
2321 snew(window->Ntot, window->nPull);
2322 snew(window->g, window->nPull);
2323 snew(window->bsWeight, window->nPull);
2324 window->bContrib = nullptr;
2326 if (opt->bCalcTauInt)
2328 snew(window->ztime, window->nPull);
2332 window->ztime = nullptr;
2334 snew(lennow, window->nPull);
2336 for (g = 0; g < window->nPull; ++g)
2339 window->bsWeight [g] = 1.;
2341 window->Ntot [g] = 0;
2343 snew(window->Histo[g], bins);
2345 if (opt->bCalcTauInt)
2347 window->ztime[g] = nullptr;
2351 /* Copying umbrella center and force const is more involved since not
2352 all pull groups from header (tpr file) may be used in window variable */
2353 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2355 if (coordsel && (coordsel->bUse[g] == FALSE))
2359 window->k [gUsed] = header->pcrd[g].k;
2360 window->pos[gUsed] = header->pcrd[g].init_dist;
2365 { /* only determine min and max */
2368 min = max = bins = 0; /* Get rid of warnings */
2372 for (i = 0; i < nt; i++)
2374 /* Do you want that time frame? */
2375 t = 1.0/1000*( static_cast<gmx_int64_t> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2377 /* get time step of pdo file and get dstep from opt->dt */
2387 dstep = static_cast<int>(opt->dt/dt+0.5);
2395 window->dt = dt*dstep;
2399 dt_ok = (i%dstep == 0);
2400 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2402 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2403 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2406 /* Note: if coordsel == NULL:
2407 * all groups in pullf/x file are stored in this window, and gUsed == g
2408 * if coordsel != NULL:
2409 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2412 for (g = 0; g < header->npullcrds; ++g)
2414 /* was this group selected for application in WHAM? */
2415 if (coordsel && (coordsel->bUse[g] == FALSE))
2423 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2425 pos = -force/header->pcrd[g].k + header->pcrd[g].init_dist;
2429 /* Pick the correct column index.
2430 Note that there is always exactly one displacement column.
2433 for (int j = 0; j < g; j++)
2435 column += nColThisCrd[j];
2440 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2454 if (gUsed >= window->nPull)
2456 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been caught before.\n",
2457 gUsed, window->nPull);
2460 if (opt->bCalcTauInt && !bGetMinMax)
2462 /* save time series for autocorrelation analysis */
2463 ntot = window->Ntot[gUsed];
2464 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2465 if (ntot >= lennow[gUsed])
2467 lennow[gUsed] += blocklen;
2468 srenew(window->ztime[gUsed], lennow[gUsed]);
2470 window->ztime[gUsed][ntot] = pos;
2473 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2478 while ( (ibin += bins) < 0)
2483 else if (ibin >= bins)
2485 while ( (ibin -= bins) >= bins)
2491 if (ibin >= 0 && ibin < bins)
2493 window->Histo[gUsed][ibin] += 1.;
2496 window->Ntot[gUsed]++;
2500 else if (t > opt->tmax)
2504 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2516 for (i = 0; i < ny; i++)
2522 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2523 static void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2524 t_UmbrellaHeader* header,
2525 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2528 real mintmp, maxtmp;
2530 printf("Reading %d tpr and pullf files\n", nfiles);
2532 /* min and max not given? */
2535 printf("Automatic determination of boundaries...\n");
2538 for (i = 0; i < nfiles; i++)
2540 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2542 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2544 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2545 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2547 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2549 read_pull_xf(fnPull[i], header, nullptr, opt, TRUE, &mintmp, &maxtmp,
2550 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2551 if (maxtmp > opt->max)
2555 if (mintmp < opt->min)
2560 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2561 if (opt->bBoundsOnly)
2563 printf("Found option -boundsonly, now exiting.\n");
2567 /* store stepsize in profile */
2568 opt->dz = (opt->max-opt->min)/opt->bins;
2570 bool foundData = false;
2571 for (i = 0; i < nfiles; i++)
2573 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2575 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2577 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2578 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2580 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2582 read_pull_xf(fnPull[i], header, window+i, opt, FALSE, nullptr, nullptr,
2583 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2584 if (window[i].Ntot[0] == 0)
2586 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2595 gmx_fatal(FARGS, "No data points were found in pullf/pullx files. Maybe you need to specify the -b option?\n");
2598 for (i = 0; i < nfiles; i++)
2607 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2609 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2610 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2612 static void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2614 int nlines, ny, i, ig;
2617 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2618 nlines = read_xvg(fn, &iact, &ny);
2619 if (nlines != nwins)
2621 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2624 for (i = 0; i < nlines; i++)
2626 if (window[i].nPull != ny)
2628 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2629 "number of pull groups is different in different simulations. That is not\n"
2630 "supported yet. Sorry.\n");
2632 for (ig = 0; ig < window[i].nPull; ig++)
2634 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2635 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2637 if (iact[ig][i] <= 0.0)
2639 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2646 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2649 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2650 * that -in case of limited sampling- the ACT may be severely underestimated.
2651 * Note: the g=1+2tau are overwritten.
2652 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2655 static void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2658 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2660 /* only evaluate within +- 3sigma of the Gausian */
2661 siglim = 3.0*opt->sigSmoothIact;
2662 siglim2 = gmx::square(siglim);
2663 /* pre-factor of Gaussian */
2664 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2665 invtwosig2 = 0.5/gmx::square(opt->sigSmoothIact);
2667 for (i = 0; i < nwins; i++)
2669 snew(window[i].tausmooth, window[i].nPull);
2670 for (ig = 0; ig < window[i].nPull; ig++)
2674 pos = window[i].pos[ig];
2675 for (j = 0; j < nwins; j++)
2677 for (jg = 0; jg < window[j].nPull; jg++)
2679 dpos2 = gmx::square(window[j].pos[jg]-pos);
2680 if (dpos2 < siglim2)
2682 w = gaufact*std::exp(-dpos2*invtwosig2);
2684 tausm += w*window[j].tau[jg];
2685 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2686 w,dpos2,pos,gaufact,invtwosig2); */
2691 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2693 window[i].tausmooth[ig] = tausm;
2697 window[i].tausmooth[ig] = window[i].tau[ig];
2699 window[i].g[ig] = 1+2*tausm/window[i].dt;
2704 //! Stop integrating autoccorelation function when ACF drops under this value
2705 #define WHAM_AC_ZERO_LIMIT 0.05
2707 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2709 static void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2710 t_UmbrellaOptions *opt, const char *fn, const char *xlabel)
2712 int i, ig, ncorr, ntot, j, k, *count, restart;
2713 real *corr, c0, dt, tmp;
2714 real *ztime, av, tausteps;
2715 FILE *fp, *fpcorr = nullptr;
2719 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2720 "time [ps]", "autocorrelation function", opt->oenv);
2724 for (i = 0; i < nwins; i++)
2726 fprintf(stdout, "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2728 ntot = window[i].Ntot[0];
2730 /* using half the maximum time as length of autocorrelation function */
2734 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2735 " points. Provide more pull data!", ntot);
2738 /* snew(corrSq,ncorr); */
2741 snew(window[i].tau, window[i].nPull);
2742 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2748 for (ig = 0; ig < window[i].nPull; ig++)
2750 if (ntot != window[i].Ntot[ig])
2752 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2753 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2755 ztime = window[i].ztime[ig];
2757 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2758 for (j = 0, av = 0; (j < ntot); j++)
2763 for (k = 0; (k < ncorr); k++)
2768 for (j = 0; (j < ntot); j += restart)
2770 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2772 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2774 /* corrSq[k] += tmp*tmp; */
2778 /* divide by nr of frames for each time displacement */
2779 for (k = 0; (k < ncorr); k++)
2781 /* count probably = (ncorr-k+(restart-1))/restart; */
2782 corr[k] = corr[k]/count[k];
2783 /* variance of autocorrelation function */
2784 /* corrSq[k]=corrSq[k]/count[k]; */
2786 /* normalize such that corr[0] == 0 */
2788 for (k = 0; (k < ncorr); k++)
2791 /* corrSq[k]*=c0*c0; */
2794 /* write ACFs in verbose mode */
2797 for (k = 0; (k < ncorr); k++)
2799 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2801 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2804 /* esimate integrated correlation time, fitting is too unstable */
2805 tausteps = 0.5*corr[0];
2806 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2807 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2809 tausteps += corr[j];
2812 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2813 Kumar et al, eq. 28 ff. */
2814 window[i].tau[ig] = tausteps*dt;
2815 window[i].g[ig] = 1+2*tausteps;
2816 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2827 /* plot IACT along reaction coordinate */
2828 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2829 if (output_env_get_print_xvgr_codes(opt->oenv))
2831 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2832 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2833 for (i = 0; i < nwins; i++)
2835 fprintf(fp, "# %3d ", i);
2836 for (ig = 0; ig < window[i].nPull; ig++)
2838 fprintf(fp, " %11g", window[i].tau[ig]);
2843 for (i = 0; i < nwins; i++)
2845 for (ig = 0; ig < window[i].nPull; ig++)
2847 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2850 if (opt->sigSmoothIact > 0.0)
2852 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2853 opt->sigSmoothIact);
2854 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2855 smoothIact(window, nwins, opt);
2856 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2857 if (output_env_get_print_xvgr_codes(opt->oenv))
2859 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2860 fprintf(fp, "@ s1 symbol color 2\n");
2862 for (i = 0; i < nwins; i++)
2864 for (ig = 0; ig < window[i].nPull; ig++)
2866 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2871 printf("Wrote %s\n", fn);
2875 * compute average and sigma of each umbrella histogram
2877 static void averageSigma(t_UmbrellaWindow *window, int nwins)
2880 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2882 for (i = 0; i < nwins; i++)
2884 snew(window[i].aver, window[i].nPull);
2885 snew(window[i].sigma, window[i].nPull);
2887 ntot = window[i].Ntot[0];
2888 for (ig = 0; ig < window[i].nPull; ig++)
2890 ztime = window[i].ztime[ig];
2891 for (k = 0, av = 0.; k < ntot; k++)
2896 for (k = 0, sum2 = 0.; k < ntot; k++)
2901 sig = std::sqrt(sum2/ntot);
2902 window[i].aver[ig] = av;
2904 /* Note: This estimate for sigma is biased from the limited sampling.
2905 Correct sigma by n/(n-1) where n = number of independent
2906 samples. Only possible if IACT is known.
2910 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2911 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2915 window[i].sigma[ig] = sig;
2917 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2924 * Use histograms to compute average force on pull group.
2926 static void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2928 int i, j, bins = opt->bins, k;
2929 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2932 dz = (max-min)/bins;
2933 ztot = opt->max-min;
2936 /* Compute average displacement from histograms */
2937 for (j = 0; j < nWindows; ++j)
2939 snew(window[j].forceAv, window[j].nPull);
2940 for (k = 0; k < window[j].nPull; ++k)
2944 for (i = 0; i < opt->bins; ++i)
2946 temp = (1.0*i+0.5)*dz+min;
2947 distance = temp - window[j].pos[k];
2949 { /* in cyclic wham: */
2950 if (distance > ztot_half) /* |distance| < ztot_half */
2954 else if (distance < -ztot_half)
2959 w = window[j].Histo[k][i]/window[j].g[k];
2960 displAv += w*distance;
2962 /* Are we near min or max? We are getting wrong forces from the histgrams since
2963 the histograms are zero outside [min,max). Therefore, assume that the position
2964 on the other side of the histomgram center is equally likely. */
2967 posmirrored = window[j].pos[k]-distance;
2968 if (posmirrored >= max || posmirrored < min)
2970 displAv += -w*distance;
2977 /* average force from average displacement */
2978 window[j].forceAv[k] = displAv*window[j].k[k];
2979 /* sigma from average square displacement */
2980 /* window[j].sigma [k] = sqrt(displAv2); */
2981 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2987 * Check if the complete reaction coordinate is covered by the histograms
2989 static void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2990 t_UmbrellaOptions *opt)
2992 int i, ig, j, bins = opt->bins, bBoundary;
2993 real avcount = 0, z, relcount, *count;
2994 snew(count, opt->bins);
2996 for (j = 0; j < opt->bins; ++j)
2998 for (i = 0; i < nwins; i++)
3000 for (ig = 0; ig < window[i].nPull; ig++)
3002 count[j] += window[i].Histo[ig][j];
3005 avcount += 1.0*count[j];
3008 for (j = 0; j < bins; ++j)
3010 relcount = count[j]/avcount;
3011 z = (j+0.5)*opt->dz+opt->min;
3012 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
3013 /* check for bins with no data */
3016 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
3017 "You may not get a reasonable profile. Check your histograms!\n", j, z);
3019 /* and check for poor sampling */
3020 else if (relcount < 0.005 && !bBoundary)
3022 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3028 /*! \brief Compute initial potential by integrating the average force
3030 * This speeds up the convergence by roughly a factor of 2
3032 static void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt, const char *xlabel)
3034 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3035 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3038 dz = (opt->max-min)/bins;
3040 printf("Getting initial potential by integration.\n");
3042 /* Compute average displacement from histograms */
3043 computeAverageForce(window, nWindows, opt);
3045 /* Get force for each bin from all histograms in this bin, or, alternatively,
3046 if no histograms are inside this bin, from the closest histogram */
3049 for (j = 0; j < opt->bins; ++j)
3051 pos = (1.0*j+0.5)*dz+min;
3055 groupmin = winmin = 0;
3056 for (i = 0; i < nWindows; i++)
3058 for (ig = 0; ig < window[i].nPull; ig++)
3060 hispos = window[i].pos[ig];
3061 dist = std::abs(hispos-pos);
3062 /* average force within bin */
3066 fAv += window[i].forceAv[ig];
3068 /* at the same time, remember closest histogram */
3077 /* if no histogram found in this bin, use closest histogram */
3084 fAv = window[winmin].forceAv[groupmin];
3088 for (j = 1; j < opt->bins; ++j)
3090 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3093 /* cyclic wham: linearly correct possible offset */
3096 diff = (pot[bins-1]-pot[0])/(bins-1);
3097 for (j = 1; j < opt->bins; ++j)
3104 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3105 for (j = 0; j < opt->bins; ++j)
3107 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3110 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3113 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3114 energy offsets which are usually determined by wham
3115 First: turn pot into probabilities:
3117 for (j = 0; j < opt->bins; ++j)
3119 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3121 calc_z(pot, window, nWindows, opt, TRUE);
3127 //! Count number of words in an line
3128 static int wordcount(char *ptr)
3133 if (std::strlen(ptr) == 0)
3137 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3139 for (i = 0; (ptr[i] != '\0'); i++)
3141 is[cur] = isspace(ptr[i]);
3142 if ((i > 0) && (is[cur] && !is[1-cur]))
3151 /*! \brief Read input file for pull group selection (option -is)
3153 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3155 static void readPullCoordSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3158 int i, iline, n, len = STRLEN, temp;
3159 char *ptr = nullptr, *tmpbuf = nullptr;
3160 char fmt[1024], fmtign[1024];
3161 int block = 1, sizenow;
3163 fp = gmx_ffopen(opt->fnCoordSel, "r");
3164 opt->coordsel = nullptr;
3169 while ( (ptr = fgets3(fp, tmpbuf, &len)) != nullptr)
3174 if (iline >= sizenow)
3177 srenew(opt->coordsel, sizenow);
3179 opt->coordsel[iline].n = n;
3180 opt->coordsel[iline].nUse = 0;
3181 snew(opt->coordsel[iline].bUse, n);
3184 for (i = 0; i < n; i++)
3186 std::strcpy(fmt, fmtign);
3187 std::strcat(fmt, "%d");
3188 if (sscanf(ptr, fmt, &temp))
3190 opt->coordsel[iline].bUse[i] = (temp > 0);
3191 if (opt->coordsel[iline].bUse[i])
3193 opt->coordsel[iline].nUse++;
3196 std::strcat(fmtign, "%*s");
3200 opt->nCoordsel = iline;
3201 if (nTpr != opt->nCoordsel)
3203 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel,
3207 printf("\nUse only these pull coordinates:\n");
3208 for (iline = 0; iline < nTpr; iline++)
3210 printf("%s (%d of %d coordinates):", fnTpr[iline], opt->coordsel[iline].nUse, opt->coordsel[iline].n);
3211 for (i = 0; i < opt->coordsel[iline].n; i++)
3213 if (opt->coordsel[iline].bUse[i])
3226 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3228 //! Number of elements in fnm (used for command line parsing)
3229 #define NFILE asize(fnm)
3231 //! The main gmx wham routine
3232 int gmx_wham(int argc, char *argv[])
3234 const char *desc[] = {
3235 "[THISMODULE] is an analysis program that implements the Weighted",
3236 "Histogram Analysis Method (WHAM). It is intended to analyze",
3237 "output files generated by umbrella sampling simulations to ",
3238 "compute a potential of mean force (PMF).[PAR]",
3240 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3241 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3242 "and, if multiple coordinates need to be analyzed, all used the same",
3243 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3244 "At present, three input modes are supported.",
3246 "* With option [TT]-it[tt], the user provides a file which contains the",
3247 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3248 " AND, with option [TT]-ix[tt], a file which contains file names of",
3249 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3250 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3251 " first pullx, etc.",
3252 "* Same as the previous input mode, except that the the user",
3253 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3254 " From the pull force the position in the umbrella potential is",
3255 " computed. This does not work with tabulated umbrella potentials.",
3256 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3257 " the GROMACS 3.3 umbrella output files. If you have some unusual",
3258 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3259 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3260 " must be similar to the following::",
3263 " # Component selection: 0 0 1",
3265 " # Ref. Group 'TestAtom'",
3266 " # Nr. of pull groups 2",
3267 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3268 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3271 " The number of pull groups, umbrella positions, force constants, and names ",
3272 " may (of course) differ. Following the header, a time column and ",
3273 " a data column for each pull group follows (i.e. the displacement",
3274 " with respect to the umbrella center). Up to four pull groups are possible ",
3275 " per [REF].pdo[ref] file at present.[PAR]",
3276 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If only ",
3277 "some of the pull coordinates should be used, a pull coordinate selection file (option [TT]-is[tt]) can ",
3278 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3279 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr file. ",
3280 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. Example:",
3281 "If you have three tpr files, each containing 4 pull coordinates, but only pull coordinates 1 and 2 should be ",
3282 "used, coordsel.dat looks like this::",
3288 "By default, the output files are::",
3290 " [TT]-o[tt] PMF output file",
3291 " [TT]-hist[tt] Histograms output file",
3293 "Always check whether the histograms sufficiently overlap.[PAR]",
3294 "The umbrella potential is assumed to be harmonic and the force constants are ",
3295 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3296 "a tabulated potential can be provided with [TT]-tab[tt].",
3301 "* [TT]-bins[tt] Number of bins used in analysis",
3302 "* [TT]-temp[tt] Temperature in the simulations",
3303 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3304 "* [TT]-auto[tt] Automatic determination of boundaries",
3305 "* [TT]-min,-max[tt] Boundaries of the profile",
3307 "The data points that are used to compute the profile",
3308 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3309 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3310 "umbrella window.[PAR]",
3311 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3312 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3313 "With energy output, the energy in the first bin is defined to be zero. ",
3314 "If you want the free energy at a different ",
3315 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3316 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3317 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3318 "[THISMODULE] will make use of the",
3319 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3320 "reaction coordinate will assumed be be neighbors.[PAR]",
3321 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3322 "which may be useful for, e.g. membranes.",
3327 "If available, the number of OpenMP threads used by gmx wham can be controlled by setting",
3328 "the [TT]OMP_NUM_THREADS[tt] environment variable.",
3333 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3334 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3335 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3336 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3337 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3338 "Because the IACTs can be severely underestimated in case of limited ",
3339 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3340 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3341 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3342 "integration of the ACFs while the ACFs are larger 0.05.",
3343 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3344 "less robust) method such as fitting to a double exponential, you can ",
3345 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3346 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3347 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull coordinate in the respective file.",
3352 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3353 "otherwise the statistical error may be substantially underestimated. ",
3354 "More background and examples for the bootstrap technique can be found in ",
3355 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3356 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3357 "Four bootstrapping methods are supported and ",
3358 "selected with [TT]-bs-method[tt].",
3360 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3361 " data points, and the bootstrap is carried out by assigning random weights to the ",
3362 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3363 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3364 " statistical error is underestimated.",
3365 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3366 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3367 " (allowing duplication, i.e. sampling with replacement).",
3368 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3369 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3370 " divided into blocks and only histograms within each block are mixed. Note that ",
3371 " the histograms within each block must be representative for all possible histograms, ",
3372 " otherwise the statistical error is underestimated.",
3373 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3374 " such that the generated data points are distributed according the given histograms ",
3375 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3376 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3377 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3378 " Note that this method may severely underestimate the error in case of limited sampling, ",
3379 " that is if individual histograms do not represent the complete phase space at ",
3380 " the respective positions.",
3381 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3382 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3383 " and width of the umbrella histograms. That method yields similar error estimates ",
3384 " like method [TT]traj[tt].",
3386 "Bootstrapping output:",
3388 "* [TT]-bsres[tt] Average profile and standard deviations",
3389 "* [TT]-bsprof[tt] All bootstrapping profiles",
3391 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3392 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3396 const char *en_unit[] = {nullptr, "kJ", "kCal", "kT", nullptr};
3397 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", nullptr};
3398 const char *en_bsMethod[] = { nullptr, "b-hist", "hist", "traj", "traj-gauss", nullptr };
3399 static t_UmbrellaOptions opt;
3402 { "-min", FALSE, etREAL, {&opt.min},
3403 "Minimum coordinate in profile"},
3404 { "-max", FALSE, etREAL, {&opt.max},
3405 "Maximum coordinate in profile"},
3406 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3407 "Determine min and max automatically"},
3408 { "-bins", FALSE, etINT, {&opt.bins},
3409 "Number of bins in profile"},
3410 { "-temp", FALSE, etREAL, {&opt.Temperature},
3412 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3414 { "-v", FALSE, etBOOL, {&opt.verbose},
3416 { "-b", FALSE, etREAL, {&opt.tmin},
3417 "First time to analyse (ps)"},
3418 { "-e", FALSE, etREAL, {&opt.tmax},
3419 "Last time to analyse (ps)"},
3420 { "-dt", FALSE, etREAL, {&opt.dt},
3421 "Analyse only every dt ps"},
3422 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3423 "Write histograms and exit"},
3424 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3425 "Determine min and max and exit (with [TT]-auto[tt])"},
3426 { "-log", FALSE, etBOOL, {&opt.bLog},
3427 "Calculate the log of the profile before printing"},
3428 { "-unit", FALSE, etENUM, {en_unit},
3429 "Energy unit in case of log output" },
3430 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3431 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3432 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3433 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3434 { "-sym", FALSE, etBOOL, {&opt.bSym},
3435 "Symmetrize profile around z=0"},
3436 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3437 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3438 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3439 "Calculate integrated autocorrelation times and use in wham"},
3440 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3441 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3442 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3443 "When computing autocorrelation functions, restart computing every .. (ps)"},
3444 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3445 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3446 "during smoothing"},
3447 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3448 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3449 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3450 "Bootstrap method" },
3451 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3452 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3453 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3454 "Seed for bootstrapping. (-1 = use time)"},
3455 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3456 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3457 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3458 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3459 { "-stepout", FALSE, etINT, {&opt.stepchange},
3460 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3461 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3462 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3466 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3467 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3468 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3469 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3470 { efDAT, "-is", "coordsel", ffOPTRD}, /* input: select pull coords to use */
3471 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3472 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3473 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3474 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3475 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3476 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3477 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3480 int i, j, l, nfiles, nwins, nfiles2;
3481 t_UmbrellaHeader header;
3482 t_UmbrellaWindow * window = nullptr;
3483 double *profile, maxchange = 1e20;
3484 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3485 char **fninTpr, **fninPull, **fninPdo;
3487 FILE *histout, *profout;
3488 char xlabel[STRLEN], ylabel[256], title[256];
3491 opt.verbose = FALSE;
3492 opt.bHistOnly = FALSE;
3501 opt.coordsel = nullptr;
3503 /* bootstrapping stuff */
3505 opt.bsMethod = bsMethod_hist;
3506 opt.tauBootStrap = 0.0;
3508 opt.histBootStrapBlockLength = 8;
3509 opt.bs_verbose = FALSE;
3514 opt.Temperature = 298;
3515 opt.Tolerance = 1e-6;
3516 opt.bBoundsOnly = FALSE;
3518 opt.bCalcTauInt = FALSE;
3519 opt.sigSmoothIact = 0.0;
3520 opt.bAllowReduceIact = TRUE;
3521 opt.bInitPotByIntegration = TRUE;
3522 opt.acTrestart = 1.0;
3523 opt.stepchange = 100;
3524 opt.stepUpdateContrib = 100;
3526 if (!parse_common_args(&argc, argv, 0,
3527 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &opt.oenv))
3532 opt.unit = nenum(en_unit);
3533 opt.bsMethod = nenum(en_bsMethod);
3535 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3537 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3538 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3539 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3540 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3541 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3542 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3543 if (opt.bTab && opt.bPullf)
3545 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3546 "Provide pullx.xvg or pdo files!");
3549 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3551 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3553 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3555 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3556 "\n\n Check gmx wham -h !");
3559 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3560 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3561 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3562 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3563 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3565 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3566 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3567 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3568 if ( (bMinSet || bMaxSet) && bAutoSet)
3570 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3573 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3575 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3578 if (bMinSet && opt.bAuto)
3580 printf("Note: min and max given, switching off -auto.\n");
3584 if (opt.bTauIntGiven && opt.bCalcTauInt)
3586 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3587 "the autocorrelation times. Not both.");
3590 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3592 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3593 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3595 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3597 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3598 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3601 /* Reading gmx4/gmx5 pull output and tpr files */
3602 if (opt.bTpr || opt.bPullf || opt.bPullx)
3604 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3606 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3607 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3608 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3609 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3610 if (nfiles != nfiles2)
3612 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3613 opt.fnTpr, nfiles2, fnPull);
3616 /* Read file that selects the pull group to be used */
3617 if (opt.fnCoordSel != nullptr)
3619 readPullCoordSelection(&opt, fninTpr, nfiles);
3622 window = initUmbrellaWindows(nfiles);
3623 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3626 { /* reading pdo files */
3627 if (opt.fnCoordSel != nullptr)
3629 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3630 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3632 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3633 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3634 window = initUmbrellaWindows(nfiles);
3635 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3638 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3639 We can therefore get the units for the xlabel from the first coordinate. */
3640 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3644 /* enforce equal weight for all histograms? */
3647 enforceEqualWeights(window, nwins);
3650 /* write histograms */
3651 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3652 xlabel, "count", opt.oenv);
3653 for (l = 0; l < opt.bins; ++l)
3655 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3656 for (i = 0; i < nwins; ++i)
3658 for (j = 0; j < window[i].nPull; ++j)
3660 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3663 fprintf(histout, "\n");
3666 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3669 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3673 /* Using tabulated umbrella potential */
3676 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3679 /* Integrated autocorrelation times provided ? */
3680 if (opt.bTauIntGiven)
3682 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3685 /* Compute integrated autocorrelation times */
3686 if (opt.bCalcTauInt)
3688 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3691 /* calc average and sigma for each histogram
3692 (maybe required for bootstrapping. If not, this is fast anyhow) */
3693 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3695 averageSigma(window, nwins);
3698 /* Get initial potential by simple integration */
3699 if (opt.bInitPotByIntegration)
3701 guessPotByIntegration(window, nwins, &opt, xlabel);
3704 /* Check if complete reaction coordinate is covered */
3705 checkReactionCoordinateCovered(window, nwins, &opt);
3707 /* Calculate profile */
3708 snew(profile, opt.bins);
3716 if ( (i%opt.stepUpdateContrib) == 0)
3718 setup_acc_wham(profile, window, nwins, &opt);
3720 if (maxchange < opt.Tolerance)
3723 /* if (opt.verbose) */
3724 printf("Switched to exact iteration in iteration %d\n", i);
3726 calc_profile(profile, window, nwins, &opt, bExact);
3727 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3729 printf("\t%4d) Maximum change %e\n", i, maxchange);
3733 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3734 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3736 /* calc error from Kumar's formula */
3737 /* Unclear how the error propagates along reaction coordinate, therefore
3739 /* calc_error_kumar(profile,window, nwins,&opt); */
3741 /* Write profile in energy units? */
3744 prof_normalization_and_unit(profile, &opt);
3745 std::strcpy(ylabel, en_unit_label[opt.unit]);
3746 std::strcpy(title, "Umbrella potential");
3750 std::strcpy(ylabel, "Density of states");
3751 std::strcpy(title, "Density of states");
3754 /* symmetrize profile around z=0? */
3757 symmetrizeProfile(profile, &opt);
3760 /* write profile or density of states */
3761 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3762 for (i = 0; i < opt.bins; ++i)
3764 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3767 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3769 /* Bootstrap Method */
3772 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3773 opt2fn("-hist", NFILE, fnm),
3774 xlabel, ylabel, profile, window, nwins, &opt);
3778 freeUmbrellaWindows(window, nfiles);
3780 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3781 please_cite(stdout, "Hub2010");