2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
57 #include "gromacs/commandline/pargs.h"
58 #include "gromacs/fileio/tpxio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxana/gmx_ana.h"
61 #include "gromacs/math/functions.h"
62 #include "gromacs/math/units.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/pull_params.h"
67 #include "gromacs/mdtypes/state.h"
68 #include "gromacs/pulling/pull.h"
69 #include "gromacs/random/tabulatednormaldistribution.h"
70 #include "gromacs/random/threefry.h"
71 #include "gromacs/random/uniformintdistribution.h"
72 #include "gromacs/random/uniformrealdistribution.h"
73 #include "gromacs/utility/arraysize.h"
74 #include "gromacs/utility/cstringutil.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/futil.h"
78 #include "gromacs/utility/gmxomp.h"
79 #include "gromacs/utility/path.h"
80 #include "gromacs/utility/pleasecite.h"
81 #include "gromacs/utility/smalloc.h"
83 //! longest file names allowed in input files
84 #define WHAM_MAXFILELEN 2048
87 * enum for energy units
90 enSel, en_kJ, en_kCal, en_kT, enNr
93 * enum for type of input files (pdos, tpr, or pullf)
96 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
99 /*! \brief enum for bootstrapping method
101 * These bootstrap methods are supported:
102 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
103 * (bsMethod_BayesianHist)
104 * - bootstrap complete histograms (bsMethod_hist)
105 * - bootstrap trajectories from given umbrella histograms. This generates new
106 * "synthetic" histograms (bsMethod_traj)
107 * - bootstrap trajectories from Gaussian with mu/sigma computed from
108 * the respective histogram (bsMethod_trajGauss). This gives very similar
109 * results compared to bsMethod_traj.
111 * ********************************************************************
112 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
113 * JS Hub, BL de Groot, D van der Spoel
114 * g_wham - A free weighted histogram analysis implementation including
115 * robust error and autocorrelation estimates,
116 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
117 * ********************************************************************
120 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
121 bsMethod_traj, bsMethod_trajGauss
124 //! Parameters of one pull coodinate
127 int pull_type; //!< such as constraint, umbrella, ...
128 int geometry; //!< such as distance, direction, cylinder
129 int ngroup; //!< the number of pull groups involved
130 ivec dim; //!< pull dimension with geometry distance
131 int ndim; //!< nr of pull_dim != 0
132 real k; //!< force constants in tpr file
133 real init_dist; //!< reference displacement
134 char coord_unit[256]; //!< unit of the displacement
137 //! Parameters of the umbrella potentials
141 * \name Using umbrella pull code since gromacs 4.x
144 int npullcrds; //!< nr of umbrella pull coordinates for reading
145 t_pullcoord *pcrd; //!< the pull coordinates
146 gmx_bool bPrintCOM; //!< COMs of pull groups writtn in pullx.xvg
147 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
148 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
152 * \name Using PDO files common until gromacs 3.x
160 char PullName[4][256];
162 double UmbCons[4][3];
166 //! Data in the umbrella histograms
169 int nPull; //!< nr of pull groups in this pdo or pullf/x file
170 double **Histo; //!< nPull histograms
171 double **cum; //!< nPull cumulative distribution functions
172 int nBin; //!< nr of bins. identical to opt->bins
173 double *k; //!< force constants for the nPull coords
174 double *pos; //!< umbrella positions for the nPull coords
175 double *z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
176 int *N; //!< nr of data points in nPull histograms.
177 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
179 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
181 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
182 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
185 double *tau; //!< intetrated autocorrelation time (IACT)
186 double *tausmooth; //!< smoothed IACT
188 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
190 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
192 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
194 /*! \brief average force estimated from average displacement, fAv=dzAv*k
196 * Used for integration to guess the potential.
199 real *aver; //!< average of histograms
200 real *sigma; //!< stddev of histograms
201 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
204 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
207 int n; //!< total nr of pull coords in this tpr file
208 int nUse; //!< nr of pull coords used
209 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
212 //! Parameters of WHAM
213 typedef struct // NOLINT(clang-analyzer-optin.performance.Padding)
219 const char *fnTpr, *fnPullf, *fnCoordSel;
220 const char *fnPdo, *fnPullx; //!< file names of input
221 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
222 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
224 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
225 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
226 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
227 t_coordselection *coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
230 * \name Basic WHAM options
233 int bins; //!< nr of bins, min, max, and dz of profile
235 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
236 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
239 * \name Output control
242 gmx_bool bLog; //!< energy output (instead of probability) for profile
243 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
244 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
245 /*! \brief after wham, set prof to zero at this z-position.
246 * When bootstrapping, set zProf0 to a "stable" reference position.
249 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
251 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
252 gmx_bool bAuto; //!< determine min and max automatically but do not exit
254 gmx_bool verbose; //!< more noisy wham mode
255 int stepchange; //!< print maximum change in prof after how many interations
256 gmx_output_env_t *oenv; //!< xvgr options
259 * \name Autocorrelation stuff
262 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
263 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
264 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
265 real acTrestart; //!< when computing ACT, time between restarting points
267 /* \brief Enforce the same weight for each umbella window, that is
268 * calculate with the same number of data points for
269 * each window. That can be reasonable, if the histograms
270 * have different length, but due to autocorrelation,
271 * a longer simulation should not have larger weightin wham.
277 * \name Bootstrapping stuff
280 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
282 /* \brief bootstrap method
284 * if == bsMethod_hist, consider complete histograms as independent
285 * data points and, hence, only mix complete histograms.
286 * if == bsMethod_BayesianHist, consider complete histograms
287 * as independent data points, but assign random weights
288 * to the histograms during the bootstrapping ("Bayesian bootstrap")
289 * In case of long correlations (e.g., inside a channel), these
290 * will yield a more realistic error.
291 * if == bsMethod_traj(Gauss), generate synthetic histograms
293 * histogram by generating an autocorrelated random sequence
294 * that is distributed according to the respective given
295 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
296 * (instead of from the umbrella histogram) to generate a new
301 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
304 /* \brief when mixing histograms, mix only histograms withing blocks
305 long the reaction coordinate xi. Avoids gaps along xi. */
306 int histBootStrapBlockLength;
308 int bsSeed; //!< random seed for bootstrapping
310 /* \brief Write cumulative distribution functions (CDFs) of histograms
311 and write the generated histograms for each bootstrap */
315 * \name tabulated umbrella potential stuff
319 double *tabX, *tabY, tabMin, tabMax, tabDz;
322 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
323 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
326 //! Make an umbrella window (may contain several histograms)
327 static t_UmbrellaWindow * initUmbrellaWindows(int nwin)
329 t_UmbrellaWindow *win;
332 for (i = 0; i < nwin; i++)
334 win[i].Histo = win[i].cum = nullptr;
335 win[i].k = win[i].pos = win[i].z = nullptr;
336 win[i].N = win[i].Ntot = nullptr;
337 win[i].g = win[i].tau = win[i].tausmooth = nullptr;
338 win[i].bContrib = nullptr;
339 win[i].ztime = nullptr;
340 win[i].forceAv = nullptr;
341 win[i].aver = win[i].sigma = nullptr;
342 win[i].bsWeight = nullptr;
347 //! Delete an umbrella window (may contain several histograms)
348 static void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
351 for (i = 0; i < nwin; i++)
355 for (j = 0; j < win[i].nPull; j++)
357 sfree(win[i].Histo[j]);
362 for (j = 0; j < win[i].nPull; j++)
364 sfree(win[i].cum[j]);
369 for (j = 0; j < win[i].nPull; j++)
371 sfree(win[i].bContrib[j]);
383 sfree(win[i].tausmooth);
384 sfree(win[i].bContrib);
386 sfree(win[i].forceAv);
389 sfree(win[i].bsWeight);
395 * Read and setup tabulated umbrella potential
397 static void setup_tab(const char *fn, t_UmbrellaOptions *opt)
402 printf("Setting up tabulated potential from file %s\n", fn);
403 nl = read_xvg(fn, &y, &ny);
407 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
409 opt->tabMin = y[0][0];
410 opt->tabMax = y[0][nl-1];
411 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
414 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
415 "ascending z-direction", fn);
417 for (i = 0; i < nl-1; i++)
419 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
421 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", fn);
426 for (i = 0; i < nl; i++)
428 opt->tabX[i] = y[0][i];
429 opt->tabY[i] = y[1][i];
431 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
432 opt->tabMin, opt->tabMax, opt->tabDz);
435 //! Read the header of an PDO file (position, force const, nr of groups)
436 static void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
439 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
441 std::istringstream ist;
444 if (fgets(line, 2048, file) == nullptr)
446 gmx_fatal(FARGS, "Error reading header from pdo file\n");
449 ist >> Buffer0 >> Buffer1 >> Buffer2;
450 if (std::strcmp(Buffer1, "UMBRELLA") != 0)
452 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
453 "(Found in first line: `%s')\n",
454 Buffer1, "UMBRELLA", line);
456 if (std::strcmp(Buffer2, "3.0") != 0)
458 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
462 if (fgets(line, 2048, file) == nullptr)
464 gmx_fatal(FARGS, "Error reading header from pdo file\n");
467 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
468 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
470 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
471 if (header->nDim != 1)
473 gmx_fatal(FARGS, "Currently only supports one dimension");
477 if (fgets(line, 2048, file) == nullptr)
479 gmx_fatal(FARGS, "Error reading header from pdo file\n");
482 ist >> Buffer0 >> Buffer1 >> header->nSkip;
485 if (fgets(line, 2048, file) == nullptr)
487 gmx_fatal(FARGS, "Error reading header from pdo file\n");
490 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
493 if (fgets(line, 2048, file) == nullptr)
495 gmx_fatal(FARGS, "Error reading header from pdo file\n");
498 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
502 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
506 for (i = 0; i < header->nPull; ++i)
508 if (fgets(line, 2048, file) == nullptr)
510 gmx_fatal(FARGS, "Error reading header from pdo file\n");
513 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
514 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
515 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
519 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
520 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
524 if (fgets(line, 2048, file) == nullptr)
526 gmx_fatal(FARGS, "Cannot read from file\n");
530 if (std::strcmp(Buffer3, "#####") != 0)
532 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
537 static char *fgets3(FILE *fp, char ptr[], int *len)
542 if (fgets(ptr, *len-1, fp) == nullptr)
547 while ((std::strchr(ptr, '\n') == nullptr) && (!feof(fp)))
549 /* This line is longer than len characters, let's increase len! */
553 if (fgets(p-1, STRLEN, fp) == nullptr)
558 slen = std::strlen(ptr);
559 if (ptr[slen-1] == '\n')
567 /*! \brief Read the data columns of and PDO file.
569 * TO DO: Get rid of the scanf function to avoid the clang warning.
570 * At the moment, this warning is avoided by hiding the format string
571 * the variable fmtlf.
573 static void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
574 int fileno, t_UmbrellaWindow * win,
575 t_UmbrellaOptions *opt,
576 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
578 int i, inttemp, bins, count, ntot;
579 real minval, maxval, minfound = 1e20, maxfound = -1e20;
580 double temp, time, time0 = 0, dt;
582 t_UmbrellaWindow * window = nullptr;
583 gmx_bool timeok, dt_ok = true;
584 char *tmpbuf = nullptr, fmt[256], fmtign[256], fmtlf[5] = "%lf";
585 int len = STRLEN, dstep = 1;
586 const int blocklen = 4096;
587 int *lennow = nullptr;
596 /* Need to alocate memory and set up structure */
597 window->nPull = header->nPull;
600 snew(window->Histo, window->nPull);
601 snew(window->z, window->nPull);
602 snew(window->k, window->nPull);
603 snew(window->pos, window->nPull);
604 snew(window->N, window->nPull);
605 snew(window->Ntot, window->nPull);
606 snew(window->g, window->nPull);
607 snew(window->bsWeight, window->nPull);
609 window->bContrib = nullptr;
611 if (opt->bCalcTauInt)
613 snew(window->ztime, window->nPull);
617 window->ztime = nullptr;
619 snew(lennow, window->nPull);
621 for (i = 0; i < window->nPull; ++i)
624 window->bsWeight[i] = 1.;
625 snew(window->Histo[i], bins);
626 window->k[i] = header->UmbCons[i][0];
627 window->pos[i] = header->UmbPos[i][0];
631 if (opt->bCalcTauInt)
633 window->ztime[i] = nullptr;
637 /* Done with setup */
643 minval = maxval = bins = 0; /* Get rid of warnings */
648 while ( (ptr = fgets3(file, tmpbuf, &len)) != nullptr)
652 if (ptr[0] == '#' || std::strlen(ptr) < 2)
657 /* Initiate format string */
659 std::strcat(fmtign, "%*s");
661 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
662 /* Round time to fs */
663 time = 1.0/1000*( gmx::roundToInt64(time*1000) );
665 /* get time step of pdo file */
675 dstep = gmx::roundToInt(opt->dt/dt);
683 window->dt = dt*dstep;
688 dt_ok = ((count-1)%dstep == 0);
689 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
691 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
692 time,opt->tmin, opt->tmax, dt_ok,timeok); */
696 for (i = 0; i < header->nPull; ++i)
698 std::strcpy(fmt, fmtign);
699 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
700 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
701 if (sscanf(ptr, fmt, &temp))
703 temp += header->UmbPos[i][0];
717 if (opt->bCalcTauInt)
719 /* save time series for autocorrelation analysis */
720 ntot = window->Ntot[i];
721 if (ntot >= lennow[i])
723 lennow[i] += blocklen;
724 srenew(window->ztime[i], lennow[i]);
726 window->ztime[i][ntot] = temp;
730 temp /= (maxval-minval);
732 temp = std::floor(temp);
734 inttemp = static_cast<int> (temp);
741 else if (inttemp >= bins)
747 if (inttemp >= 0 && inttemp < bins)
749 window->Histo[i][inttemp] += 1.;
757 if (time > opt->tmax)
761 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
777 /*! \brief Set identical weights for all histograms
779 * Normally, the weight is given by the number data points in each
780 * histogram, together with the autocorrelation time. This can be overwritten
781 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
782 * an appropriate autocorrelation times instead of using this function.
784 static void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
786 int i, k, j, NEnforced;
789 NEnforced = window[0].Ntot[0];
790 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
791 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
792 /* enforce all histograms to have the same weight as the very first histogram */
794 for (j = 0; j < nWindows; ++j)
796 for (k = 0; k < window[j].nPull; ++k)
798 ratio = 1.0*NEnforced/window[j].Ntot[k];
799 for (i = 0; i < window[0].nBin; ++i)
801 window[j].Histo[k][i] *= ratio;
803 window[j].N[k] = gmx::roundToInt(ratio*window[j].N[k]);
808 /*! \brief Simple linear interpolation between two given tabulated points
810 static double tabulated_pot(double dist, t_UmbrellaOptions *opt)
813 double pl, pu, dz, dp;
815 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
817 if (jl < 0 || ju >= opt->tabNbins)
819 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
820 "Provide an extended table.", dist, jl, ju);
824 dz = dist-opt->tabX[jl];
825 dp = (pu-pl)*dz/opt->tabDz;
831 * Check which bins substiantially contribute (accelerates WHAM)
833 * Don't worry, that routine does not mean we compute the PMF in limited precision.
834 * After rapid convergence (using only substiantal contributions), we always switch to
837 static void setup_acc_wham(const double *profile, t_UmbrellaWindow * window, int nWindows,
838 t_UmbrellaOptions *opt)
840 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
841 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
842 gmx_bool bAnyContrib;
843 static int bFirst = 1;
844 static double wham_contrib_lim;
848 for (i = 0; i < nWindows; ++i)
850 nGrptot += window[i].nPull;
852 wham_contrib_lim = opt->Tolerance/nGrptot;
855 ztot = opt->max-opt->min;
858 for (i = 0; i < nWindows; ++i)
860 if (!window[i].bContrib)
862 snew(window[i].bContrib, window[i].nPull);
864 for (j = 0; j < window[i].nPull; ++j)
866 if (!window[i].bContrib[j])
868 snew(window[i].bContrib[j], opt->bins);
871 for (k = 0; k < opt->bins; ++k)
873 temp = (1.0*k+0.5)*dz+min;
874 distance = temp - window[i].pos[j]; /* distance to umbrella center */
876 { /* in cyclic wham: */
877 if (distance > ztot_half) /* |distance| < ztot_half */
881 else if (distance < -ztot_half)
886 /* Note: there are two contributions to bin k in the wham equations:
887 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
888 ii) exp(- U/(BOLTZ*opt->Temperature))
889 where U is the umbrella potential
890 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
895 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
899 U = tabulated_pot(distance, opt); /* Use tabulated potential */
901 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
902 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
903 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
904 bAnyContrib = bAnyContrib || window[i].bContrib[j][k];
905 if (window[i].bContrib[j][k])
911 /* If this histo is far outside min and max all bContrib may be FALSE,
912 causing a floating point exception later on. To avoid that, switch
916 for (k = 0; k < opt->bins; ++k)
918 window[i].bContrib[j][k] = TRUE;
925 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
926 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
931 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
937 //! Compute the PMF (one of the two main WHAM routines)
938 static void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
939 t_UmbrellaOptions *opt, gmx_bool bExact)
941 double ztot_half, ztot, min = opt->min, dz = opt->dz;
943 ztot = opt->max-opt->min;
950 int nthreads = gmx_omp_get_max_threads();
951 int thread_id = gmx_omp_get_thread_num();
953 int i0 = thread_id*opt->bins/nthreads;
954 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
956 for (i = i0; i < i1; ++i)
959 double num, denom, invg, temp = 0, distance, U = 0;
961 for (j = 0; j < nWindows; ++j)
963 for (k = 0; k < window[j].nPull; ++k)
965 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
966 temp = (1.0*i+0.5)*dz+min;
967 num += invg*window[j].Histo[k][i];
969 if (!(bExact || window[j].bContrib[k][i]))
973 distance = temp - window[j].pos[k]; /* distance to umbrella center */
975 { /* in cyclic wham: */
976 if (distance > ztot_half) /* |distance| < ztot_half */
980 else if (distance < -ztot_half)
988 U = 0.5*window[j].k[k]*gmx::square(distance); /* harmonic potential assumed. */
992 U = tabulated_pot(distance, opt); /* Use tabulated potential */
994 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
997 profile[i] = num/denom;
1000 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1004 //! Compute the free energy offsets z (one of the two main WHAM routines)
1005 static double calc_z(const double * profile, t_UmbrellaWindow * window, int nWindows,
1006 t_UmbrellaOptions *opt, gmx_bool bExact)
1008 double min = opt->min, dz = opt->dz, ztot_half, ztot;
1009 double maxglob = -1e20;
1011 ztot = opt->max-opt->min;
1014 #pragma omp parallel
1018 int nthreads = gmx_omp_get_max_threads();
1019 int thread_id = gmx_omp_get_thread_num();
1021 int i0 = thread_id*nWindows/nthreads;
1022 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1023 double maxloc = -1e20;
1025 for (i = i0; i < i1; ++i)
1027 double total = 0, temp, distance, U = 0;
1030 for (j = 0; j < window[i].nPull; ++j)
1033 for (k = 0; k < window[i].nBin; ++k)
1035 if (!(bExact || window[i].bContrib[j][k]))
1039 temp = (1.0*k+0.5)*dz+min;
1040 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1042 { /* in cyclic wham: */
1043 if (distance > ztot_half) /* |distance| < ztot_half */
1047 else if (distance < -ztot_half)
1055 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
1059 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1061 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1063 /* Avoid floating point exception if window is far outside min and max */
1066 total = -std::log(total);
1072 temp = std::abs(total - window[i].z[j]);
1077 window[i].z[j] = total;
1080 /* Now get maximum maxloc from the threads and put in maxglob */
1081 if (maxloc > maxglob)
1083 #pragma omp critical
1085 if (maxloc > maxglob)
1092 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1098 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1099 static void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1101 int i, j, bins = opt->bins;
1102 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1105 if (min > 0. || max < 0.)
1107 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1108 opt->min, opt->max);
1113 for (i = 0; i < bins; i++)
1117 /* bin left of zsym */
1118 j = gmx::roundToInt((zsym-min)/dz)-1;
1119 if (j >= 0 && (j+1) < bins)
1121 /* interpolate profile linearly between bins j and j+1 */
1122 z1 = min+(j+0.5)*dz;
1124 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1125 /* average between left and right */
1126 prof2[i] = 0.5*(profsym+profile[i]);
1130 prof2[i] = profile[i];
1134 std::memcpy(profile, prof2, bins*sizeof(double));
1138 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1139 static void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1142 double unit_factor = 1., diff;
1146 /* Not log? Nothing to do! */
1152 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1153 if (opt->unit == en_kT)
1157 else if (opt->unit == en_kJ)
1159 unit_factor = BOLTZ*opt->Temperature;
1161 else if (opt->unit == en_kCal)
1163 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1167 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1170 for (i = 0; i < bins; i++)
1172 if (profile[i] > 0.0)
1174 profile[i] = -std::log(profile[i])*unit_factor;
1178 /* shift to zero at z=opt->zProf0 */
1179 if (!opt->bProf0Set)
1185 /* Get bin with shortest distance to opt->zProf0
1186 (-0.5 from bin position and +0.5 from rounding cancel) */
1187 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1192 else if (imin >= bins)
1196 diff = profile[imin];
1200 for (i = 0; i < bins; i++)
1206 //! Make an array of random integers (used for bootstrapping)
1207 static void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine * rng)
1209 gmx::UniformIntDistribution<int> dist(0, blockLength-1);
1211 int ipull, blockBase, nr, ipullRandom;
1213 if (blockLength == 0)
1215 blockLength = nPull;
1218 for (ipull = 0; ipull < nPull; ipull++)
1220 blockBase = (ipull/blockLength)*blockLength;
1222 { /* make sure nothing bad happens in the last block */
1223 nr = dist(*rng); // [0,blockLength-1]
1224 ipullRandom = blockBase + nr;
1226 while (ipullRandom >= nPull);
1227 if (ipullRandom < 0 || ipullRandom >= nPull)
1229 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1230 "blockLength = %d, blockBase = %d\n",
1231 ipullRandom, nPull, nr, blockLength, blockBase);
1233 randomArray[ipull] = ipullRandom;
1237 /*! \brief Set pull group information of a synthetic histogram
1239 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1240 * but it is not required if we bootstrap complete histograms.
1242 static void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1243 t_UmbrellaWindow *thisWindow, int pullid)
1245 synthWindow->N [0] = thisWindow->N [pullid];
1246 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1247 synthWindow->pos [0] = thisWindow->pos [pullid];
1248 synthWindow->z [0] = thisWindow->z [pullid];
1249 synthWindow->k [0] = thisWindow->k [pullid];
1250 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1251 synthWindow->g [0] = thisWindow->g [pullid];
1252 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1255 /*! \brief Calculate cumulative distribution function of of all histograms.
1257 * This allow to create random number sequences
1258 * which are distributed according to the histograms. Required to generate
1259 * the "synthetic" histograms for the Bootstrap method
1261 static void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1262 t_UmbrellaOptions *opt, const char *fnhist, const char *xlabel)
1269 if (opt->bs_verbose)
1271 fn = gmx::Path::concatenateBeforeExtension(fnhist, "_cumul");
1272 fp = xvgropen(fn.c_str(), "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1276 for (i = 0; i < nWindows; i++)
1278 snew(window[i].cum, window[i].nPull);
1279 for (j = 0; j < window[i].nPull; j++)
1281 snew(window[i].cum[j], nbin+1);
1282 window[i].cum[j][0] = 0.;
1283 for (k = 1; k <= nbin; k++)
1285 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1288 /* normalize CDFs. Ensure cum[nbin]==1 */
1289 last = window[i].cum[j][nbin];
1290 for (k = 0; k <= nbin; k++)
1292 window[i].cum[j][k] /= last;
1297 printf("Cumulative distribution functions of all histograms created.\n");
1298 if (opt->bs_verbose)
1300 for (k = 0; k <= nbin; k++)
1302 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1303 for (i = 0; i < nWindows; i++)
1305 for (j = 0; j < window[i].nPull; j++)
1307 fprintf(fp, "%g\t", window[i].cum[j][k]);
1312 printf("Wrote cumulative distribution functions to %s\n", fn.c_str());
1318 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1320 * This is used to generate a random sequence distributed according to a histogram
1322 static void searchCumulative(const double xx[], int n, double x, int *j)
1344 else if (x == xx[n-1])
1354 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1355 static void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1356 int pullid, t_UmbrellaOptions *opt)
1358 int N, i, nbins, r_index, ibin;
1359 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1362 N = thisWindow->N[pullid];
1363 dt = thisWindow->dt;
1364 nbins = thisWindow->nBin;
1366 /* tau = autocorrelation time */
1367 if (opt->tauBootStrap > 0.0)
1369 tausteps = opt->tauBootStrap/dt;
1371 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1373 /* calc tausteps from g=1+2tausteps */
1374 g = thisWindow->g[pullid];
1380 "When generating hypothetical trajectories from given umbrella histograms,\n"
1381 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1382 "cannot be predicted. You have 3 options:\n"
1383 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1384 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1386 " with option -iiact for all umbrella windows.\n"
1387 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1388 " Use option (3) only if you are sure what you're doing, you may severely\n"
1389 " underestimate the error if a too small ACT is given.\n");
1390 gmx_fatal(FARGS, "%s", errstr);
1393 synthWindow->N [0] = N;
1394 synthWindow->pos [0] = thisWindow->pos[pullid];
1395 synthWindow->z [0] = thisWindow->z[pullid];
1396 synthWindow->k [0] = thisWindow->k[pullid];
1397 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1398 synthWindow->g [0] = thisWindow->g [pullid];
1399 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1401 for (i = 0; i < nbins; i++)
1403 synthWindow->Histo[0][i] = 0.;
1406 if (opt->bsMethod == bsMethod_trajGauss)
1408 sig = thisWindow->sigma [pullid];
1409 mu = thisWindow->aver [pullid];
1412 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1414 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1416 z = a*x + sqrt(1-a^2)*y
1417 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1418 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1420 C(t) = exp(-t/tau) with tau=-1/ln(a)
1422 Then, use error function to turn the Gaussian random variable into a uniformly
1423 distributed one in [0,1]. Eventually, use cumulative distribution function of
1424 histogram to get random variables distributed according to histogram.
1425 Note: The ACT of the flat distribution and of the generated histogram is not
1426 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1428 a = std::exp(-1.0/tausteps);
1429 ap = std::sqrt(1.0-a*a);
1430 invsqrt2 = 1.0/std::sqrt(2.0);
1432 /* init random sequence */
1433 x = opt->normalDistribution(opt->rng);
1435 if (opt->bsMethod == bsMethod_traj)
1437 /* bootstrap points from the umbrella histograms */
1438 for (i = 0; i < N; i++)
1440 y = opt->normalDistribution(opt->rng);
1442 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1443 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1445 r = 0.5*(1+std::erf(x*invsqrt2));
1446 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1447 synthWindow->Histo[0][r_index] += 1.;
1450 else if (opt->bsMethod == bsMethod_trajGauss)
1452 /* bootstrap points from a Gaussian with the same average and sigma
1453 as the respective umbrella histogram. The idea was, that -given
1454 limited sampling- the bootstrapped histograms are otherwise biased
1455 from the limited sampling of the US histos. However, bootstrapping from
1456 the Gaussian seems to yield a similar estimate. */
1460 y = opt->normalDistribution(opt->rng);
1463 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1468 while ( (ibin += nbins) < 0)
1473 else if (ibin >= nbins)
1475 while ( (ibin -= nbins) >= nbins)
1482 if (ibin >= 0 && ibin < nbins)
1484 synthWindow->Histo[0][ibin] += 1.;
1491 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1495 /*! \brief Write all histograms to a file
1497 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1498 * sets of bootstrapped histograms.
1500 static void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1501 int bs_index, t_UmbrellaOptions *opt, const char *xlabel)
1503 std::string fn, title;
1509 fn = gmx::Path::concatenateBeforeExtension(fnhist, gmx::formatString("_bs%d", bs_index));
1510 title = gmx::formatString("Umbrella histograms. Bootstrap #%d", bs_index);
1514 fn = gmx_strdup(fnhist);
1515 title = gmx::formatString("Umbrella histograms");
1518 fp = xvgropen(fn.c_str(), title.c_str(), xlabel, "count", opt->oenv);
1521 /* Write histograms */
1522 for (l = 0; l < bins; ++l)
1524 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1525 for (i = 0; i < nWindows; ++i)
1527 for (j = 0; j < window[i].nPull; ++j)
1529 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1536 printf("Wrote %s\n", fn.c_str());
1539 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1540 static void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1544 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1548 /* generate ordered random numbers between 0 and nAllPull */
1549 for (i = 0; i < nAllPull-1; i++)
1551 r[i] = dist(opt->rng);
1553 std::sort(r, r+nAllPull-1);
1554 r[nAllPull-1] = 1.0*nAllPull;
1556 synthwin[0].bsWeight[0] = r[0];
1557 for (i = 1; i < nAllPull; i++)
1559 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1562 /* avoid to have zero weight by adding a tiny value */
1563 for (i = 0; i < nAllPull; i++)
1565 if (synthwin[i].bsWeight[0] < 1e-5)
1567 synthwin[i].bsWeight[0] = 1e-5;
1574 //! The main bootstrapping routine
1575 static void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1576 const char *xlabel, char* ylabel, double *profile,
1577 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1579 t_UmbrellaWindow * synthWindow;
1580 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1581 int i, j, *randomArray = nullptr, winid, pullid, ib;
1582 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1584 gmx_bool bExact = FALSE;
1586 /* init random generator */
1587 if (opt->bsSeed == 0)
1589 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1591 opt->rng.seed(opt->bsSeed);
1593 snew(bsProfile, opt->bins);
1594 snew(bsProfiles_av, opt->bins);
1595 snew(bsProfiles_av2, opt->bins);
1597 /* Create array of all pull groups. Note that different windows
1598 may have different nr of pull groups
1599 First: Get total nr of pull groups */
1601 for (i = 0; i < nWindows; i++)
1603 nAllPull += window[i].nPull;
1605 snew(allPull_winId, nAllPull);
1606 snew(allPull_pullId, nAllPull);
1608 /* Setup one array of all pull groups */
1609 for (i = 0; i < nWindows; i++)
1611 for (j = 0; j < window[i].nPull; j++)
1613 allPull_winId[iAllPull] = i;
1614 allPull_pullId[iAllPull] = j;
1619 /* setup stuff for synthetic windows */
1620 snew(synthWindow, nAllPull);
1621 for (i = 0; i < nAllPull; i++)
1623 synthWindow[i].nPull = 1;
1624 synthWindow[i].nBin = opt->bins;
1625 snew(synthWindow[i].Histo, 1);
1626 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1628 snew(synthWindow[i].Histo[0], opt->bins);
1630 snew(synthWindow[i].N, 1);
1631 snew(synthWindow[i].pos, 1);
1632 snew(synthWindow[i].z, 1);
1633 snew(synthWindow[i].k, 1);
1634 snew(synthWindow[i].bContrib, 1);
1635 snew(synthWindow[i].g, 1);
1636 snew(synthWindow[i].bsWeight, 1);
1639 switch (opt->bsMethod)
1642 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1643 please_cite(stdout, "Hub2006");
1645 case bsMethod_BayesianHist:
1646 /* just copy all histogams into synthWindow array */
1647 for (i = 0; i < nAllPull; i++)
1649 winid = allPull_winId [i];
1650 pullid = allPull_pullId[i];
1651 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1655 case bsMethod_trajGauss:
1656 calc_cumulatives(window, nWindows, opt, fnhist, xlabel);
1659 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1662 /* do bootstrapping */
1663 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1664 for (ib = 0; ib < opt->nBootStrap; ib++)
1666 printf(" *******************************************\n"
1667 " ******** Start bootstrap nr %d ************\n"
1668 " *******************************************\n", ib+1);
1670 switch (opt->bsMethod)
1673 /* bootstrap complete histograms from given histograms */
1674 srenew(randomArray, nAllPull);
1675 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1676 for (i = 0; i < nAllPull; i++)
1678 winid = allPull_winId [randomArray[i]];
1679 pullid = allPull_pullId[randomArray[i]];
1680 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1683 case bsMethod_BayesianHist:
1684 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1685 setRandomBsWeights(synthWindow, nAllPull, opt);
1688 case bsMethod_trajGauss:
1689 /* create new histos from given histos, that is generate new hypothetical
1691 for (i = 0; i < nAllPull; i++)
1693 winid = allPull_winId[i];
1694 pullid = allPull_pullId[i];
1695 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1700 /* write histos in case of verbose output */
1701 if (opt->bs_verbose)
1703 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1710 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1713 if ( (i%opt->stepUpdateContrib) == 0)
1715 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1717 if (maxchange < opt->Tolerance)
1721 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1723 printf("\t%4d) Maximum change %e\n", i, maxchange);
1725 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1728 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1729 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1733 prof_normalization_and_unit(bsProfile, opt);
1736 /* symmetrize profile around z=0 */
1739 symmetrizeProfile(bsProfile, opt);
1742 /* save stuff to get average and stddev */
1743 for (i = 0; i < opt->bins; i++)
1746 bsProfiles_av[i] += tmp;
1747 bsProfiles_av2[i] += tmp*tmp;
1748 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1750 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1754 /* write average and stddev */
1755 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1756 if (output_env_get_print_xvgr_codes(opt->oenv))
1758 fprintf(fp, "@TYPE xydy\n");
1760 for (i = 0; i < opt->bins; i++)
1762 bsProfiles_av [i] /= opt->nBootStrap;
1763 bsProfiles_av2[i] /= opt->nBootStrap;
1764 tmp = bsProfiles_av2[i]-gmx::square(bsProfiles_av[i]);
1765 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1766 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1769 printf("Wrote boot strap result to %s\n", fnres);
1772 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1773 static int whaminFileType(char *fn)
1776 len = std::strlen(fn);
1777 if (std::strcmp(fn+len-3, "tpr") == 0)
1781 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1783 return whamin_pullxf;
1785 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1791 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1795 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1796 static void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1797 t_UmbrellaOptions *opt)
1799 char **filename = nullptr, tmp[WHAM_MAXFILELEN+2];
1800 int nread, sizenow, i, block = 1;
1803 fp = gmx_ffopen(fn, "r");
1806 while (fgets(tmp, sizeof(tmp), fp) != nullptr)
1808 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1810 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1812 if (nread >= sizenow)
1815 srenew(filename, sizenow);
1816 for (i = sizenow-block; i < sizenow; i++)
1818 snew(filename[i], WHAM_MAXFILELEN);
1821 /* remove newline if there is one */
1822 if (tmp[std::strlen(tmp)-1] == '\n')
1824 tmp[std::strlen(tmp)-1] = '\0';
1826 std::strcpy(filename[nread], tmp);
1829 printf("Found file %s in %s\n", filename[nread], fn);
1833 *filenamesRet = filename;
1837 //! Open a file or a pipe to a gzipped file
1838 static FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1840 char Buffer[2048], gunzip[1024], *Path = nullptr;
1841 FILE *pipe = nullptr;
1842 static gmx_bool bFirst = true;
1844 /* gzipped pdo file? */
1845 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1847 /* search gunzip executable */
1848 if (!(Path = getenv("GMX_PATH_GZIP")))
1850 if (gmx_fexist("/bin/gunzip"))
1852 sprintf(gunzip, "%s", "/bin/gunzip");
1854 else if (gmx_fexist("/usr/bin/gunzip"))
1856 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1860 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1861 "You may want to define the path to gunzip "
1862 "with the environment variable GMX_PATH_GZIP.");
1867 sprintf(gunzip, "%s/gunzip", Path);
1868 if (!gmx_fexist(gunzip))
1870 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1871 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1876 printf("Using gunzip executable %s\n", gunzip);
1879 if (!gmx_fexist(fn))
1881 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1883 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1886 printf("Executing command '%s'\n", Buffer);
1889 if ((pipe = popen(Buffer, "r")) == nullptr)
1891 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1894 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1900 pipe = gmx_ffopen(fn, "r");
1907 //! Close file or pipe
1908 static void pdo_close_file(FILE *fp)
1917 //! Reading all pdo files
1918 static void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1919 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1922 real mintmp, maxtmp, done = 0.;
1925 /* char Buffer0[1000]; */
1929 gmx_fatal(FARGS, "No files found. Hick.");
1932 /* if min and max are not given, get min and max from the input files */
1935 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1938 for (i = 0; i < nfiles; ++i)
1940 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1941 /*fgets(Buffer0,999,file);
1942 fprintf(stderr,"First line '%s'\n",Buffer0); */
1943 done = 100.0*(i+1)/nfiles;
1944 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1949 read_pdo_header(file, header, opt);
1950 /* here only determine min and max of this window */
1951 read_pdo_data(file, header, i, nullptr, opt, TRUE, &mintmp, &maxtmp);
1952 if (maxtmp > opt->max)
1956 if (mintmp < opt->min)
1962 pdo_close_file(file);
1970 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1971 if (opt->bBoundsOnly)
1973 printf("Found option -boundsonly, now exiting.\n");
1977 /* store stepsize in profile */
1978 opt->dz = (opt->max-opt->min)/opt->bins;
1980 /* Having min and max, we read in all files */
1981 /* Loop over all files */
1982 for (i = 0; i < nfiles; ++i)
1984 done = 100.0*(i+1)/nfiles;
1985 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1990 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1991 read_pdo_header(file, header, opt);
1992 /* load data into window */
1993 read_pdo_data(file, header, i, window, opt, FALSE, nullptr, nullptr);
1994 if ((window+i)->Ntot[0] == 0)
1996 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2000 pdo_close_file(file);
2008 for (i = 0; i < nfiles; ++i)
2015 //! translate 0/1 to N/Y to write pull dimensions
2016 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2018 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2019 static void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt, t_coordselection *coordsel)
2021 t_inputrec irInstance;
2022 t_inputrec *ir = &irInstance;
2024 static int first = 1;
2026 /* printf("Reading %s \n",fn); */
2027 read_tpx_state(fn, ir, &state, nullptr);
2031 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2033 if (ir->pull->ncoord == 0)
2035 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
2038 /* Read overall pull info */
2039 header->npullcrds = ir->pull->ncoord;
2040 header->bPrintCOM = ir->pull->bPrintCOM;
2041 header->bPrintRefValue = ir->pull->bPrintRefValue;
2042 header->bPrintComp = ir->pull->bPrintComp;
2044 /* Read pull coordinates */
2045 snew(header->pcrd, header->npullcrds);
2046 for (int i = 0; i < ir->pull->ncoord; i++)
2048 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
2049 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
2050 header->pcrd[i].ngroup = ir->pull->coord[i].ngroup;
2051 /* Angle type coordinates are handled fully in degrees in gmx wham.
2052 * The pull "distance" appears with a power of -2 in the force constant,
2053 * so we need to multiply with the internal units (radians for angle)
2054 * to user units (degrees for an angle) with the same power.
2056 header->pcrd[i].k = ir->pull->coord[i].k/gmx::square(pull_conversion_factor_internal2userinput(&ir->pull->coord[i]));
2057 header->pcrd[i].init_dist = ir->pull->coord[i].init;
2059 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
2060 header->pcrd[i].ndim = header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
2062 std::strcpy(header->pcrd[i].coord_unit,
2063 pull_coordinate_units(&ir->pull->coord[i]));
2065 if (ir->efep != efepNO && ir->pull->coord[i].k != ir->pull->coord[i].kB)
2067 gmx_fatal(FARGS, "Seems like you did free-energy perturbation, and you perturbed the force constant."
2068 " This is not supported.\n");
2070 if (coordsel && (coordsel->n != ir->pull->ncoord))
2072 gmx_fatal(FARGS, "Found %d pull coordinates in %s, but %d columns in the respective line\n"
2073 "coordinate selection file (option -is)\n", ir->pull->ncoord, fn, coordsel->n);
2077 /* Check pull coords for consistency */
2079 ivec thedim = { 0, 0, 0 };
2080 bool geometryIsSet = false;
2081 for (int i = 0; i < ir->pull->ncoord; i++)
2083 if (coordsel == nullptr || coordsel->bUse[i])
2085 if (header->pcrd[i].pull_type != epullUMBRELLA)
2087 gmx_fatal(FARGS, "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only umbrella coodinates can enter WHAM.\n"
2088 "If you have umrella and non-umbrella coordinates, you can select the umbrella coordinates with gmx wham -is\n",
2089 fn, i+1, epull_names[header->pcrd[i].pull_type]);
2093 geom = header->pcrd[i].geometry;
2094 copy_ivec(header->pcrd[i].dim, thedim);
2095 geometryIsSet = true;
2097 if (geom != header->pcrd[i].geometry)
2099 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull geometry (coordinate 1: %s, coordinate %d: %s)\n"
2100 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2101 fn, epullg_names[geom], i+1, epullg_names[header->pcrd[i].geometry]);
2103 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY] || thedim[ZZ] != header->pcrd[i].dim[ZZ])
2105 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull dimensions (coordinate 1: %s %s %s, coordinate %d: %s %s %s)\n"
2106 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2107 fn, int2YN(thedim[XX]), int2YN(thedim[YY]), int2YN(thedim[ZZ]), i+1,
2108 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]));
2110 if (header->pcrd[i].geometry == epullgCYL)
2112 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
2114 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2115 "However, found dimensions [%s %s %s]\n",
2116 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2117 int2YN(header->pcrd[i].dim[ZZ]));
2120 if (header->pcrd[i].k <= 0.0)
2122 gmx_fatal(FARGS, "%s: Pull coordinate %d has force constant of of %g.\n"
2123 "That doesn't seem to be an Umbrella tpr.\n",
2124 fn, i+1, header->pcrd[i].k);
2129 if (opt->verbose || first)
2131 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
2133 for (int i = 0; i < ir->pull->ncoord; i++)
2135 int lentmp = strlen(epullg_names[header->pcrd[i].geometry]);
2136 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
2139 sprintf(fmt, "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d dimensions). Used: %%s\n",
2141 for (int i = 0; i < ir->pull->ncoord; i++)
2143 bool use = (coordsel == nullptr || coordsel->bUse[i]);
2145 epullg_names[header->pcrd[i].geometry], header->pcrd[i].k, header->pcrd[i].init_dist,
2146 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]),
2147 header->pcrd[i].ndim, use ? "Yes" : "No");
2148 printf("\tPull group coordinates of %d groups expected in pullx files.\n", ir->pull->bPrintCOM ? header->pcrd[i].ngroup : 0);
2150 printf("\tReference value of the coordinate%s expected in pullx files.\n",
2151 header->bPrintRefValue ? "" : " not");
2153 if (!opt->verbose && first)
2155 printf("\tUse option -v to see this output for all input tpr files\n\n");
2161 //! Read pullx.xvg or pullf.xvg
2162 static void read_pull_xf(const char *fn, t_UmbrellaHeader * header,
2163 t_UmbrellaWindow * window,
2164 t_UmbrellaOptions *opt,
2165 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2166 t_coordselection *coordsel)
2168 double **y = nullptr, pos = 0., t, force, time0 = 0., dt;
2169 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
2170 int nColExpect, ntot, column;
2171 real min, max, minfound = 1e20, maxfound = -1e20;
2172 gmx_bool dt_ok, timeok;
2173 const char *quantity;
2174 const int blocklen = 4096;
2175 int *lennow = nullptr;
2176 static gmx_bool bFirst = TRUE;
2179 * Data columns in pull output:
2180 * - in force output pullf.xvg:
2181 * No reference columns, one column per pull coordinate
2183 * - in position output pullx.xvg:
2184 * * optionally, ndim columns for COMs of all groups (depending on on mdp options pull-print-com);
2185 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
2186 * be written separately into pullx file, but this is not supported and throws an error below;
2187 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
2190 if (header->bPrintComp && opt->bPullx)
2192 gmx_fatal(FARGS, "gmx wham cannot read pullx files if the components of the coordinate was written\n"
2193 "(mdp option pull-print-components). Provide the pull force files instead (with option -if).\n");
2196 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
2197 snew(nColThisCrd, header->npullcrds);
2198 snew(nColCOMCrd, header->npullcrds);
2199 snew(nColRefCrd, header->npullcrds);
2203 /* pullf reading: simply one column per coordinate */
2204 for (g = 0; g < header->npullcrds; g++)
2213 /* pullx reading. Note explanation above. */
2214 for (g = 0; g < header->npullcrds; g++)
2216 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
2217 nColCOMCrd[g] = (header->bPrintCOM ? header->pcrd[g].ndim*header->pcrd[g].ngroup : 0);
2218 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
2222 nColExpect = 1; /* time column */
2223 for (g = 0; g < header->npullcrds; g++)
2225 nColExpect += nColThisCrd[g];
2228 /* read pullf or pullx. Could be optimized if min and max are given. */
2229 nt = read_xvg(fn, &y, &ny);
2231 /* Check consistency */
2232 quantity = opt->bPullx ? "position" : "force";
2235 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2237 if (bFirst || opt->verbose)
2239 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
2240 for (i = 0; i < header->npullcrds; i++)
2242 printf("\tColumns for pull coordinate %d\n", i+1);
2243 printf("\t\treaction coordinate: %d\n"
2244 "\t\tcenter-of-mass of groups: %d\n"
2245 "\t\treference position column: %s\n",
2246 1, nColCOMCrd[i], (header->bPrintRefValue ? "Yes" : "No"));
2248 printf("\tFound %d times in %s\n", nt, fn);
2251 if (nColExpect != ny)
2253 gmx_fatal(FARGS, "Expected %d columns (including time column) in %s, but found %d."
2254 " Maybe you confused options -if and -ix ?", nColExpect, fn, ny);
2265 window->dt = y[0][1]-y[0][0];
2267 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2269 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2272 /* Need to alocate memory and set up structure for windows */
2275 /* Use only groups selected with option -is file */
2276 if (header->npullcrds != coordsel->n)
2278 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2279 header->npullcrds, coordsel->n);
2281 window->nPull = coordsel->nUse;
2285 window->nPull = header->npullcrds;
2288 window->nBin = bins;
2289 snew(window->Histo, window->nPull);
2290 snew(window->z, window->nPull);
2291 snew(window->k, window->nPull);
2292 snew(window->pos, window->nPull);
2293 snew(window->N, window->nPull);
2294 snew(window->Ntot, window->nPull);
2295 snew(window->g, window->nPull);
2296 snew(window->bsWeight, window->nPull);
2297 window->bContrib = nullptr;
2299 if (opt->bCalcTauInt)
2301 snew(window->ztime, window->nPull);
2305 window->ztime = nullptr;
2307 snew(lennow, window->nPull);
2309 for (g = 0; g < window->nPull; ++g)
2312 window->bsWeight [g] = 1.;
2314 window->Ntot [g] = 0;
2316 snew(window->Histo[g], bins);
2318 if (opt->bCalcTauInt)
2320 window->ztime[g] = nullptr;
2324 /* Copying umbrella center and force const is more involved since not
2325 all pull groups from header (tpr file) may be used in window variable */
2326 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2328 if (coordsel && !coordsel->bUse[g])
2332 window->k [gUsed] = header->pcrd[g].k;
2333 window->pos[gUsed] = header->pcrd[g].init_dist;
2338 { /* only determine min and max */
2341 min = max = bins = 0; /* Get rid of warnings */
2345 for (i = 0; i < nt; i++)
2347 /* Do you want that time frame? */
2348 t = 1.0/1000*( gmx::roundToInt64((y[0][i]*1000))); /* round time to fs */
2350 /* get time step of pdo file and get dstep from opt->dt */
2360 dstep = gmx::roundToInt(opt->dt/dt);
2368 window->dt = dt*dstep;
2372 dt_ok = (i%dstep == 0);
2373 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2375 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2376 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2379 /* Note: if coordsel == NULL:
2380 * all groups in pullf/x file are stored in this window, and gUsed == g
2381 * if coordsel != NULL:
2382 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2385 for (g = 0; g < header->npullcrds; ++g)
2387 /* was this group selected for application in WHAM? */
2388 if (coordsel && !coordsel->bUse[g])
2396 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2398 pos = -force/header->pcrd[g].k + header->pcrd[g].init_dist;
2402 /* Pick the correct column index.
2403 Note that there is always exactly one displacement column.
2406 for (int j = 0; j < g; j++)
2408 column += nColThisCrd[j];
2413 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2427 if (gUsed >= window->nPull)
2429 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been caught before.\n",
2430 gUsed, window->nPull);
2433 if (opt->bCalcTauInt && !bGetMinMax)
2435 /* save time series for autocorrelation analysis */
2436 ntot = window->Ntot[gUsed];
2437 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2438 if (ntot >= lennow[gUsed])
2440 lennow[gUsed] += blocklen;
2441 srenew(window->ztime[gUsed], lennow[gUsed]);
2443 window->ztime[gUsed][ntot] = pos;
2446 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2451 while ( (ibin += bins) < 0)
2456 else if (ibin >= bins)
2458 while ( (ibin -= bins) >= bins)
2464 if (ibin >= 0 && ibin < bins)
2466 window->Histo[gUsed][ibin] += 1.;
2469 window->Ntot[gUsed]++;
2473 else if (t > opt->tmax)
2477 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2489 for (i = 0; i < ny; i++)
2495 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2496 static void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2497 t_UmbrellaHeader* header,
2498 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2501 real mintmp, maxtmp;
2503 printf("Reading %d tpr and pullf files\n", nfiles);
2505 /* min and max not given? */
2508 printf("Automatic determination of boundaries...\n");
2511 for (i = 0; i < nfiles; i++)
2513 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2515 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2517 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2518 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2520 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2522 read_pull_xf(fnPull[i], header, nullptr, opt, TRUE, &mintmp, &maxtmp,
2523 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2524 if (maxtmp > opt->max)
2528 if (mintmp < opt->min)
2533 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2534 if (opt->bBoundsOnly)
2536 printf("Found option -boundsonly, now exiting.\n");
2540 /* store stepsize in profile */
2541 opt->dz = (opt->max-opt->min)/opt->bins;
2543 bool foundData = false;
2544 for (i = 0; i < nfiles; i++)
2546 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2548 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2550 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2551 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2553 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2555 read_pull_xf(fnPull[i], header, window+i, opt, FALSE, nullptr, nullptr,
2556 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2557 if (window[i].Ntot[0] == 0)
2559 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2568 gmx_fatal(FARGS, "No data points were found in pullf/pullx files. Maybe you need to specify the -b option?\n");
2571 for (i = 0; i < nfiles; i++)
2580 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2582 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2583 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2585 static void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2587 int nlines, ny, i, ig;
2590 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2591 nlines = read_xvg(fn, &iact, &ny);
2592 if (nlines != nwins)
2594 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2597 for (i = 0; i < nlines; i++)
2599 if (window[i].nPull != ny)
2601 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2602 "number of pull groups is different in different simulations. That is not\n"
2603 "supported yet. Sorry.\n");
2605 for (ig = 0; ig < window[i].nPull; ig++)
2607 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2608 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2610 if (iact[ig][i] <= 0.0)
2612 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2619 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2622 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2623 * that -in case of limited sampling- the ACT may be severely underestimated.
2624 * Note: the g=1+2tau are overwritten.
2625 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2628 static void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2631 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2633 /* only evaluate within +- 3sigma of the Gausian */
2634 siglim = 3.0*opt->sigSmoothIact;
2635 siglim2 = gmx::square(siglim);
2636 /* pre-factor of Gaussian */
2637 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2638 invtwosig2 = 0.5/gmx::square(opt->sigSmoothIact);
2640 for (i = 0; i < nwins; i++)
2642 snew(window[i].tausmooth, window[i].nPull);
2643 for (ig = 0; ig < window[i].nPull; ig++)
2647 pos = window[i].pos[ig];
2648 for (j = 0; j < nwins; j++)
2650 for (jg = 0; jg < window[j].nPull; jg++)
2652 dpos2 = gmx::square(window[j].pos[jg]-pos);
2653 if (dpos2 < siglim2)
2655 w = gaufact*std::exp(-dpos2*invtwosig2);
2657 tausm += w*window[j].tau[jg];
2658 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2659 w,dpos2,pos,gaufact,invtwosig2); */
2664 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2666 window[i].tausmooth[ig] = tausm;
2670 window[i].tausmooth[ig] = window[i].tau[ig];
2672 window[i].g[ig] = 1+2*tausm/window[i].dt;
2677 //! Stop integrating autoccorelation function when ACF drops under this value
2678 #define WHAM_AC_ZERO_LIMIT 0.05
2680 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2682 static void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2683 t_UmbrellaOptions *opt, const char *fn, const char *xlabel)
2685 int i, ig, ncorr, ntot, j, k, *count, restart;
2686 real *corr, c0, dt, tmp;
2687 real *ztime, av, tausteps;
2688 FILE *fp, *fpcorr = nullptr;
2692 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2693 "time [ps]", "autocorrelation function", opt->oenv);
2697 for (i = 0; i < nwins; i++)
2699 fprintf(stdout, "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2701 ntot = window[i].Ntot[0];
2703 /* using half the maximum time as length of autocorrelation function */
2707 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2708 " points. Provide more pull data!", ntot);
2711 /* snew(corrSq,ncorr); */
2714 snew(window[i].tau, window[i].nPull);
2715 restart = gmx::roundToInt(opt->acTrestart/dt);
2721 for (ig = 0; ig < window[i].nPull; ig++)
2723 if (ntot != window[i].Ntot[ig])
2725 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2726 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2728 ztime = window[i].ztime[ig];
2730 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2731 for (j = 0, av = 0; (j < ntot); j++)
2736 for (k = 0; (k < ncorr); k++)
2741 for (j = 0; (j < ntot); j += restart)
2743 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2745 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2747 /* corrSq[k] += tmp*tmp; */
2751 /* divide by nr of frames for each time displacement */
2752 for (k = 0; (k < ncorr); k++)
2754 /* count probably = (ncorr-k+(restart-1))/restart; */
2755 corr[k] = corr[k]/count[k];
2756 /* variance of autocorrelation function */
2757 /* corrSq[k]=corrSq[k]/count[k]; */
2759 /* normalize such that corr[0] == 0 */
2761 for (k = 0; (k < ncorr); k++)
2764 /* corrSq[k]*=c0*c0; */
2767 /* write ACFs in verbose mode */
2770 for (k = 0; (k < ncorr); k++)
2772 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2774 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2777 /* esimate integrated correlation time, fitting is too unstable */
2778 tausteps = 0.5*corr[0];
2779 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2780 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2782 tausteps += corr[j];
2785 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2786 Kumar et al, eq. 28 ff. */
2787 window[i].tau[ig] = tausteps*dt;
2788 window[i].g[ig] = 1+2*tausteps;
2789 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2800 /* plot IACT along reaction coordinate */
2801 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2802 if (output_env_get_print_xvgr_codes(opt->oenv))
2804 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2805 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2806 for (i = 0; i < nwins; i++)
2808 fprintf(fp, "# %3d ", i);
2809 for (ig = 0; ig < window[i].nPull; ig++)
2811 fprintf(fp, " %11g", window[i].tau[ig]);
2816 for (i = 0; i < nwins; i++)
2818 for (ig = 0; ig < window[i].nPull; ig++)
2820 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2823 if (opt->sigSmoothIact > 0.0)
2825 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2826 opt->sigSmoothIact);
2827 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2828 smoothIact(window, nwins, opt);
2829 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2830 if (output_env_get_print_xvgr_codes(opt->oenv))
2832 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2833 fprintf(fp, "@ s1 symbol color 2\n");
2835 for (i = 0; i < nwins; i++)
2837 for (ig = 0; ig < window[i].nPull; ig++)
2839 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2844 printf("Wrote %s\n", fn);
2848 * compute average and sigma of each umbrella histogram
2850 static void averageSigma(t_UmbrellaWindow *window, int nwins)
2853 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2855 for (i = 0; i < nwins; i++)
2857 snew(window[i].aver, window[i].nPull);
2858 snew(window[i].sigma, window[i].nPull);
2860 ntot = window[i].Ntot[0];
2861 for (ig = 0; ig < window[i].nPull; ig++)
2863 ztime = window[i].ztime[ig];
2864 for (k = 0, av = 0.; k < ntot; k++)
2869 for (k = 0, sum2 = 0.; k < ntot; k++)
2874 sig = std::sqrt(sum2/ntot);
2875 window[i].aver[ig] = av;
2877 /* Note: This estimate for sigma is biased from the limited sampling.
2878 Correct sigma by n/(n-1) where n = number of independent
2879 samples. Only possible if IACT is known.
2883 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2884 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2888 window[i].sigma[ig] = sig;
2890 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2897 * Use histograms to compute average force on pull group.
2899 static void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2901 int i, j, bins = opt->bins, k;
2902 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2905 dz = (max-min)/bins;
2906 ztot = opt->max-min;
2909 /* Compute average displacement from histograms */
2910 for (j = 0; j < nWindows; ++j)
2912 snew(window[j].forceAv, window[j].nPull);
2913 for (k = 0; k < window[j].nPull; ++k)
2917 for (i = 0; i < opt->bins; ++i)
2919 temp = (1.0*i+0.5)*dz+min;
2920 distance = temp - window[j].pos[k];
2922 { /* in cyclic wham: */
2923 if (distance > ztot_half) /* |distance| < ztot_half */
2927 else if (distance < -ztot_half)
2932 w = window[j].Histo[k][i]/window[j].g[k];
2933 displAv += w*distance;
2935 /* Are we near min or max? We are getting wrong forces from the histgrams since
2936 the histograms are zero outside [min,max). Therefore, assume that the position
2937 on the other side of the histomgram center is equally likely. */
2940 posmirrored = window[j].pos[k]-distance;
2941 if (posmirrored >= max || posmirrored < min)
2943 displAv += -w*distance;
2950 /* average force from average displacement */
2951 window[j].forceAv[k] = displAv*window[j].k[k];
2952 /* sigma from average square displacement */
2953 /* window[j].sigma [k] = sqrt(displAv2); */
2954 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2960 * Check if the complete reaction coordinate is covered by the histograms
2962 static void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2963 t_UmbrellaOptions *opt)
2965 int i, ig, j, bins = opt->bins;
2967 real avcount = 0, z, relcount, *count;
2968 snew(count, opt->bins);
2970 for (j = 0; j < opt->bins; ++j)
2972 for (i = 0; i < nwins; i++)
2974 for (ig = 0; ig < window[i].nPull; ig++)
2976 count[j] += window[i].Histo[ig][j];
2979 avcount += 1.0*count[j];
2982 for (j = 0; j < bins; ++j)
2984 relcount = count[j]/avcount;
2985 z = (j+0.5)*opt->dz+opt->min;
2986 bBoundary = j<bins/20 || (bins-j)>bins/20;
2987 /* check for bins with no data */
2990 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2991 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2993 /* and check for poor sampling */
2994 else if (relcount < 0.005 && !bBoundary)
2996 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3002 /*! \brief Compute initial potential by integrating the average force
3004 * This speeds up the convergence by roughly a factor of 2
3006 static void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt, const char *xlabel)
3008 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3009 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3012 dz = (opt->max-min)/bins;
3014 printf("Getting initial potential by integration.\n");
3016 /* Compute average displacement from histograms */
3017 computeAverageForce(window, nWindows, opt);
3019 /* Get force for each bin from all histograms in this bin, or, alternatively,
3020 if no histograms are inside this bin, from the closest histogram */
3023 for (j = 0; j < opt->bins; ++j)
3025 pos = (1.0*j+0.5)*dz+min;
3029 groupmin = winmin = 0;
3030 for (i = 0; i < nWindows; i++)
3032 for (ig = 0; ig < window[i].nPull; ig++)
3034 hispos = window[i].pos[ig];
3035 dist = std::abs(hispos-pos);
3036 /* average force within bin */
3040 fAv += window[i].forceAv[ig];
3042 /* at the same time, remember closest histogram */
3051 /* if no histogram found in this bin, use closest histogram */
3058 fAv = window[winmin].forceAv[groupmin];
3062 for (j = 1; j < opt->bins; ++j)
3064 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3067 /* cyclic wham: linearly correct possible offset */
3070 diff = (pot[bins-1]-pot[0])/(bins-1);
3071 for (j = 1; j < opt->bins; ++j)
3078 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3079 for (j = 0; j < opt->bins; ++j)
3081 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3084 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3087 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3088 energy offsets which are usually determined by wham
3089 First: turn pot into probabilities:
3091 for (j = 0; j < opt->bins; ++j)
3093 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3095 calc_z(pot, window, nWindows, opt, TRUE);
3101 //! Count number of words in an line
3102 static int wordcount(char *ptr)
3107 if (std::strlen(ptr) == 0)
3111 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3113 for (i = 0; (ptr[i] != '\0'); i++)
3115 is[cur] = isspace(ptr[i]);
3116 if ((i > 0) && (is[cur] && !is[1-cur]))
3125 /*! \brief Read input file for pull group selection (option -is)
3127 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3129 static void readPullCoordSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3132 int i, iline, n, len = STRLEN, temp;
3133 char *ptr = nullptr, *tmpbuf = nullptr;
3134 char fmt[1024], fmtign[1024];
3135 int block = 1, sizenow;
3137 fp = gmx_ffopen(opt->fnCoordSel, "r");
3138 opt->coordsel = nullptr;
3143 while ( (ptr = fgets3(fp, tmpbuf, &len)) != nullptr)
3148 if (iline >= sizenow)
3151 srenew(opt->coordsel, sizenow);
3153 opt->coordsel[iline].n = n;
3154 opt->coordsel[iline].nUse = 0;
3155 snew(opt->coordsel[iline].bUse, n);
3158 for (i = 0; i < n; i++)
3160 std::strcpy(fmt, fmtign);
3161 std::strcat(fmt, "%d");
3162 if (sscanf(ptr, fmt, &temp))
3164 opt->coordsel[iline].bUse[i] = (temp > 0);
3165 if (opt->coordsel[iline].bUse[i])
3167 opt->coordsel[iline].nUse++;
3170 std::strcat(fmtign, "%*s");
3174 opt->nCoordsel = iline;
3175 if (nTpr != opt->nCoordsel)
3177 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel,
3181 printf("\nUse only these pull coordinates:\n");
3182 for (iline = 0; iline < nTpr; iline++)
3184 printf("%s (%d of %d coordinates):", fnTpr[iline], opt->coordsel[iline].nUse, opt->coordsel[iline].n);
3185 for (i = 0; i < opt->coordsel[iline].n; i++)
3187 if (opt->coordsel[iline].bUse[i])
3200 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3202 //! Number of elements in fnm (used for command line parsing)
3203 #define NFILE asize(fnm)
3205 //! The main gmx wham routine
3206 int gmx_wham(int argc, char *argv[])
3208 const char *desc[] = {
3209 "[THISMODULE] is an analysis program that implements the Weighted",
3210 "Histogram Analysis Method (WHAM). It is intended to analyze",
3211 "output files generated by umbrella sampling simulations to ",
3212 "compute a potential of mean force (PMF).[PAR]",
3214 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3215 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3216 "and, if multiple coordinates need to be analyzed, all used the same",
3217 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3218 "At present, three input modes are supported.",
3220 "* With option [TT]-it[tt], the user provides a file which contains the",
3221 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3222 " AND, with option [TT]-ix[tt], a file which contains file names of",
3223 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3224 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3225 " first pullx, etc.",
3226 "* Same as the previous input mode, except that the the user",
3227 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3228 " From the pull force the position in the umbrella potential is",
3229 " computed. This does not work with tabulated umbrella potentials.",
3230 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3231 " the GROMACS 3.3 umbrella output files. If you have some unusual",
3232 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3233 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3234 " must be similar to the following::",
3237 " # Component selection: 0 0 1",
3239 " # Ref. Group 'TestAtom'",
3240 " # Nr. of pull groups 2",
3241 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3242 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3245 " The number of pull groups, umbrella positions, force constants, and names ",
3246 " may (of course) differ. Following the header, a time column and ",
3247 " a data column for each pull group follows (i.e. the displacement",
3248 " with respect to the umbrella center). Up to four pull groups are possible ",
3249 " per [REF].pdo[ref] file at present.[PAR]",
3250 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If only ",
3251 "some of the pull coordinates should be used, a pull coordinate selection file (option [TT]-is[tt]) can ",
3252 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3253 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr file. ",
3254 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. Example:",
3255 "If you have three tpr files, each containing 4 pull coordinates, but only pull coordinates 1 and 2 should be ",
3256 "used, coordsel.dat looks like this::",
3262 "By default, the output files are::",
3264 " [TT]-o[tt] PMF output file",
3265 " [TT]-hist[tt] Histograms output file",
3267 "Always check whether the histograms sufficiently overlap.[PAR]",
3268 "The umbrella potential is assumed to be harmonic and the force constants are ",
3269 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3270 "a tabulated potential can be provided with [TT]-tab[tt].",
3275 "* [TT]-bins[tt] Number of bins used in analysis",
3276 "* [TT]-temp[tt] Temperature in the simulations",
3277 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3278 "* [TT]-auto[tt] Automatic determination of boundaries",
3279 "* [TT]-min,-max[tt] Boundaries of the profile",
3281 "The data points that are used to compute the profile",
3282 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3283 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3284 "umbrella window.[PAR]",
3285 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3286 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3287 "With energy output, the energy in the first bin is defined to be zero. ",
3288 "If you want the free energy at a different ",
3289 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3290 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3291 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3292 "[THISMODULE] will make use of the",
3293 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3294 "reaction coordinate will assumed be be neighbors.[PAR]",
3295 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3296 "which may be useful for, e.g. membranes.",
3301 "If available, the number of OpenMP threads used by gmx wham can be controlled by setting",
3302 "the [TT]OMP_NUM_THREADS[tt] environment variable.",
3307 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3308 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3309 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3310 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3311 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3312 "Because the IACTs can be severely underestimated in case of limited ",
3313 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3314 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3315 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3316 "integration of the ACFs while the ACFs are larger 0.05.",
3317 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3318 "less robust) method such as fitting to a double exponential, you can ",
3319 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3320 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3321 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull coordinate in the respective file.",
3326 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3327 "otherwise the statistical error may be substantially underestimated. ",
3328 "More background and examples for the bootstrap technique can be found in ",
3329 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3330 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3331 "Four bootstrapping methods are supported and ",
3332 "selected with [TT]-bs-method[tt].",
3334 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3335 " data points, and the bootstrap is carried out by assigning random weights to the ",
3336 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3337 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3338 " statistical error is underestimated.",
3339 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3340 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3341 " (allowing duplication, i.e. sampling with replacement).",
3342 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3343 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3344 " divided into blocks and only histograms within each block are mixed. Note that ",
3345 " the histograms within each block must be representative for all possible histograms, ",
3346 " otherwise the statistical error is underestimated.",
3347 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3348 " such that the generated data points are distributed according the given histograms ",
3349 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3350 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3351 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3352 " Note that this method may severely underestimate the error in case of limited sampling, ",
3353 " that is if individual histograms do not represent the complete phase space at ",
3354 " the respective positions.",
3355 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3356 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3357 " and width of the umbrella histograms. That method yields similar error estimates ",
3358 " like method [TT]traj[tt].",
3360 "Bootstrapping output:",
3362 "* [TT]-bsres[tt] Average profile and standard deviations",
3363 "* [TT]-bsprof[tt] All bootstrapping profiles",
3365 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3366 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3370 const char *en_unit[] = {nullptr, "kJ", "kCal", "kT", nullptr};
3371 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", nullptr};
3372 const char *en_bsMethod[] = { nullptr, "b-hist", "hist", "traj", "traj-gauss", nullptr };
3373 static t_UmbrellaOptions opt;
3376 { "-min", FALSE, etREAL, {&opt.min},
3377 "Minimum coordinate in profile"},
3378 { "-max", FALSE, etREAL, {&opt.max},
3379 "Maximum coordinate in profile"},
3380 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3381 "Determine min and max automatically"},
3382 { "-bins", FALSE, etINT, {&opt.bins},
3383 "Number of bins in profile"},
3384 { "-temp", FALSE, etREAL, {&opt.Temperature},
3386 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3388 { "-v", FALSE, etBOOL, {&opt.verbose},
3390 { "-b", FALSE, etREAL, {&opt.tmin},
3391 "First time to analyse (ps)"},
3392 { "-e", FALSE, etREAL, {&opt.tmax},
3393 "Last time to analyse (ps)"},
3394 { "-dt", FALSE, etREAL, {&opt.dt},
3395 "Analyse only every dt ps"},
3396 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3397 "Write histograms and exit"},
3398 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3399 "Determine min and max and exit (with [TT]-auto[tt])"},
3400 { "-log", FALSE, etBOOL, {&opt.bLog},
3401 "Calculate the log of the profile before printing"},
3402 { "-unit", FALSE, etENUM, {en_unit},
3403 "Energy unit in case of log output" },
3404 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3405 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3406 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3407 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3408 { "-sym", FALSE, etBOOL, {&opt.bSym},
3409 "Symmetrize profile around z=0"},
3410 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3411 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3412 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3413 "Calculate integrated autocorrelation times and use in wham"},
3414 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3415 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3416 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3417 "When computing autocorrelation functions, restart computing every .. (ps)"},
3418 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3419 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3420 "during smoothing"},
3421 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3422 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3423 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3424 "Bootstrap method" },
3425 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3426 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3427 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3428 "Seed for bootstrapping. (-1 = use time)"},
3429 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3430 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3431 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3432 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3433 { "-stepout", FALSE, etINT, {&opt.stepchange},
3434 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3435 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3436 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3440 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3441 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3442 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3443 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3444 { efDAT, "-is", "coordsel", ffOPTRD}, /* input: select pull coords to use */
3445 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3446 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3447 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3448 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3449 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3450 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3451 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3454 int i, j, l, nfiles, nwins, nfiles2;
3455 t_UmbrellaHeader header;
3456 t_UmbrellaWindow * window = nullptr;
3457 double *profile, maxchange = 1e20;
3458 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3459 char **fninTpr, **fninPull, **fninPdo;
3461 FILE *histout, *profout;
3462 char xlabel[STRLEN], ylabel[256], title[256];
3465 opt.verbose = FALSE;
3466 opt.bHistOnly = FALSE;
3475 opt.coordsel = nullptr;
3477 /* bootstrapping stuff */
3479 opt.bsMethod = bsMethod_hist;
3480 opt.tauBootStrap = 0.0;
3482 opt.histBootStrapBlockLength = 8;
3483 opt.bs_verbose = FALSE;
3488 opt.Temperature = 298;
3489 opt.Tolerance = 1e-6;
3490 opt.bBoundsOnly = FALSE;
3492 opt.bCalcTauInt = FALSE;
3493 opt.sigSmoothIact = 0.0;
3494 opt.bAllowReduceIact = TRUE;
3495 opt.bInitPotByIntegration = TRUE;
3496 opt.acTrestart = 1.0;
3497 opt.stepchange = 100;
3498 opt.stepUpdateContrib = 100;
3500 if (!parse_common_args(&argc, argv, 0,
3501 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &opt.oenv))
3506 opt.unit = nenum(en_unit);
3507 opt.bsMethod = nenum(en_bsMethod);
3509 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3511 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3512 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3513 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3514 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3515 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3516 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3517 if (opt.bTab && opt.bPullf)
3519 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3520 "Provide pullx.xvg or pdo files!");
3523 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3525 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3527 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3529 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3530 "\n\n Check gmx wham -h !");
3533 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3534 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3535 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3536 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3537 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3539 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3540 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3541 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3542 if ( (bMinSet || bMaxSet) && bAutoSet)
3544 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3547 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3549 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3552 if (bMinSet && opt.bAuto)
3554 printf("Note: min and max given, switching off -auto.\n");
3558 if (opt.bTauIntGiven && opt.bCalcTauInt)
3560 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3561 "the autocorrelation times. Not both.");
3564 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3566 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3567 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3569 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3571 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3572 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3575 /* Reading gmx4/gmx5 pull output and tpr files */
3576 if (opt.bTpr || opt.bPullf || opt.bPullx)
3578 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3580 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3581 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3582 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3583 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3584 if (nfiles != nfiles2)
3586 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3587 opt.fnTpr, nfiles2, fnPull);
3590 /* Read file that selects the pull group to be used */
3591 if (opt.fnCoordSel != nullptr)
3593 readPullCoordSelection(&opt, fninTpr, nfiles);
3596 window = initUmbrellaWindows(nfiles);
3597 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3600 { /* reading pdo files */
3601 if (opt.fnCoordSel != nullptr)
3603 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3604 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3606 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3607 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3608 window = initUmbrellaWindows(nfiles);
3609 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3612 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3613 We can therefore get the units for the xlabel from the first coordinate. */
3614 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3618 /* enforce equal weight for all histograms? */
3621 enforceEqualWeights(window, nwins);
3624 /* write histograms */
3625 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3626 xlabel, "count", opt.oenv);
3627 for (l = 0; l < opt.bins; ++l)
3629 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3630 for (i = 0; i < nwins; ++i)
3632 for (j = 0; j < window[i].nPull; ++j)
3634 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3637 fprintf(histout, "\n");
3640 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3643 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3647 /* Using tabulated umbrella potential */
3650 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3653 /* Integrated autocorrelation times provided ? */
3654 if (opt.bTauIntGiven)
3656 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3659 /* Compute integrated autocorrelation times */
3660 if (opt.bCalcTauInt)
3662 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3665 /* calc average and sigma for each histogram
3666 (maybe required for bootstrapping. If not, this is fast anyhow) */
3667 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3669 averageSigma(window, nwins);
3672 /* Get initial potential by simple integration */
3673 if (opt.bInitPotByIntegration)
3675 guessPotByIntegration(window, nwins, &opt, xlabel);
3678 /* Check if complete reaction coordinate is covered */
3679 checkReactionCoordinateCovered(window, nwins, &opt);
3681 /* Calculate profile */
3682 snew(profile, opt.bins);
3690 if ( (i%opt.stepUpdateContrib) == 0)
3692 setup_acc_wham(profile, window, nwins, &opt);
3694 if (maxchange < opt.Tolerance)
3697 /* if (opt.verbose) */
3698 printf("Switched to exact iteration in iteration %d\n", i);
3700 calc_profile(profile, window, nwins, &opt, bExact);
3701 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3703 printf("\t%4d) Maximum change %e\n", i, maxchange);
3707 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3708 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3710 /* calc error from Kumar's formula */
3711 /* Unclear how the error propagates along reaction coordinate, therefore
3713 /* calc_error_kumar(profile,window, nwins,&opt); */
3715 /* Write profile in energy units? */
3718 prof_normalization_and_unit(profile, &opt);
3719 std::strcpy(ylabel, en_unit_label[opt.unit]);
3720 std::strcpy(title, "Umbrella potential");
3724 std::strcpy(ylabel, "Density of states");
3725 std::strcpy(title, "Density of states");
3728 /* symmetrize profile around z=0? */
3731 symmetrizeProfile(profile, &opt);
3734 /* write profile or density of states */
3735 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3736 for (i = 0; i < opt.bins; ++i)
3738 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3741 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3743 /* Bootstrap Method */
3746 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3747 opt2fn("-hist", NFILE, fnm),
3748 xlabel, ylabel, profile, window, nwins, &opt);
3752 freeUmbrellaWindows(window, nfiles);
3754 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3755 please_cite(stdout, "Hub2010");