2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
56 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/fileio/tpxio.h"
58 #include "gromacs/fileio/xvgr.h"
59 #include "gromacs/gmxana/gmx_ana.h"
60 #include "gromacs/math/functions.h"
61 #include "gromacs/math/units.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/mdrunutility/mdmodules.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/pull-params.h"
67 #include "gromacs/pulling/pull.h"
68 #include "gromacs/random/tabulatednormaldistribution.h"
69 #include "gromacs/random/threefry.h"
70 #include "gromacs/random/uniformintdistribution.h"
71 #include "gromacs/random/uniformrealdistribution.h"
72 #include "gromacs/utility/arraysize.h"
73 #include "gromacs/utility/cstringutil.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/fatalerror.h"
76 #include "gromacs/utility/futil.h"
77 #include "gromacs/utility/gmxomp.h"
78 #include "gromacs/utility/pleasecite.h"
79 #include "gromacs/utility/smalloc.h"
81 //! longest file names allowed in input files
82 #define WHAM_MAXFILELEN 2048
85 * enum for energy units
88 enSel, en_kJ, en_kCal, en_kT, enNr
91 * enum for type of input files (pdos, tpr, or pullf)
94 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
97 /*! \brief enum for bootstrapping method
99 * These bootstrap methods are supported:
100 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
101 * (bsMethod_BayesianHist)
102 * - bootstrap complete histograms (bsMethod_hist)
103 * - bootstrap trajectories from given umbrella histograms. This generates new
104 * "synthetic" histograms (bsMethod_traj)
105 * - bootstrap trajectories from Gaussian with mu/sigma computed from
106 * the respective histogram (bsMethod_trajGauss). This gives very similar
107 * results compared to bsMethod_traj.
109 * ********************************************************************
110 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
111 * JS Hub, BL de Groot, D van der Spoel
112 * g_wham - A free weighted histogram analysis implementation including
113 * robust error and autocorrelation estimates,
114 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
115 * ********************************************************************
118 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
119 bsMethod_traj, bsMethod_trajGauss
122 //! Parameters of one pull coodinate
125 int pull_type; //!< such as constraint, umbrella, ...
126 int geometry; //!< such as distance, direction, cylinder
127 ivec dim; //!< pull dimension with geometry distance
128 int ndim; //!< nr of pull_dim != 0
129 real k; //!< force constants in tpr file
130 real init_dist; //!< reference displacement
131 char coord_unit[256]; //!< unit of the displacement
134 //! Parameters of the umbrella potentials
138 * \name Using umbrella pull code since gromacs 4.x
141 int npullcrds; //!< nr of umbrella pull coordinates for reading
142 t_pullcoord *pcrd; //!< the pull coordinates
143 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
144 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
145 int nCOMGrpsPullx; //!< Number of grps of which the COM is listed in pullx.xvg (COM of grp 1 or grp 2 or both)
149 * \name Using PDO files common until gromacs 3.x
157 char PullName[4][256];
159 double UmbCons[4][3];
163 //! Data in the umbrella histograms
166 int nPull; //!< nr of pull groups in this pdo or pullf/x file
167 double **Histo; //!< nPull histograms
168 double **cum; //!< nPull cumulative distribution functions
169 int nBin; //!< nr of bins. identical to opt->bins
170 double *k; //!< force constants for the nPull coords
171 double *pos; //!< umbrella positions for the nPull coords
172 double *z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
173 int *N; //!< nr of data points in nPull histograms.
174 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
176 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
178 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
179 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
182 double *tau; //!< intetrated autocorrelation time (IACT)
183 double *tausmooth; //!< smoothed IACT
185 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
187 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
189 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
191 /*! \brief average force estimated from average displacement, fAv=dzAv*k
193 * Used for integration to guess the potential.
196 real *aver; //!< average of histograms
197 real *sigma; //!< stddev of histograms
198 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
201 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
204 int n; //!< total nr of pull coords in this tpr file
205 int nUse; //!< nr of pull coords used
206 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
209 //! Parameters of WHAM
216 const char *fnTpr, *fnPullf, *fnCoordSel;
217 const char *fnPdo, *fnPullx; //!< file names of input
218 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
219 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
221 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
222 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
223 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
224 t_coordselection *coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
227 * \name Basic WHAM options
230 int bins; //!< nr of bins, min, max, and dz of profile
232 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
233 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
236 * \name Output control
239 gmx_bool bLog; //!< energy output (instead of probability) for profile
240 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
241 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
242 /*! \brief after wham, set prof to zero at this z-position.
243 * When bootstrapping, set zProf0 to a "stable" reference position.
246 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
248 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
249 gmx_bool bAuto; //!< determine min and max automatically but do not exit
251 gmx_bool verbose; //!< more noisy wham mode
252 int stepchange; //!< print maximum change in prof after how many interations
253 gmx_output_env_t *oenv; //!< xvgr options
256 * \name Autocorrelation stuff
259 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
260 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
261 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
262 real acTrestart; //!< when computing ACT, time between restarting points
264 /* \brief Enforce the same weight for each umbella window, that is
265 * calculate with the same number of data points for
266 * each window. That can be reasonable, if the histograms
267 * have different length, but due to autocorrelation,
268 * a longer simulation should not have larger weightin wham.
274 * \name Bootstrapping stuff
277 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
279 /* \brief bootstrap method
281 * if == bsMethod_hist, consider complete histograms as independent
282 * data points and, hence, only mix complete histograms.
283 * if == bsMethod_BayesianHist, consider complete histograms
284 * as independent data points, but assign random weights
285 * to the histograms during the bootstrapping ("Bayesian bootstrap")
286 * In case of long correlations (e.g., inside a channel), these
287 * will yield a more realistic error.
288 * if == bsMethod_traj(Gauss), generate synthetic histograms
290 * histogram by generating an autocorrelated random sequence
291 * that is distributed according to the respective given
292 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
293 * (instead of from the umbrella histogram) to generate a new
298 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
301 /* \brief when mixing histograms, mix only histograms withing blocks
302 long the reaction coordinate xi. Avoids gaps along xi. */
303 int histBootStrapBlockLength;
305 int bsSeed; //!< random seed for bootstrapping
307 /* \brief Write cumulative distribution functions (CDFs) of histograms
308 and write the generated histograms for each bootstrap */
312 * \name tabulated umbrella potential stuff
316 double *tabX, *tabY, tabMin, tabMax, tabDz;
319 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
320 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
323 //! Make an umbrella window (may contain several histograms)
324 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
326 t_UmbrellaWindow *win;
329 for (i = 0; i < nwin; i++)
331 win[i].Histo = win[i].cum = 0;
332 win[i].k = win[i].pos = win[i].z = 0;
333 win[i].N = win[i].Ntot = 0;
334 win[i].g = win[i].tau = win[i].tausmooth = 0;
338 win[i].aver = win[i].sigma = 0;
344 //! Delete an umbrella window (may contain several histograms)
345 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
348 for (i = 0; i < nwin; i++)
352 for (j = 0; j < win[i].nPull; j++)
354 sfree(win[i].Histo[j]);
359 for (j = 0; j < win[i].nPull; j++)
361 sfree(win[i].cum[j]);
366 for (j = 0; j < win[i].nPull; j++)
368 sfree(win[i].bContrib[j]);
380 sfree(win[i].tausmooth);
381 sfree(win[i].bContrib);
383 sfree(win[i].forceAv);
386 sfree(win[i].bsWeight);
392 * Read and setup tabulated umbrella potential
394 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
399 printf("Setting up tabulated potential from file %s\n", fn);
400 nl = read_xvg(fn, &y, &ny);
404 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
406 opt->tabMin = y[0][0];
407 opt->tabMax = y[0][nl-1];
408 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
411 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
412 "ascending z-direction", fn);
414 for (i = 0; i < nl-1; i++)
416 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
418 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
423 for (i = 0; i < nl; i++)
425 opt->tabX[i] = y[0][i];
426 opt->tabY[i] = y[1][i];
428 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
429 opt->tabMin, opt->tabMax, opt->tabDz);
432 //! Read the header of an PDO file (position, force const, nr of groups)
433 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
436 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
438 std::istringstream ist;
441 if (fgets(line, 2048, file) == NULL)
443 gmx_fatal(FARGS, "Error reading header from pdo file\n");
446 ist >> Buffer0 >> Buffer1 >> Buffer2;
447 if (std::strcmp(Buffer1, "UMBRELLA"))
449 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
450 "(Found in first line: `%s')\n",
451 Buffer1, "UMBRELLA", line);
453 if (std::strcmp(Buffer2, "3.0"))
455 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
459 if (fgets(line, 2048, file) == NULL)
461 gmx_fatal(FARGS, "Error reading header from pdo file\n");
464 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
465 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
467 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
468 if (header->nDim != 1)
470 gmx_fatal(FARGS, "Currently only supports one dimension");
474 if (fgets(line, 2048, file) == NULL)
476 gmx_fatal(FARGS, "Error reading header from pdo file\n");
479 ist >> Buffer0 >> Buffer1 >> header->nSkip;
482 if (fgets(line, 2048, file) == NULL)
484 gmx_fatal(FARGS, "Error reading header from pdo file\n");
487 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
490 if (fgets(line, 2048, file) == NULL)
492 gmx_fatal(FARGS, "Error reading header from pdo file\n");
495 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
499 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
503 for (i = 0; i < header->nPull; ++i)
505 if (fgets(line, 2048, file) == NULL)
507 gmx_fatal(FARGS, "Error reading header from pdo file\n");
510 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
511 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
512 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
516 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
517 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
521 if (fgets(line, 2048, file) == NULL)
523 gmx_fatal(FARGS, "Cannot read from file\n");
527 if (std::strcmp(Buffer3, "#####") != 0)
529 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
534 static char *fgets3(FILE *fp, char ptr[], int *len)
539 if (fgets(ptr, *len-1, fp) == NULL)
544 while ((std::strchr(ptr, '\n') == NULL) && (!feof(fp)))
546 /* This line is longer than len characters, let's increase len! */
550 if (fgets(p-1, STRLEN, fp) == NULL)
555 slen = std::strlen(ptr);
556 if (ptr[slen-1] == '\n')
564 /*! \brief Read the data columns of and PDO file.
566 * TO DO: Get rid of the scanf function to avoid the clang warning.
567 * At the moment, this warning is avoided by hiding the format string
568 * the variable fmtlf.
570 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
571 int fileno, t_UmbrellaWindow * win,
572 t_UmbrellaOptions *opt,
573 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
575 int i, inttemp, bins, count, ntot;
576 real minval, maxval, minfound = 1e20, maxfound = -1e20;
577 double temp, time, time0 = 0, dt;
579 t_UmbrellaWindow * window = 0;
580 gmx_bool timeok, dt_ok = 1;
581 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
582 int len = STRLEN, dstep = 1;
583 const int blocklen = 4096;
593 /* Need to alocate memory and set up structure */
594 window->nPull = header->nPull;
597 snew(window->Histo, window->nPull);
598 snew(window->z, window->nPull);
599 snew(window->k, window->nPull);
600 snew(window->pos, window->nPull);
601 snew(window->N, window->nPull);
602 snew(window->Ntot, window->nPull);
603 snew(window->g, window->nPull);
604 snew(window->bsWeight, window->nPull);
606 window->bContrib = 0;
608 if (opt->bCalcTauInt)
610 snew(window->ztime, window->nPull);
616 snew(lennow, window->nPull);
618 for (i = 0; i < window->nPull; ++i)
621 window->bsWeight[i] = 1.;
622 snew(window->Histo[i], bins);
623 window->k[i] = header->UmbCons[i][0];
624 window->pos[i] = header->UmbPos[i][0];
628 if (opt->bCalcTauInt)
630 window->ztime[i] = 0;
634 /* Done with setup */
640 minval = maxval = bins = 0; /* Get rid of warnings */
645 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
649 if (ptr[0] == '#' || std::strlen(ptr) < 2)
654 /* Initiate format string */
656 std::strcat(fmtign, "%*s");
658 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
659 /* Round time to fs */
660 time = 1.0/1000*( static_cast<gmx_int64_t> (time*1000+0.5) );
662 /* get time step of pdo file */
672 dstep = static_cast<int>(opt->dt/dt+0.5);
680 window->dt = dt*dstep;
685 dt_ok = ((count-1)%dstep == 0);
686 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
688 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
689 time,opt->tmin, opt->tmax, dt_ok,timeok); */
693 for (i = 0; i < header->nPull; ++i)
695 std::strcpy(fmt, fmtign);
696 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
697 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
698 if (sscanf(ptr, fmt, &temp))
700 temp += header->UmbPos[i][0];
714 if (opt->bCalcTauInt)
716 /* save time series for autocorrelation analysis */
717 ntot = window->Ntot[i];
718 if (ntot >= lennow[i])
720 lennow[i] += blocklen;
721 srenew(window->ztime[i], lennow[i]);
723 window->ztime[i][ntot] = temp;
727 temp /= (maxval-minval);
729 temp = std::floor(temp);
731 inttemp = static_cast<int> (temp);
738 else if (inttemp >= bins)
744 if (inttemp >= 0 && inttemp < bins)
746 window->Histo[i][inttemp] += 1.;
754 if (time > opt->tmax)
758 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
774 /*! \brief Set identical weights for all histograms
776 * Normally, the weight is given by the number data points in each
777 * histogram, together with the autocorrelation time. This can be overwritten
778 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
779 * an appropriate autocorrelation times instead of using this function.
781 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
783 int i, k, j, NEnforced;
786 NEnforced = window[0].Ntot[0];
787 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
788 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
789 /* enforce all histograms to have the same weight as the very first histogram */
791 for (j = 0; j < nWindows; ++j)
793 for (k = 0; k < window[j].nPull; ++k)
795 ratio = 1.0*NEnforced/window[j].Ntot[k];
796 for (i = 0; i < window[0].nBin; ++i)
798 window[j].Histo[k][i] *= ratio;
800 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
805 /*! \brief Simple linear interpolation between two given tabulated points
807 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
810 double pl, pu, dz, dp;
812 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
814 if (jl < 0 || ju >= opt->tabNbins)
816 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
817 "Provide an extended table.", dist, jl, ju);
821 dz = dist-opt->tabX[jl];
822 dp = (pu-pl)*dz/opt->tabDz;
828 * Check which bins substiantially contribute (accelerates WHAM)
830 * Don't worry, that routine does not mean we compute the PMF in limited precision.
831 * After rapid convergence (using only substiantal contributions), we always switch to
834 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
835 t_UmbrellaOptions *opt)
837 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
838 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
839 gmx_bool bAnyContrib;
840 static int bFirst = 1;
841 static double wham_contrib_lim;
845 for (i = 0; i < nWindows; ++i)
847 nGrptot += window[i].nPull;
849 wham_contrib_lim = opt->Tolerance/nGrptot;
852 ztot = opt->max-opt->min;
855 for (i = 0; i < nWindows; ++i)
857 if (!window[i].bContrib)
859 snew(window[i].bContrib, window[i].nPull);
861 for (j = 0; j < window[i].nPull; ++j)
863 if (!window[i].bContrib[j])
865 snew(window[i].bContrib[j], opt->bins);
868 for (k = 0; k < opt->bins; ++k)
870 temp = (1.0*k+0.5)*dz+min;
871 distance = temp - window[i].pos[j]; /* distance to umbrella center */
873 { /* in cyclic wham: */
874 if (distance > ztot_half) /* |distance| < ztot_half */
878 else if (distance < -ztot_half)
883 /* Note: there are two contributions to bin k in the wham equations:
884 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
885 ii) exp(- U/(BOLTZ*opt->Temperature))
886 where U is the umbrella potential
887 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
892 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
896 U = tabulated_pot(distance, opt); /* Use tabulated potential */
898 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
899 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
900 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
901 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
902 if (window[i].bContrib[j][k])
908 /* If this histo is far outside min and max all bContrib may be FALSE,
909 causing a floating point exception later on. To avoid that, switch
913 for (k = 0; k < opt->bins; ++k)
915 window[i].bContrib[j][k] = TRUE;
922 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
923 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
928 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
934 //! Compute the PMF (one of the two main WHAM routines)
935 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
936 t_UmbrellaOptions *opt, gmx_bool bExact)
938 double ztot_half, ztot, min = opt->min, dz = opt->dz;
940 ztot = opt->max-opt->min;
947 int nthreads = gmx_omp_get_max_threads();
948 int thread_id = gmx_omp_get_thread_num();
950 int i0 = thread_id*opt->bins/nthreads;
951 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
953 for (i = i0; i < i1; ++i)
956 double num, denom, invg, temp = 0, distance, U = 0;
958 for (j = 0; j < nWindows; ++j)
960 for (k = 0; k < window[j].nPull; ++k)
962 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
963 temp = (1.0*i+0.5)*dz+min;
964 num += invg*window[j].Histo[k][i];
966 if (!(bExact || window[j].bContrib[k][i]))
970 distance = temp - window[j].pos[k]; /* distance to umbrella center */
972 { /* in cyclic wham: */
973 if (distance > ztot_half) /* |distance| < ztot_half */
977 else if (distance < -ztot_half)
985 U = 0.5*window[j].k[k]*gmx::square(distance); /* harmonic potential assumed. */
989 U = tabulated_pot(distance, opt); /* Use tabulated potential */
991 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
994 profile[i] = num/denom;
997 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1001 //! Compute the free energy offsets z (one of the two main WHAM routines)
1002 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
1003 t_UmbrellaOptions *opt, gmx_bool bExact)
1005 double min = opt->min, dz = opt->dz, ztot_half, ztot;
1006 double maxglob = -1e20;
1008 ztot = opt->max-opt->min;
1011 #pragma omp parallel
1015 int nthreads = gmx_omp_get_max_threads();
1016 int thread_id = gmx_omp_get_thread_num();
1018 int i0 = thread_id*nWindows/nthreads;
1019 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1020 double maxloc = -1e20;
1022 for (i = i0; i < i1; ++i)
1024 double total = 0, temp, distance, U = 0;
1027 for (j = 0; j < window[i].nPull; ++j)
1030 for (k = 0; k < window[i].nBin; ++k)
1032 if (!(bExact || window[i].bContrib[j][k]))
1036 temp = (1.0*k+0.5)*dz+min;
1037 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1039 { /* in cyclic wham: */
1040 if (distance > ztot_half) /* |distance| < ztot_half */
1044 else if (distance < -ztot_half)
1052 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
1056 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1058 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1060 /* Avoid floating point exception if window is far outside min and max */
1063 total = -std::log(total);
1069 temp = std::abs(total - window[i].z[j]);
1074 window[i].z[j] = total;
1077 /* Now get maximum maxloc from the threads and put in maxglob */
1078 if (maxloc > maxglob)
1080 #pragma omp critical
1082 if (maxloc > maxglob)
1089 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1095 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1096 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1098 int i, j, bins = opt->bins;
1099 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1102 if (min > 0. || max < 0.)
1104 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1105 opt->min, opt->max);
1110 for (i = 0; i < bins; i++)
1114 /* bin left of zsym */
1115 j = static_cast<int> (std::floor((zsym-min)/dz-0.5));
1116 if (j >= 0 && (j+1) < bins)
1118 /* interpolate profile linearly between bins j and j+1 */
1119 z1 = min+(j+0.5)*dz;
1121 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1122 /* average between left and right */
1123 prof2[i] = 0.5*(profsym+profile[i]);
1127 prof2[i] = profile[i];
1131 std::memcpy(profile, prof2, bins*sizeof(double));
1135 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1136 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1139 double unit_factor = 1., diff;
1143 /* Not log? Nothing to do! */
1149 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1150 if (opt->unit == en_kT)
1154 else if (opt->unit == en_kJ)
1156 unit_factor = BOLTZ*opt->Temperature;
1158 else if (opt->unit == en_kCal)
1160 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1164 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1167 for (i = 0; i < bins; i++)
1169 if (profile[i] > 0.0)
1171 profile[i] = -std::log(profile[i])*unit_factor;
1175 /* shift to zero at z=opt->zProf0 */
1176 if (!opt->bProf0Set)
1182 /* Get bin with shortest distance to opt->zProf0
1183 (-0.5 from bin position and +0.5 from rounding cancel) */
1184 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1189 else if (imin >= bins)
1193 diff = profile[imin];
1197 for (i = 0; i < bins; i++)
1203 //! Make an array of random integers (used for bootstrapping)
1204 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine * rng)
1206 gmx::UniformIntDistribution<int> dist(0, blockLength-1);
1208 int ipull, blockBase, nr, ipullRandom;
1210 if (blockLength == 0)
1212 blockLength = nPull;
1215 for (ipull = 0; ipull < nPull; ipull++)
1217 blockBase = (ipull/blockLength)*blockLength;
1219 { /* make sure nothing bad happens in the last block */
1220 nr = dist(*rng); // [0,blockLength-1]
1221 ipullRandom = blockBase + nr;
1223 while (ipullRandom >= nPull);
1224 if (ipullRandom < 0 || ipullRandom >= nPull)
1226 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1227 "blockLength = %d, blockBase = %d\n",
1228 ipullRandom, nPull, nr, blockLength, blockBase);
1230 randomArray[ipull] = ipullRandom;
1232 /*for (ipull=0; ipull<nPull; ipull++)
1233 printf("%d ",randomArray[ipull]); printf("\n"); */
1236 /*! \brief Set pull group information of a synthetic histogram
1238 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1239 * but it is not required if we bootstrap complete histograms.
1241 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1242 t_UmbrellaWindow *thisWindow, int pullid)
1244 synthWindow->N [0] = thisWindow->N [pullid];
1245 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1246 synthWindow->pos [0] = thisWindow->pos [pullid];
1247 synthWindow->z [0] = thisWindow->z [pullid];
1248 synthWindow->k [0] = thisWindow->k [pullid];
1249 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1250 synthWindow->g [0] = thisWindow->g [pullid];
1251 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1254 /*! \brief Calculate cumulative distribution function of of all histograms.
1256 * This allow to create random number sequences
1257 * which are distributed according to the histograms. Required to generate
1258 * the "synthetic" histograms for the Bootstrap method
1260 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1261 t_UmbrellaOptions *opt, const char *fnhist, const char *xlabel)
1265 char *fn = 0, *buf = 0;
1268 if (opt->bs_verbose)
1270 snew(fn, std::strlen(fnhist)+10);
1271 snew(buf, std::strlen(fnhist)+10);
1272 sprintf(fn, "%s_cumul.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4));
1273 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1277 for (i = 0; i < nWindows; i++)
1279 snew(window[i].cum, window[i].nPull);
1280 for (j = 0; j < window[i].nPull; j++)
1282 snew(window[i].cum[j], nbin+1);
1283 window[i].cum[j][0] = 0.;
1284 for (k = 1; k <= nbin; k++)
1286 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1289 /* normalize CDFs. Ensure cum[nbin]==1 */
1290 last = window[i].cum[j][nbin];
1291 for (k = 0; k <= nbin; k++)
1293 window[i].cum[j][k] /= last;
1298 printf("Cumulative distribution functions of all histograms created.\n");
1299 if (opt->bs_verbose)
1301 for (k = 0; k <= nbin; k++)
1303 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1304 for (i = 0; i < nWindows; i++)
1306 for (j = 0; j < window[i].nPull; j++)
1308 fprintf(fp, "%g\t", window[i].cum[j][k]);
1313 printf("Wrote cumulative distribution functions to %s\n", fn);
1321 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1323 * This is used to generate a random sequence distributed according to a histogram
1325 void searchCumulative(double xx[], int n, double x, int *j)
1347 else if (x == xx[n-1])
1357 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1358 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1359 int pullid, t_UmbrellaOptions *opt)
1361 int N, i, nbins, r_index, ibin;
1362 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1365 N = thisWindow->N[pullid];
1366 dt = thisWindow->dt;
1367 nbins = thisWindow->nBin;
1369 /* tau = autocorrelation time */
1370 if (opt->tauBootStrap > 0.0)
1372 tausteps = opt->tauBootStrap/dt;
1374 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1376 /* calc tausteps from g=1+2tausteps */
1377 g = thisWindow->g[pullid];
1383 "When generating hypothetical trajectories from given umbrella histograms,\n"
1384 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1385 "cannot be predicted. You have 3 options:\n"
1386 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1387 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1389 " with option -iiact for all umbrella windows.\n"
1390 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1391 " Use option (3) only if you are sure what you're doing, you may severely\n"
1392 " underestimate the error if a too small ACT is given.\n");
1393 gmx_fatal(FARGS, errstr);
1396 synthWindow->N [0] = N;
1397 synthWindow->pos [0] = thisWindow->pos[pullid];
1398 synthWindow->z [0] = thisWindow->z[pullid];
1399 synthWindow->k [0] = thisWindow->k[pullid];
1400 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1401 synthWindow->g [0] = thisWindow->g [pullid];
1402 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1404 for (i = 0; i < nbins; i++)
1406 synthWindow->Histo[0][i] = 0.;
1409 if (opt->bsMethod == bsMethod_trajGauss)
1411 sig = thisWindow->sigma [pullid];
1412 mu = thisWindow->aver [pullid];
1415 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1417 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1419 z = a*x + sqrt(1-a^2)*y
1420 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1421 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1423 C(t) = exp(-t/tau) with tau=-1/ln(a)
1425 Then, use error function to turn the Gaussian random variable into a uniformly
1426 distributed one in [0,1]. Eventually, use cumulative distribution function of
1427 histogram to get random variables distributed according to histogram.
1428 Note: The ACT of the flat distribution and of the generated histogram is not
1429 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1431 a = std::exp(-1.0/tausteps);
1432 ap = std::sqrt(1.0-a*a);
1433 invsqrt2 = 1.0/std::sqrt(2.0);
1435 /* init random sequence */
1436 x = opt->normalDistribution(opt->rng);
1438 if (opt->bsMethod == bsMethod_traj)
1440 /* bootstrap points from the umbrella histograms */
1441 for (i = 0; i < N; i++)
1443 y = opt->normalDistribution(opt->rng);
1445 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1446 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1448 r = 0.5*(1+std::erf(x*invsqrt2));
1449 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1450 synthWindow->Histo[0][r_index] += 1.;
1453 else if (opt->bsMethod == bsMethod_trajGauss)
1455 /* bootstrap points from a Gaussian with the same average and sigma
1456 as the respective umbrella histogram. The idea was, that -given
1457 limited sampling- the bootstrapped histograms are otherwise biased
1458 from the limited sampling of the US histos. However, bootstrapping from
1459 the Gaussian seems to yield a similar estimate. */
1463 y = opt->normalDistribution(opt->rng);
1466 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1471 while ( (ibin += nbins) < 0)
1476 else if (ibin >= nbins)
1478 while ( (ibin -= nbins) >= nbins)
1485 if (ibin >= 0 && ibin < nbins)
1487 synthWindow->Histo[0][ibin] += 1.;
1494 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1498 /*! \brief Write all histograms to a file
1500 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1501 * sets of bootstrapped histograms.
1503 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1504 int bs_index, t_UmbrellaOptions *opt, const char *xlabel)
1506 char *fn = 0, *buf = 0, title[256];
1512 snew(fn, std::strlen(fnhist)+10);
1513 snew(buf, std::strlen(fnhist)+1);
1514 sprintf(fn, "%s_bs%d.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4), bs_index);
1515 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1519 fn = gmx_strdup(fnhist);
1520 std::strcpy(title, "Umbrella histograms");
1523 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1526 /* Write histograms */
1527 for (l = 0; l < bins; ++l)
1529 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1530 for (i = 0; i < nWindows; ++i)
1532 for (j = 0; j < window[i].nPull; ++j)
1534 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1541 printf("Wrote %s\n", fn);
1549 //! Used for qsort to sort random numbers
1550 int func_wham_is_larger(const void *a, const void *b)
1569 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1570 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1574 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1578 /* generate ordered random numbers between 0 and nAllPull */
1579 for (i = 0; i < nAllPull-1; i++)
1581 r[i] = dist(opt->rng);
1583 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1584 r[nAllPull-1] = 1.0*nAllPull;
1586 synthwin[0].bsWeight[0] = r[0];
1587 for (i = 1; i < nAllPull; i++)
1589 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1592 /* avoid to have zero weight by adding a tiny value */
1593 for (i = 0; i < nAllPull; i++)
1595 if (synthwin[i].bsWeight[0] < 1e-5)
1597 synthwin[i].bsWeight[0] = 1e-5;
1604 //! The main bootstrapping routine
1605 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1606 const char *xlabel, char* ylabel, double *profile,
1607 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1609 t_UmbrellaWindow * synthWindow;
1610 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1611 int i, j, *randomArray = 0, winid, pullid, ib;
1612 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1614 gmx_bool bExact = FALSE;
1616 /* init random generator */
1617 if (opt->bsSeed == 0)
1619 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1621 opt->rng.seed(opt->bsSeed);
1623 snew(bsProfile, opt->bins);
1624 snew(bsProfiles_av, opt->bins);
1625 snew(bsProfiles_av2, opt->bins);
1627 /* Create array of all pull groups. Note that different windows
1628 may have different nr of pull groups
1629 First: Get total nr of pull groups */
1631 for (i = 0; i < nWindows; i++)
1633 nAllPull += window[i].nPull;
1635 snew(allPull_winId, nAllPull);
1636 snew(allPull_pullId, nAllPull);
1638 /* Setup one array of all pull groups */
1639 for (i = 0; i < nWindows; i++)
1641 for (j = 0; j < window[i].nPull; j++)
1643 allPull_winId[iAllPull] = i;
1644 allPull_pullId[iAllPull] = j;
1649 /* setup stuff for synthetic windows */
1650 snew(synthWindow, nAllPull);
1651 for (i = 0; i < nAllPull; i++)
1653 synthWindow[i].nPull = 1;
1654 synthWindow[i].nBin = opt->bins;
1655 snew(synthWindow[i].Histo, 1);
1656 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1658 snew(synthWindow[i].Histo[0], opt->bins);
1660 snew(synthWindow[i].N, 1);
1661 snew(synthWindow[i].pos, 1);
1662 snew(synthWindow[i].z, 1);
1663 snew(synthWindow[i].k, 1);
1664 snew(synthWindow[i].bContrib, 1);
1665 snew(synthWindow[i].g, 1);
1666 snew(synthWindow[i].bsWeight, 1);
1669 switch (opt->bsMethod)
1672 snew(randomArray, nAllPull);
1673 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1674 please_cite(stdout, "Hub2006");
1676 case bsMethod_BayesianHist:
1677 /* just copy all histogams into synthWindow array */
1678 for (i = 0; i < nAllPull; i++)
1680 winid = allPull_winId [i];
1681 pullid = allPull_pullId[i];
1682 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1686 case bsMethod_trajGauss:
1687 calc_cumulatives(window, nWindows, opt, fnhist, xlabel);
1690 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1693 /* do bootstrapping */
1694 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1695 for (ib = 0; ib < opt->nBootStrap; ib++)
1697 printf(" *******************************************\n"
1698 " ******** Start bootstrap nr %d ************\n"
1699 " *******************************************\n", ib+1);
1701 switch (opt->bsMethod)
1704 /* bootstrap complete histograms from given histograms */
1705 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1706 for (i = 0; i < nAllPull; i++)
1708 winid = allPull_winId [randomArray[i]];
1709 pullid = allPull_pullId[randomArray[i]];
1710 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1713 case bsMethod_BayesianHist:
1714 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1715 setRandomBsWeights(synthWindow, nAllPull, opt);
1718 case bsMethod_trajGauss:
1719 /* create new histos from given histos, that is generate new hypothetical
1721 for (i = 0; i < nAllPull; i++)
1723 winid = allPull_winId[i];
1724 pullid = allPull_pullId[i];
1725 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1730 /* write histos in case of verbose output */
1731 if (opt->bs_verbose)
1733 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1740 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1743 if ( (i%opt->stepUpdateContrib) == 0)
1745 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1747 if (maxchange < opt->Tolerance)
1751 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1753 printf("\t%4d) Maximum change %e\n", i, maxchange);
1755 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1758 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1759 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1763 prof_normalization_and_unit(bsProfile, opt);
1766 /* symmetrize profile around z=0 */
1769 symmetrizeProfile(bsProfile, opt);
1772 /* save stuff to get average and stddev */
1773 for (i = 0; i < opt->bins; i++)
1776 bsProfiles_av[i] += tmp;
1777 bsProfiles_av2[i] += tmp*tmp;
1778 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1780 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1784 /* write average and stddev */
1785 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1786 if (output_env_get_print_xvgr_codes(opt->oenv))
1788 fprintf(fp, "@TYPE xydy\n");
1790 for (i = 0; i < opt->bins; i++)
1792 bsProfiles_av [i] /= opt->nBootStrap;
1793 bsProfiles_av2[i] /= opt->nBootStrap;
1794 tmp = bsProfiles_av2[i]-gmx::square(bsProfiles_av[i]);
1795 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1796 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1799 printf("Wrote boot strap result to %s\n", fnres);
1802 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1803 int whaminFileType(char *fn)
1806 len = std::strlen(fn);
1807 if (std::strcmp(fn+len-3, "tpr") == 0)
1811 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1813 return whamin_pullxf;
1815 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1821 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1823 return whamin_unknown;
1826 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1827 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1828 t_UmbrellaOptions *opt)
1830 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1831 int nread, sizenow, i, block = 1;
1834 fp = gmx_ffopen(fn, "r");
1837 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1839 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1841 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1843 if (nread >= sizenow)
1846 srenew(filename, sizenow);
1847 for (i = sizenow-block; i < sizenow; i++)
1849 snew(filename[i], WHAM_MAXFILELEN);
1852 /* remove newline if there is one */
1853 if (tmp[std::strlen(tmp)-1] == '\n')
1855 tmp[std::strlen(tmp)-1] = '\0';
1857 std::strcpy(filename[nread], tmp);
1860 printf("Found file %s in %s\n", filename[nread], fn);
1864 *filenamesRet = filename;
1868 //! Open a file or a pipe to a gzipped file
1869 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1871 char Buffer[1024], gunzip[1024], *Path = 0;
1873 static gmx_bool bFirst = 1;
1875 /* gzipped pdo file? */
1876 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1878 /* search gunzip executable */
1879 if (!(Path = getenv("GMX_PATH_GZIP")))
1881 if (gmx_fexist("/bin/gunzip"))
1883 sprintf(gunzip, "%s", "/bin/gunzip");
1885 else if (gmx_fexist("/usr/bin/gunzip"))
1887 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1891 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1892 "You may want to define the path to gunzip "
1893 "with the environment variable GMX_PATH_GZIP.", gunzip);
1898 sprintf(gunzip, "%s/gunzip", Path);
1899 if (!gmx_fexist(gunzip))
1901 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1902 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1907 printf("Using gunzip executable %s\n", gunzip);
1910 if (!gmx_fexist(fn))
1912 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1914 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1917 printf("Executing command '%s'\n", Buffer);
1920 if ((pipe = popen(Buffer, "r")) == NULL)
1922 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1925 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1931 pipe = gmx_ffopen(fn, "r");
1938 //! Close file or pipe
1939 void pdo_close_file(FILE *fp)
1948 //! Reading all pdo files
1949 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1950 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1953 real mintmp, maxtmp, done = 0.;
1956 /* char Buffer0[1000]; */
1960 gmx_fatal(FARGS, "No files found. Hick.");
1963 /* if min and max are not given, get min and max from the input files */
1966 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1969 for (i = 0; i < nfiles; ++i)
1971 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1972 /*fgets(Buffer0,999,file);
1973 fprintf(stderr,"First line '%s'\n",Buffer0); */
1974 done = 100.0*(i+1)/nfiles;
1975 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1980 read_pdo_header(file, header, opt);
1981 /* here only determine min and max of this window */
1982 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1983 if (maxtmp > opt->max)
1987 if (mintmp < opt->min)
1993 pdo_close_file(file);
2001 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2002 if (opt->bBoundsOnly)
2004 printf("Found option -boundsonly, now exiting.\n");
2008 /* store stepsize in profile */
2009 opt->dz = (opt->max-opt->min)/opt->bins;
2011 /* Having min and max, we read in all files */
2012 /* Loop over all files */
2013 for (i = 0; i < nfiles; ++i)
2015 done = 100.0*(i+1)/nfiles;
2016 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2021 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2022 read_pdo_header(file, header, opt);
2023 /* load data into window */
2024 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2025 if ((window+i)->Ntot[0] == 0)
2027 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2031 pdo_close_file(file);
2039 for (i = 0; i < nfiles; ++i)
2046 //! translate 0/1 to N/Y to write pull dimensions
2047 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2049 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2050 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt, t_coordselection *coordsel)
2052 gmx::MDModules mdModules;
2053 t_inputrec *ir = mdModules.inputrec();
2055 static int first = 1;
2057 /* printf("Reading %s \n",fn); */
2058 read_tpx_state(fn, ir, &state, NULL);
2062 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2064 if (ir->pull->ncoord == 0)
2066 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
2069 /* Read overall pull info */
2070 header->npullcrds = ir->pull->ncoord;
2071 header->nCOMGrpsPullx = 0;
2072 if (ir->pull->bPrintCOM)
2074 header->nCOMGrpsPullx += ir->pull->ngroup;
2076 header->bPrintRefValue = ir->pull->bPrintRefValue;
2077 header->bPrintComp = ir->pull->bPrintComp;
2079 /* Read pull coordinates */
2080 snew(header->pcrd, header->npullcrds);
2081 for (int i = 0; i < ir->pull->ncoord; i++)
2083 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
2084 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
2085 header->pcrd[i].k = ir->pull->coord[i].k;
2086 header->pcrd[i].init_dist = ir->pull->coord[i].init;
2088 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
2089 header->pcrd[i].ndim = header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
2091 std::strcpy(header->pcrd[i].coord_unit,
2092 pull_coordinate_units(&ir->pull->coord[i]));
2094 if (ir->efep != efepNO && ir->pull->coord[i].k != ir->pull->coord[i].kB)
2096 gmx_fatal(FARGS, "Seems like you did free-energy perturbation, and you perturbed the force constant."
2097 " This is not supported.\n");
2099 if (coordsel && (coordsel->n != ir->pull->ncoord))
2101 gmx_fatal(FARGS, "Found %d pull coordinates in %s, but %d columns in the respective line\n"
2102 "coordinate selection file (option -is)\n", ir->pull->ncoord, fn, coordsel->n);
2106 /* Check pull coords for consistency */
2108 ivec thedim = { 0, 0, 0 };
2109 bool geometryIsSet = false;
2110 for (int i = 0; i < ir->pull->ncoord; i++)
2112 if (coordsel == NULL || coordsel->bUse[i])
2114 if (header->pcrd[i].pull_type != epullUMBRELLA)
2116 gmx_fatal(FARGS, "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only umbrella coodinates can enter WHAM.\n"
2117 "If you have umrella and non-umbrella coordinates, you can select the umbrella coordinates with gmx wham -is\n",
2118 fn, i+1, epull_names[header->pcrd[i].pull_type]);
2122 geom = header->pcrd[i].geometry;
2123 copy_ivec(header->pcrd[i].dim, thedim);
2124 geometryIsSet = true;
2126 if (geom != header->pcrd[i].geometry)
2128 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull geometry (coordinate 1: %s, coordinate %d: %s)\n"
2129 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2130 fn, epullg_names[geom], i+1, epullg_names[header->pcrd[i].geometry]);
2132 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY] || thedim[ZZ] != header->pcrd[i].dim[ZZ])
2134 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull dimensions (coordinate 1: %s %s %s, coordinate %d: %s %s %s)\n"
2135 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2136 fn, int2YN(thedim[XX]), int2YN(thedim[YY]), int2YN(thedim[ZZ]), i+1,
2137 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]));
2139 if (header->pcrd[i].geometry == epullgCYL)
2141 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
2143 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2144 "However, found dimensions [%s %s %s]\n",
2145 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2146 int2YN(header->pcrd[i].dim[ZZ]));
2149 if (header->pcrd[i].k <= 0.0)
2151 gmx_fatal(FARGS, "%s: Pull coordinate %d has force constant of of %g in %s.\n"
2152 "That doesn't seem to be an Umbrella tpr.\n",
2153 fn, i+1, header->pcrd[i].k);
2158 if (opt->verbose || first)
2160 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
2162 for (int i = 0; i < ir->pull->ncoord; i++)
2164 int lentmp = strlen(epullg_names[header->pcrd[i].geometry]);
2165 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
2168 sprintf(fmt, "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d dimensions). Used: %%s\n",
2170 for (int i = 0; i < ir->pull->ncoord; i++)
2172 bool use = (coordsel == NULL || coordsel->bUse[i]);
2174 epullg_names[header->pcrd[i].geometry], header->pcrd[i].k, header->pcrd[i].init_dist,
2175 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]),
2176 header->pcrd[i].ndim, use ? "Yes" : "No");
2178 switch (header->nCOMGrpsPullx)
2181 printf("\tNo pull group coordinates expected in pullx files.\n");
2184 printf("\tPull group coordinates of one group expected in pullx files.\n");
2187 printf("\tPull group coordinates of two groups expected in pullx files.\n");
2190 printf("\tReference value of the coordinate%s expected in pullx files.\n",
2191 header->bPrintRefValue ? "" : " not");
2193 if (!opt->verbose && first)
2195 printf("\tUse option -v to see this output for all input tpr files\n\n");
2201 //! 2-norm in a ndim-dimensional space
2202 double dist_ndim(double **dx, int ndim, int line)
2206 for (i = 0; i < ndim; i++)
2208 r2 += gmx::square(dx[i][line]);
2210 return std::sqrt(r2);
2213 //! Read pullx.xvg or pullf.xvg
2214 void read_pull_xf(const char *fn, t_UmbrellaHeader * header,
2215 t_UmbrellaWindow * window,
2216 t_UmbrellaOptions *opt,
2217 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2218 t_coordselection *coordsel)
2220 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2221 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
2222 int nColExpect, ntot, column;
2223 real min, max, minfound = 1e20, maxfound = -1e20;
2224 gmx_bool dt_ok, timeok;
2225 const char *quantity;
2226 const int blocklen = 4096;
2228 static gmx_bool bFirst = TRUE;
2231 * Data columns in pull output:
2232 * - in force output pullf.xvg:
2233 * No reference columns, one column per pull coordinate
2235 * - in position output pullx.xvg:
2236 * * optionally, ndim columns for COM of of grp1 or grp2, depending on on mdp options
2237 * pull-print-com1 & pull-print-com2);
2238 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
2239 * be written separately into pullx file, but this is not supported and throws an error below;
2240 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
2243 if (header->bPrintComp && opt->bPullx)
2245 gmx_fatal(FARGS, "gmx wham cannot read pullx files if the components of the coordinate was written\n"
2246 "(mdp option pull-print-components). Provide the pull force files instead (with option -if).\n");
2249 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
2250 snew(nColThisCrd, header->npullcrds);
2251 snew(nColCOMCrd, header->npullcrds);
2252 snew(nColRefCrd, header->npullcrds);
2254 if (opt->bPullx == FALSE)
2256 /* pullf reading: simply one column per coordinate */
2257 for (g = 0; g < header->npullcrds; g++)
2266 /* pullx reading. Note explanation above. */
2267 for (g = 0; g < header->npullcrds; g++)
2269 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
2270 nColCOMCrd[g] = header->pcrd[g].ndim*header->nCOMGrpsPullx;
2271 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
2275 nColExpect = 1; /* time column */
2276 for (g = 0; g < header->npullcrds; g++)
2278 nColExpect += nColThisCrd[g];
2281 /* read pullf or pullx. Could be optimized if min and max are given. */
2282 nt = read_xvg(fn, &y, &ny);
2284 /* Check consistency */
2285 quantity = opt->bPullx ? "position" : "force";
2288 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2290 if (bFirst || opt->verbose)
2292 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
2293 for (i = 0; i < header->npullcrds; i++)
2295 printf("\tColumns for pull coordinate %d\n", i+1);
2296 printf("\t\tcenter-of-mass of groups: %d\n"
2297 "\t\tdisplacement wrt. reference: %d\n"
2298 "\t\treference position column: %s\n",
2299 nColCOMCrd[i], 1, (header->bPrintRefValue ? "Yes" : "No"));
2301 printf("\tFound %d times in %s\n", nt, fn);
2304 if (nColExpect != ny)
2306 gmx_fatal(FARGS, "Expected %d columns (including time column) in %s, but found %d."
2307 " Maybe you confused options -if and -ix ?", nColExpect, fn, ny);
2317 window->dt = y[0][1]-y[0][0];
2319 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2321 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2324 /* Need to alocate memory and set up structure for windows */
2327 /* Use only groups selected with option -is file */
2328 if (header->npullcrds != coordsel->n)
2330 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2331 header->npullcrds, coordsel->n);
2333 window->nPull = coordsel->nUse;
2337 window->nPull = header->npullcrds;
2340 window->nBin = bins;
2341 snew(window->Histo, window->nPull);
2342 snew(window->z, window->nPull);
2343 snew(window->k, window->nPull);
2344 snew(window->pos, window->nPull);
2345 snew(window->N, window->nPull);
2346 snew(window->Ntot, window->nPull);
2347 snew(window->g, window->nPull);
2348 snew(window->bsWeight, window->nPull);
2349 window->bContrib = 0;
2351 if (opt->bCalcTauInt)
2353 snew(window->ztime, window->nPull);
2357 window->ztime = NULL;
2359 snew(lennow, window->nPull);
2361 for (g = 0; g < window->nPull; ++g)
2364 window->bsWeight [g] = 1.;
2366 window->Ntot [g] = 0;
2368 snew(window->Histo[g], bins);
2370 if (opt->bCalcTauInt)
2372 window->ztime[g] = NULL;
2376 /* Copying umbrella center and force const is more involved since not
2377 all pull groups from header (tpr file) may be used in window variable */
2378 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2380 if (coordsel && (coordsel->bUse[g] == FALSE))
2384 window->k [gUsed] = header->pcrd[g].k;
2385 window->pos[gUsed] = header->pcrd[g].init_dist;
2390 { /* only determine min and max */
2393 min = max = bins = 0; /* Get rid of warnings */
2397 for (i = 0; i < nt; i++)
2399 /* Do you want that time frame? */
2400 t = 1.0/1000*( static_cast<gmx_int64_t> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2402 /* get time step of pdo file and get dstep from opt->dt */
2412 dstep = static_cast<int>(opt->dt/dt+0.5);
2420 window->dt = dt*dstep;
2424 dt_ok = (i%dstep == 0);
2425 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2427 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2428 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2431 /* Note: if coordsel == NULL:
2432 * all groups in pullf/x file are stored in this window, and gUsed == g
2433 * if coordsel != NULL:
2434 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2437 for (g = 0; g < header->npullcrds; ++g)
2439 /* was this group selected for application in WHAM? */
2440 if (coordsel && (coordsel->bUse[g] == FALSE))
2448 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2450 pos = -force/header->pcrd[g].k + header->pcrd[g].init_dist;
2454 /* Pick the correct column index.
2455 Note that there is always exactly one displacement column.
2458 for (int j = 0; j < g; j++)
2460 column += nColThisCrd[j];
2462 column += nColCOMCrd[g];
2466 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2480 if (gUsed >= window->nPull)
2482 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been caught before.\n",
2483 gUsed, window->nPull);
2486 if (opt->bCalcTauInt && !bGetMinMax)
2488 /* save time series for autocorrelation analysis */
2489 ntot = window->Ntot[gUsed];
2490 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2491 if (ntot >= lennow[gUsed])
2493 lennow[gUsed] += blocklen;
2494 srenew(window->ztime[gUsed], lennow[gUsed]);
2496 window->ztime[gUsed][ntot] = pos;
2499 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2504 while ( (ibin += bins) < 0)
2509 else if (ibin >= bins)
2511 while ( (ibin -= bins) >= bins)
2517 if (ibin >= 0 && ibin < bins)
2519 window->Histo[gUsed][ibin] += 1.;
2522 window->Ntot[gUsed]++;
2526 else if (t > opt->tmax)
2530 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2542 for (i = 0; i < ny; i++)
2548 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2549 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2550 t_UmbrellaHeader* header,
2551 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2554 real mintmp, maxtmp;
2556 printf("Reading %d tpr and pullf files\n", nfiles);
2558 /* min and max not given? */
2561 printf("Automatic determination of boundaries...\n");
2564 for (i = 0; i < nfiles; i++)
2566 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2568 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2570 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : NULL);
2571 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2573 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2575 read_pull_xf(fnPull[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2576 (opt->nCoordsel > 0) ? &opt->coordsel[i] : NULL);
2577 if (maxtmp > opt->max)
2581 if (mintmp < opt->min)
2586 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2587 if (opt->bBoundsOnly)
2589 printf("Found option -boundsonly, now exiting.\n");
2593 /* store stepsize in profile */
2594 opt->dz = (opt->max-opt->min)/opt->bins;
2596 bool foundData = false;
2597 for (i = 0; i < nfiles; i++)
2599 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2601 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2603 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : NULL);
2604 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2606 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2608 read_pull_xf(fnPull[i], header, window+i, opt, FALSE, NULL, NULL,
2609 (opt->nCoordsel > 0) ? &opt->coordsel[i] : NULL);
2610 if (window[i].Ntot[0] == 0)
2612 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2621 gmx_fatal(FARGS, "No data points were found in pullf/pullx files. Maybe you need to specify the -b option?\n");
2624 for (i = 0; i < nfiles; i++)
2633 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2635 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2636 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2638 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2640 int nlines, ny, i, ig;
2643 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2644 nlines = read_xvg(fn, &iact, &ny);
2645 if (nlines != nwins)
2647 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2650 for (i = 0; i < nlines; i++)
2652 if (window[i].nPull != ny)
2654 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2655 "number of pull groups is different in different simulations. That is not\n"
2656 "supported yet. Sorry.\n");
2658 for (ig = 0; ig < window[i].nPull; ig++)
2660 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2661 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2663 if (iact[ig][i] <= 0.0)
2665 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2672 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2675 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2676 * that -in case of limited sampling- the ACT may be severely underestimated.
2677 * Note: the g=1+2tau are overwritten.
2678 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2681 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2684 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2686 /* only evaluate within +- 3sigma of the Gausian */
2687 siglim = 3.0*opt->sigSmoothIact;
2688 siglim2 = gmx::square(siglim);
2689 /* pre-factor of Gaussian */
2690 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2691 invtwosig2 = 0.5/gmx::square(opt->sigSmoothIact);
2693 for (i = 0; i < nwins; i++)
2695 snew(window[i].tausmooth, window[i].nPull);
2696 for (ig = 0; ig < window[i].nPull; ig++)
2700 pos = window[i].pos[ig];
2701 for (j = 0; j < nwins; j++)
2703 for (jg = 0; jg < window[j].nPull; jg++)
2705 dpos2 = gmx::square(window[j].pos[jg]-pos);
2706 if (dpos2 < siglim2)
2708 w = gaufact*std::exp(-dpos2*invtwosig2);
2710 tausm += w*window[j].tau[jg];
2711 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2712 w,dpos2,pos,gaufact,invtwosig2); */
2717 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2719 window[i].tausmooth[ig] = tausm;
2723 window[i].tausmooth[ig] = window[i].tau[ig];
2725 window[i].g[ig] = 1+2*tausm/window[i].dt;
2730 //! Stop integrating autoccorelation function when ACF drops under this value
2731 #define WHAM_AC_ZERO_LIMIT 0.05
2733 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2735 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2736 t_UmbrellaOptions *opt, const char *fn, const char *xlabel)
2738 int i, ig, ncorr, ntot, j, k, *count, restart;
2739 real *corr, c0, dt, tmp;
2740 real *ztime, av, tausteps;
2741 FILE *fp, *fpcorr = 0;
2745 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2746 "time [ps]", "autocorrelation function", opt->oenv);
2750 for (i = 0; i < nwins; i++)
2752 fprintf(stdout, "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2754 ntot = window[i].Ntot[0];
2756 /* using half the maximum time as length of autocorrelation function */
2760 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2761 " points. Provide more pull data!", ntot);
2764 /* snew(corrSq,ncorr); */
2767 snew(window[i].tau, window[i].nPull);
2768 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2774 for (ig = 0; ig < window[i].nPull; ig++)
2776 if (ntot != window[i].Ntot[ig])
2778 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2779 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2781 ztime = window[i].ztime[ig];
2783 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2784 for (j = 0, av = 0; (j < ntot); j++)
2789 for (k = 0; (k < ncorr); k++)
2794 for (j = 0; (j < ntot); j += restart)
2796 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2798 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2800 /* corrSq[k] += tmp*tmp; */
2804 /* divide by nr of frames for each time displacement */
2805 for (k = 0; (k < ncorr); k++)
2807 /* count probably = (ncorr-k+(restart-1))/restart; */
2808 corr[k] = corr[k]/count[k];
2809 /* variance of autocorrelation function */
2810 /* corrSq[k]=corrSq[k]/count[k]; */
2812 /* normalize such that corr[0] == 0 */
2814 for (k = 0; (k < ncorr); k++)
2817 /* corrSq[k]*=c0*c0; */
2820 /* write ACFs in verbose mode */
2823 for (k = 0; (k < ncorr); k++)
2825 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2827 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2830 /* esimate integrated correlation time, fitting is too unstable */
2831 tausteps = 0.5*corr[0];
2832 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2833 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2835 tausteps += corr[j];
2838 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2839 Kumar et al, eq. 28 ff. */
2840 window[i].tau[ig] = tausteps*dt;
2841 window[i].g[ig] = 1+2*tausteps;
2842 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2853 /* plot IACT along reaction coordinate */
2854 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2855 if (output_env_get_print_xvgr_codes(opt->oenv))
2857 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2858 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2859 for (i = 0; i < nwins; i++)
2861 fprintf(fp, "# %3d ", i);
2862 for (ig = 0; ig < window[i].nPull; ig++)
2864 fprintf(fp, " %11g", window[i].tau[ig]);
2869 for (i = 0; i < nwins; i++)
2871 for (ig = 0; ig < window[i].nPull; ig++)
2873 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2876 if (opt->sigSmoothIact > 0.0)
2878 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2879 opt->sigSmoothIact);
2880 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2881 smoothIact(window, nwins, opt);
2882 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2883 if (output_env_get_print_xvgr_codes(opt->oenv))
2885 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2886 fprintf(fp, "@ s1 symbol color 2\n");
2888 for (i = 0; i < nwins; i++)
2890 for (ig = 0; ig < window[i].nPull; ig++)
2892 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2897 printf("Wrote %s\n", fn);
2901 * compute average and sigma of each umbrella histogram
2903 void averageSigma(t_UmbrellaWindow *window, int nwins)
2906 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2908 for (i = 0; i < nwins; i++)
2910 snew(window[i].aver, window[i].nPull);
2911 snew(window[i].sigma, window[i].nPull);
2913 ntot = window[i].Ntot[0];
2914 for (ig = 0; ig < window[i].nPull; ig++)
2916 ztime = window[i].ztime[ig];
2917 for (k = 0, av = 0.; k < ntot; k++)
2922 for (k = 0, sum2 = 0.; k < ntot; k++)
2927 sig = std::sqrt(sum2/ntot);
2928 window[i].aver[ig] = av;
2930 /* Note: This estimate for sigma is biased from the limited sampling.
2931 Correct sigma by n/(n-1) where n = number of independent
2932 samples. Only possible if IACT is known.
2936 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2937 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2941 window[i].sigma[ig] = sig;
2943 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2950 * Use histograms to compute average force on pull group.
2952 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2954 int i, j, bins = opt->bins, k;
2955 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2958 dz = (max-min)/bins;
2959 ztot = opt->max-min;
2962 /* Compute average displacement from histograms */
2963 for (j = 0; j < nWindows; ++j)
2965 snew(window[j].forceAv, window[j].nPull);
2966 for (k = 0; k < window[j].nPull; ++k)
2970 for (i = 0; i < opt->bins; ++i)
2972 temp = (1.0*i+0.5)*dz+min;
2973 distance = temp - window[j].pos[k];
2975 { /* in cyclic wham: */
2976 if (distance > ztot_half) /* |distance| < ztot_half */
2980 else if (distance < -ztot_half)
2985 w = window[j].Histo[k][i]/window[j].g[k];
2986 displAv += w*distance;
2988 /* Are we near min or max? We are getting wrong forces from the histgrams since
2989 the histograms are zero outside [min,max). Therefore, assume that the position
2990 on the other side of the histomgram center is equally likely. */
2993 posmirrored = window[j].pos[k]-distance;
2994 if (posmirrored >= max || posmirrored < min)
2996 displAv += -w*distance;
3003 /* average force from average displacement */
3004 window[j].forceAv[k] = displAv*window[j].k[k];
3005 /* sigma from average square displacement */
3006 /* window[j].sigma [k] = sqrt(displAv2); */
3007 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
3013 * Check if the complete reaction coordinate is covered by the histograms
3015 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
3016 t_UmbrellaOptions *opt)
3018 int i, ig, j, bins = opt->bins, bBoundary;
3019 real avcount = 0, z, relcount, *count;
3020 snew(count, opt->bins);
3022 for (j = 0; j < opt->bins; ++j)
3024 for (i = 0; i < nwins; i++)
3026 for (ig = 0; ig < window[i].nPull; ig++)
3028 count[j] += window[i].Histo[ig][j];
3031 avcount += 1.0*count[j];
3034 for (j = 0; j < bins; ++j)
3036 relcount = count[j]/avcount;
3037 z = (j+0.5)*opt->dz+opt->min;
3038 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
3039 /* check for bins with no data */
3042 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
3043 "You may not get a reasonable profile. Check your histograms!\n", j, z);
3045 /* and check for poor sampling */
3046 else if (relcount < 0.005 && !bBoundary)
3048 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3054 /*! \brief Compute initial potential by integrating the average force
3056 * This speeds up the convergence by roughly a factor of 2
3058 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt, const char *xlabel)
3060 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3061 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3064 dz = (opt->max-min)/bins;
3066 printf("Getting initial potential by integration.\n");
3068 /* Compute average displacement from histograms */
3069 computeAverageForce(window, nWindows, opt);
3071 /* Get force for each bin from all histograms in this bin, or, alternatively,
3072 if no histograms are inside this bin, from the closest histogram */
3075 for (j = 0; j < opt->bins; ++j)
3077 pos = (1.0*j+0.5)*dz+min;
3081 groupmin = winmin = 0;
3082 for (i = 0; i < nWindows; i++)
3084 for (ig = 0; ig < window[i].nPull; ig++)
3086 hispos = window[i].pos[ig];
3087 dist = std::abs(hispos-pos);
3088 /* average force within bin */
3092 fAv += window[i].forceAv[ig];
3094 /* at the same time, remember closest histogram */
3103 /* if no histogram found in this bin, use closest histogram */
3110 fAv = window[winmin].forceAv[groupmin];
3114 for (j = 1; j < opt->bins; ++j)
3116 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3119 /* cyclic wham: linearly correct possible offset */
3122 diff = (pot[bins-1]-pot[0])/(bins-1);
3123 for (j = 1; j < opt->bins; ++j)
3130 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3131 for (j = 0; j < opt->bins; ++j)
3133 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3136 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3139 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3140 energy offsets which are usually determined by wham
3141 First: turn pot into probabilities:
3143 for (j = 0; j < opt->bins; ++j)
3145 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3147 calc_z(pot, window, nWindows, opt, TRUE);
3153 //! Count number of words in an line
3154 static int wordcount(char *ptr)
3159 if (std::strlen(ptr) == 0)
3163 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3165 for (i = 0; (ptr[i] != '\0'); i++)
3167 is[cur] = isspace(ptr[i]);
3168 if ((i > 0) && (is[cur] && !is[1-cur]))
3177 /*! \brief Read input file for pull group selection (option -is)
3179 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3181 void readPullCoordSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3184 int i, iline, n, len = STRLEN, temp;
3185 char *ptr = 0, *tmpbuf = 0;
3186 char fmt[1024], fmtign[1024];
3187 int block = 1, sizenow;
3189 fp = gmx_ffopen(opt->fnCoordSel, "r");
3190 opt->coordsel = NULL;
3195 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3200 if (iline >= sizenow)
3203 srenew(opt->coordsel, sizenow);
3205 opt->coordsel[iline].n = n;
3206 opt->coordsel[iline].nUse = 0;
3207 snew(opt->coordsel[iline].bUse, n);
3210 for (i = 0; i < n; i++)
3212 std::strcpy(fmt, fmtign);
3213 std::strcat(fmt, "%d");
3214 if (sscanf(ptr, fmt, &temp))
3216 opt->coordsel[iline].bUse[i] = (temp > 0);
3217 if (opt->coordsel[iline].bUse[i])
3219 opt->coordsel[iline].nUse++;
3222 std::strcat(fmtign, "%*s");
3226 opt->nCoordsel = iline;
3227 if (nTpr != opt->nCoordsel)
3229 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel,
3233 printf("\nUse only these pull coordinates:\n");
3234 for (iline = 0; iline < nTpr; iline++)
3236 printf("%s (%d of %d coordinates):", fnTpr[iline], opt->coordsel[iline].nUse, opt->coordsel[iline].n);
3237 for (i = 0; i < opt->coordsel[iline].n; i++)
3239 if (opt->coordsel[iline].bUse[i])
3252 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3254 //! Number of elements in fnm (used for command line parsing)
3255 #define NFILE asize(fnm)
3257 //! The main gmx wham routine
3258 int gmx_wham(int argc, char *argv[])
3260 const char *desc[] = {
3261 "[THISMODULE] is an analysis program that implements the Weighted",
3262 "Histogram Analysis Method (WHAM). It is intended to analyze",
3263 "output files generated by umbrella sampling simulations to ",
3264 "compute a potential of mean force (PMF).[PAR]",
3266 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3267 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3268 "and, if multiple coordinates need to be analyzed, all used the same",
3269 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3270 "At present, three input modes are supported.",
3272 "* With option [TT]-it[tt], the user provides a file which contains the",
3273 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3274 " AND, with option [TT]-ix[tt], a file which contains file names of",
3275 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3276 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3277 " first pullx, etc.",
3278 "* Same as the previous input mode, except that the the user",
3279 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3280 " From the pull force the position in the umbrella potential is",
3281 " computed. This does not work with tabulated umbrella potentials.",
3282 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3283 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3284 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3285 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3286 " must be similar to the following::",
3289 " # Component selection: 0 0 1",
3291 " # Ref. Group 'TestAtom'",
3292 " # Nr. of pull groups 2",
3293 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3294 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3297 " The number of pull groups, umbrella positions, force constants, and names ",
3298 " may (of course) differ. Following the header, a time column and ",
3299 " a data column for each pull group follows (i.e. the displacement",
3300 " with respect to the umbrella center). Up to four pull groups are possible ",
3301 " per [REF].pdo[ref] file at present.[PAR]",
3302 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If only ",
3303 "some of the pull coordinates should be used, a pull coordinate selection file (option [TT]-is[tt]) can ",
3304 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3305 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr file. ",
3306 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. Example:",
3307 "If you have three tpr files, each containing 4 pull coordinates, but only pull coordinates 1 and 2 should be ",
3308 "used, coordsel.dat looks like this::",
3314 "By default, the output files are::",
3316 " [TT]-o[tt] PMF output file",
3317 " [TT]-hist[tt] Histograms output file",
3319 "Always check whether the histograms sufficiently overlap.[PAR]",
3320 "The umbrella potential is assumed to be harmonic and the force constants are ",
3321 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3322 "a tabulated potential can be provided with [TT]-tab[tt].",
3327 "* [TT]-bins[tt] Number of bins used in analysis",
3328 "* [TT]-temp[tt] Temperature in the simulations",
3329 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3330 "* [TT]-auto[tt] Automatic determination of boundaries",
3331 "* [TT]-min,-max[tt] Boundaries of the profile",
3333 "The data points that are used to compute the profile",
3334 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3335 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3336 "umbrella window.[PAR]",
3337 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3338 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3339 "With energy output, the energy in the first bin is defined to be zero. ",
3340 "If you want the free energy at a different ",
3341 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3342 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3343 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3344 "[THISMODULE] will make use of the",
3345 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3346 "reaction coordinate will assumed be be neighbors.[PAR]",
3347 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3348 "which may be useful for, e.g. membranes.",
3353 "If available, the number of OpenMP threads used by gmx wham is controlled with [TT]-nt[tt].",
3358 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3359 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3360 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3361 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3362 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3363 "Because the IACTs can be severely underestimated in case of limited ",
3364 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3365 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3366 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3367 "integration of the ACFs while the ACFs are larger 0.05.",
3368 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3369 "less robust) method such as fitting to a double exponential, you can ",
3370 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3371 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3372 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull coordinate in the respective file.",
3377 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3378 "otherwise the statistical error may be substantially underestimated. ",
3379 "More background and examples for the bootstrap technique can be found in ",
3380 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3381 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3382 "Four bootstrapping methods are supported and ",
3383 "selected with [TT]-bs-method[tt].",
3385 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3386 " data points, and the bootstrap is carried out by assigning random weights to the ",
3387 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3388 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3389 " statistical error is underestimated.",
3390 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3391 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3392 " (allowing duplication, i.e. sampling with replacement).",
3393 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3394 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3395 " divided into blocks and only histograms within each block are mixed. Note that ",
3396 " the histograms within each block must be representative for all possible histograms, ",
3397 " otherwise the statistical error is underestimated.",
3398 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3399 " such that the generated data points are distributed according the given histograms ",
3400 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3401 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3402 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3403 " Note that this method may severely underestimate the error in case of limited sampling, ",
3404 " that is if individual histograms do not represent the complete phase space at ",
3405 " the respective positions.",
3406 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3407 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3408 " and width of the umbrella histograms. That method yields similar error estimates ",
3409 " like method [TT]traj[tt].",
3411 "Bootstrapping output:",
3413 "* [TT]-bsres[tt] Average profile and standard deviations",
3414 "* [TT]-bsprof[tt] All bootstrapping profiles",
3416 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3417 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3421 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3422 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3423 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3424 static t_UmbrellaOptions opt;
3427 { "-min", FALSE, etREAL, {&opt.min},
3428 "Minimum coordinate in profile"},
3429 { "-max", FALSE, etREAL, {&opt.max},
3430 "Maximum coordinate in profile"},
3431 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3432 "Determine min and max automatically"},
3433 { "-bins", FALSE, etINT, {&opt.bins},
3434 "Number of bins in profile"},
3435 { "-temp", FALSE, etREAL, {&opt.Temperature},
3437 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3439 { "-v", FALSE, etBOOL, {&opt.verbose},
3441 { "-b", FALSE, etREAL, {&opt.tmin},
3442 "First time to analyse (ps)"},
3443 { "-e", FALSE, etREAL, {&opt.tmax},
3444 "Last time to analyse (ps)"},
3445 { "-dt", FALSE, etREAL, {&opt.dt},
3446 "Analyse only every dt ps"},
3447 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3448 "Write histograms and exit"},
3449 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3450 "Determine min and max and exit (with [TT]-auto[tt])"},
3451 { "-log", FALSE, etBOOL, {&opt.bLog},
3452 "Calculate the log of the profile before printing"},
3453 { "-unit", FALSE, etENUM, {en_unit},
3454 "Energy unit in case of log output" },
3455 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3456 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3457 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3458 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3459 { "-sym", FALSE, etBOOL, {&opt.bSym},
3460 "Symmetrize profile around z=0"},
3461 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3462 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3463 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3464 "Calculate integrated autocorrelation times and use in wham"},
3465 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3466 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3467 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3468 "When computing autocorrelation functions, restart computing every .. (ps)"},
3469 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3470 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3471 "during smoothing"},
3472 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3473 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3474 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3475 "Bootstrap method" },
3476 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3477 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3478 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3479 "Seed for bootstrapping. (-1 = use time)"},
3480 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3481 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3482 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3483 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3484 { "-stepout", FALSE, etINT, {&opt.stepchange},
3485 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3486 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3487 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3491 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3492 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3493 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3494 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3495 { efDAT, "-is", "coordsel", ffOPTRD}, /* input: select pull coords to use */
3496 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3497 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3498 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3499 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3500 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3501 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3502 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3505 int i, j, l, nfiles, nwins, nfiles2;
3506 t_UmbrellaHeader header;
3507 t_UmbrellaWindow * window = NULL;
3508 double *profile, maxchange = 1e20;
3509 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3510 char **fninTpr, **fninPull, **fninPdo;
3512 FILE *histout, *profout;
3513 char xlabel[STRLEN], ylabel[256], title[256];
3516 opt.verbose = FALSE;
3517 opt.bHistOnly = FALSE;
3526 opt.coordsel = NULL;
3528 /* bootstrapping stuff */
3530 opt.bsMethod = bsMethod_hist;
3531 opt.tauBootStrap = 0.0;
3533 opt.histBootStrapBlockLength = 8;
3534 opt.bs_verbose = FALSE;
3539 opt.Temperature = 298;
3540 opt.Tolerance = 1e-6;
3541 opt.bBoundsOnly = FALSE;
3543 opt.bCalcTauInt = FALSE;
3544 opt.sigSmoothIact = 0.0;
3545 opt.bAllowReduceIact = TRUE;
3546 opt.bInitPotByIntegration = TRUE;
3547 opt.acTrestart = 1.0;
3548 opt.stepchange = 100;
3549 opt.stepUpdateContrib = 100;
3551 if (!parse_common_args(&argc, argv, 0,
3552 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3557 opt.unit = nenum(en_unit);
3558 opt.bsMethod = nenum(en_bsMethod);
3560 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3562 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3563 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3564 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3565 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3566 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3567 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3568 if (opt.bTab && opt.bPullf)
3570 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3571 "Provide pullx.xvg or pdo files!");
3574 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3576 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3578 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3580 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3581 "\n\n Check gmx wham -h !");
3584 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3585 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3586 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3587 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3588 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3590 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3591 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3592 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3593 if ( (bMinSet || bMaxSet) && bAutoSet)
3595 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3598 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3600 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3603 if (bMinSet && opt.bAuto)
3605 printf("Note: min and max given, switching off -auto.\n");
3609 if (opt.bTauIntGiven && opt.bCalcTauInt)
3611 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3612 "the autocorrelation times. Not both.");
3615 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3617 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3618 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3620 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3622 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3623 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3626 /* Reading gmx4/gmx5 pull output and tpr files */
3627 if (opt.bTpr || opt.bPullf || opt.bPullx)
3629 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3631 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3632 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3633 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3634 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3635 if (nfiles != nfiles2)
3637 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3638 opt.fnTpr, nfiles2, fnPull);
3641 /* Read file that selects the pull group to be used */
3642 if (opt.fnCoordSel != NULL)
3644 readPullCoordSelection(&opt, fninTpr, nfiles);
3647 window = initUmbrellaWindows(nfiles);
3648 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3651 { /* reading pdo files */
3652 if (opt.fnCoordSel != NULL)
3654 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3655 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3657 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3658 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3659 window = initUmbrellaWindows(nfiles);
3660 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3663 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3664 We can therefore get the units for the xlabel from the first coordinate. */
3665 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3669 /* enforce equal weight for all histograms? */
3672 enforceEqualWeights(window, nwins);
3675 /* write histograms */
3676 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3677 xlabel, "count", opt.oenv);
3678 for (l = 0; l < opt.bins; ++l)
3680 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3681 for (i = 0; i < nwins; ++i)
3683 for (j = 0; j < window[i].nPull; ++j)
3685 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3688 fprintf(histout, "\n");
3691 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3694 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3698 /* Using tabulated umbrella potential */
3701 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3704 /* Integrated autocorrelation times provided ? */
3705 if (opt.bTauIntGiven)
3707 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3710 /* Compute integrated autocorrelation times */
3711 if (opt.bCalcTauInt)
3713 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3716 /* calc average and sigma for each histogram
3717 (maybe required for bootstrapping. If not, this is fast anyhow) */
3718 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3720 averageSigma(window, nwins);
3723 /* Get initial potential by simple integration */
3724 if (opt.bInitPotByIntegration)
3726 guessPotByIntegration(window, nwins, &opt, xlabel);
3729 /* Check if complete reaction coordinate is covered */
3730 checkReactionCoordinateCovered(window, nwins, &opt);
3732 /* Calculate profile */
3733 snew(profile, opt.bins);
3741 if ( (i%opt.stepUpdateContrib) == 0)
3743 setup_acc_wham(profile, window, nwins, &opt);
3745 if (maxchange < opt.Tolerance)
3748 /* if (opt.verbose) */
3749 printf("Switched to exact iteration in iteration %d\n", i);
3751 calc_profile(profile, window, nwins, &opt, bExact);
3752 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3754 printf("\t%4d) Maximum change %e\n", i, maxchange);
3758 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3759 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3761 /* calc error from Kumar's formula */
3762 /* Unclear how the error propagates along reaction coordinate, therefore
3764 /* calc_error_kumar(profile,window, nwins,&opt); */
3766 /* Write profile in energy units? */
3769 prof_normalization_and_unit(profile, &opt);
3770 std::strcpy(ylabel, en_unit_label[opt.unit]);
3771 std::strcpy(title, "Umbrella potential");
3775 std::strcpy(ylabel, "Density of states");
3776 std::strcpy(title, "Density of states");
3779 /* symmetrize profile around z=0? */
3782 symmetrizeProfile(profile, &opt);
3785 /* write profile or density of states */
3786 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3787 for (i = 0; i < opt.bins; ++i)
3789 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3792 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3794 /* Bootstrap Method */
3797 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3798 opt2fn("-hist", NFILE, fnm),
3799 xlabel, ylabel, profile, window, nwins, &opt);
3803 freeUmbrellaWindows(window, nfiles);
3805 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3806 please_cite(stdout, "Hub2010");