2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
57 #include "gromacs/commandline/pargs.h"
58 #include "gromacs/fileio/tpxio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxana/gmx_ana.h"
61 #include "gromacs/math/functions.h"
62 #include "gromacs/math/units.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/pull_params.h"
67 #include "gromacs/mdtypes/state.h"
68 #include "gromacs/pulling/pull.h"
69 #include "gromacs/random/tabulatednormaldistribution.h"
70 #include "gromacs/random/threefry.h"
71 #include "gromacs/random/uniformintdistribution.h"
72 #include "gromacs/random/uniformrealdistribution.h"
73 #include "gromacs/utility/arraysize.h"
74 #include "gromacs/utility/cstringutil.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/futil.h"
78 #include "gromacs/utility/gmxomp.h"
79 #include "gromacs/utility/path.h"
80 #include "gromacs/utility/pleasecite.h"
81 #include "gromacs/utility/smalloc.h"
83 //! longest file names allowed in input files
84 #define WHAM_MAXFILELEN 2048
87 * enum for energy units
98 * enum for type of input files (pdos, tpr, or pullf)
108 /*! \brief enum for bootstrapping method
110 * These bootstrap methods are supported:
111 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
112 * (bsMethod_BayesianHist)
113 * - bootstrap complete histograms (bsMethod_hist)
114 * - bootstrap trajectories from given umbrella histograms. This generates new
115 * "synthetic" histograms (bsMethod_traj)
116 * - bootstrap trajectories from Gaussian with mu/sigma computed from
117 * the respective histogram (bsMethod_trajGauss). This gives very similar
118 * results compared to bsMethod_traj.
120 * ********************************************************************
121 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
122 * JS Hub, BL de Groot, D van der Spoel
123 * g_wham - A free weighted histogram analysis implementation including
124 * robust error and autocorrelation estimates,
125 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
126 * ********************************************************************
131 bsMethod_BayesianHist,
137 //! Parameters of one pull coodinate
140 int pull_type; //!< such as constraint, umbrella, ...
141 int geometry; //!< such as distance, direction, cylinder
142 int ngroup; //!< the number of pull groups involved
143 ivec dim; //!< pull dimension with geometry distance
144 int ndim; //!< nr of pull_dim != 0
145 real k; //!< force constants in tpr file
146 real init_dist; //!< reference displacement
147 char coord_unit[256]; //!< unit of the displacement
150 //! Parameters of the umbrella potentials
154 * \name Using umbrella pull code since gromacs 4.x
157 int npullcrds; //!< nr of umbrella pull coordinates for reading
158 t_pullcoord* pcrd; //!< the pull coordinates
159 gmx_bool bPrintCOM; //!< COMs of pull groups writtn in pullx.xvg
160 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
161 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
165 * \name Using PDO files common until gromacs 3.x
173 char PullName[4][256];
175 double UmbCons[4][3];
179 //! Data in the umbrella histograms
182 int nPull; //!< nr of pull groups in this pdo or pullf/x file
183 double** Histo; //!< nPull histograms
184 double** cum; //!< nPull cumulative distribution functions
185 int nBin; //!< nr of bins. identical to opt->bins
186 double* k; //!< force constants for the nPull coords
187 double* pos; //!< umbrella positions for the nPull coords
188 double* z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
189 int* N; //!< nr of data points in nPull histograms.
190 int* Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
192 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
194 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
195 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
198 double* tau; //!< intetrated autocorrelation time (IACT)
199 double* tausmooth; //!< smoothed IACT
201 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
203 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
205 real** ztime; //!< input data z(t) as a function of time. Required to compute ACTs
207 /*! \brief average force estimated from average displacement, fAv=dzAv*k
209 * Used for integration to guess the potential.
212 real* aver; //!< average of histograms
213 real* sigma; //!< stddev of histograms
214 double* bsWeight; //!< for bootstrapping complete histograms with continuous weights
217 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
220 int n; //!< total nr of pull coords in this tpr file
221 int nUse; //!< nr of pull coords used
222 gmx_bool* bUse; //!< boolean array of size n. =1 if used, =0 if not
225 //! Parameters of WHAM
226 typedef struct // NOLINT(clang-analyzer-optin.performance.Padding)
232 const char *fnTpr, *fnPullf, *fnCoordSel;
233 const char *fnPdo, *fnPullx; //!< file names of input
234 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
235 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
237 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
238 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
239 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
240 t_coordselection* coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
243 * \name Basic WHAM options
246 int bins; //!< nr of bins, min, max, and dz of profile
248 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
249 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
252 * \name Output control
255 gmx_bool bLog; //!< energy output (instead of probability) for profile
256 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
257 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
258 /*! \brief after wham, set prof to zero at this z-position.
259 * When bootstrapping, set zProf0 to a "stable" reference position.
262 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
264 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
265 gmx_bool bAuto; //!< determine min and max automatically but do not exit
267 gmx_bool verbose; //!< more noisy wham mode
268 int stepchange; //!< print maximum change in prof after how many interations
269 gmx_output_env_t* oenv; //!< xvgr options
272 * \name Autocorrelation stuff
275 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
276 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
277 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
278 real acTrestart; //!< when computing ACT, time between restarting points
280 /* \brief Enforce the same weight for each umbella window, that is
281 * calculate with the same number of data points for
282 * each window. That can be reasonable, if the histograms
283 * have different length, but due to autocorrelation,
284 * a longer simulation should not have larger weightin wham.
290 * \name Bootstrapping stuff
293 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
295 /* \brief bootstrap method
297 * if == bsMethod_hist, consider complete histograms as independent
298 * data points and, hence, only mix complete histograms.
299 * if == bsMethod_BayesianHist, consider complete histograms
300 * as independent data points, but assign random weights
301 * to the histograms during the bootstrapping ("Bayesian bootstrap")
302 * In case of long correlations (e.g., inside a channel), these
303 * will yield a more realistic error.
304 * if == bsMethod_traj(Gauss), generate synthetic histograms
306 * histogram by generating an autocorrelated random sequence
307 * that is distributed according to the respective given
308 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
309 * (instead of from the umbrella histogram) to generate a new
314 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
317 /* \brief when mixing histograms, mix only histograms withing blocks
318 long the reaction coordinate xi. Avoids gaps along xi. */
319 int histBootStrapBlockLength;
321 int bsSeed; //!< random seed for bootstrapping
323 /* \brief Write cumulative distribution functions (CDFs) of histograms
324 and write the generated histograms for each bootstrap */
328 * \name tabulated umbrella potential stuff
332 double * tabX, *tabY, tabMin, tabMax, tabDz;
335 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
336 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
339 //! Make an umbrella window (may contain several histograms)
340 static t_UmbrellaWindow* initUmbrellaWindows(int nwin)
342 t_UmbrellaWindow* win;
345 for (i = 0; i < nwin; i++)
347 win[i].Histo = win[i].cum = nullptr;
348 win[i].k = win[i].pos = win[i].z = nullptr;
349 win[i].N = win[i].Ntot = nullptr;
350 win[i].g = win[i].tau = win[i].tausmooth = nullptr;
351 win[i].bContrib = nullptr;
352 win[i].ztime = nullptr;
353 win[i].forceAv = nullptr;
354 win[i].aver = win[i].sigma = nullptr;
355 win[i].bsWeight = nullptr;
360 //! Delete an umbrella window (may contain several histograms)
361 static void freeUmbrellaWindows(t_UmbrellaWindow* win, int nwin)
364 for (i = 0; i < nwin; i++)
368 for (j = 0; j < win[i].nPull; j++)
370 sfree(win[i].Histo[j]);
375 for (j = 0; j < win[i].nPull; j++)
377 sfree(win[i].cum[j]);
382 for (j = 0; j < win[i].nPull; j++)
384 sfree(win[i].bContrib[j]);
396 sfree(win[i].tausmooth);
397 sfree(win[i].bContrib);
399 sfree(win[i].forceAv);
402 sfree(win[i].bsWeight);
408 * Read and setup tabulated umbrella potential
410 static void setup_tab(const char* fn, t_UmbrellaOptions* opt)
415 printf("Setting up tabulated potential from file %s\n", fn);
416 nl = read_xvg(fn, &y, &ny);
420 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
422 opt->tabMin = y[0][0];
423 opt->tabMax = y[0][nl - 1];
424 opt->tabDz = (opt->tabMax - opt->tabMin) / (nl - 1);
428 "The tabulated potential in %s must be provided in \n"
429 "ascending z-direction",
432 for (i = 0; i < nl - 1; i++)
434 if (std::abs(y[0][i + 1] - y[0][i] - opt->tabDz) > opt->tabDz / 1e6)
436 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", fn);
441 for (i = 0; i < nl; i++)
443 opt->tabX[i] = y[0][i];
444 opt->tabY[i] = y[1][i];
446 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n", opt->tabMin,
447 opt->tabMax, opt->tabDz);
450 //! Read the header of an PDO file (position, force const, nr of groups)
451 static void read_pdo_header(FILE* file, t_UmbrellaHeader* header, t_UmbrellaOptions* opt)
454 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
456 std::istringstream ist;
459 if (fgets(line, 2048, file) == nullptr)
461 gmx_fatal(FARGS, "Error reading header from pdo file\n");
464 ist >> Buffer0 >> Buffer1 >> Buffer2;
465 if (std::strcmp(Buffer1, "UMBRELLA") != 0)
468 "This does not appear to be a valid pdo file. Found %s, expected %s\n"
469 "(Found in first line: `%s')\n",
470 Buffer1, "UMBRELLA", line);
472 if (std::strcmp(Buffer2, "3.0") != 0)
474 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
478 if (fgets(line, 2048, file) == nullptr)
480 gmx_fatal(FARGS, "Error reading header from pdo file\n");
483 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
484 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
486 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
487 if (header->nDim != 1)
489 gmx_fatal(FARGS, "Currently only supports one dimension");
493 if (fgets(line, 2048, file) == nullptr)
495 gmx_fatal(FARGS, "Error reading header from pdo file\n");
498 ist >> Buffer0 >> Buffer1 >> header->nSkip;
501 if (fgets(line, 2048, file) == nullptr)
503 gmx_fatal(FARGS, "Error reading header from pdo file\n");
506 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
509 if (fgets(line, 2048, file) == nullptr)
511 gmx_fatal(FARGS, "Error reading header from pdo file\n");
514 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
518 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip, header->Reference);
521 for (i = 0; i < header->nPull; ++i)
523 if (fgets(line, 2048, file) == nullptr)
525 gmx_fatal(FARGS, "Error reading header from pdo file\n");
528 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
529 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
530 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
534 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n", i,
535 header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
539 if (fgets(line, 2048, file) == nullptr)
541 gmx_fatal(FARGS, "Cannot read from file\n");
545 if (std::strcmp(Buffer3, "#####") != 0)
547 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
552 static char* fgets3(FILE* fp, char ptr[], int* len)
557 if (fgets(ptr, *len - 1, fp) == nullptr)
562 while ((std::strchr(ptr, '\n') == nullptr) && (!feof(fp)))
564 /* This line is longer than len characters, let's increase len! */
568 if (fgets(p - 1, STRLEN, fp) == nullptr)
573 slen = std::strlen(ptr);
574 if (ptr[slen - 1] == '\n')
576 ptr[slen - 1] = '\0';
582 /*! \brief Read the data columns of and PDO file.
584 * TO DO: Get rid of the scanf function to avoid the clang warning.
585 * At the moment, this warning is avoided by hiding the format string
586 * the variable fmtlf.
588 static void read_pdo_data(FILE* file,
589 t_UmbrellaHeader* header,
591 t_UmbrellaWindow* win,
592 t_UmbrellaOptions* opt,
597 int i, inttemp, bins, count, ntot;
598 real minval, maxval, minfound = 1e20, maxfound = -1e20;
599 double temp, time, time0 = 0, dt;
601 t_UmbrellaWindow* window = nullptr;
602 gmx_bool timeok, dt_ok = true;
603 char * tmpbuf = nullptr, fmt[256], fmtign[256], fmtlf[5] = "%lf";
604 int len = STRLEN, dstep = 1;
605 const int blocklen = 4096;
606 int* lennow = nullptr;
614 window = win + fileno;
615 /* Need to alocate memory and set up structure */
616 window->nPull = header->nPull;
619 snew(window->Histo, window->nPull);
620 snew(window->z, window->nPull);
621 snew(window->k, window->nPull);
622 snew(window->pos, window->nPull);
623 snew(window->N, window->nPull);
624 snew(window->Ntot, window->nPull);
625 snew(window->g, window->nPull);
626 snew(window->bsWeight, window->nPull);
628 window->bContrib = nullptr;
630 if (opt->bCalcTauInt)
632 snew(window->ztime, window->nPull);
636 window->ztime = nullptr;
638 snew(lennow, window->nPull);
640 for (i = 0; i < window->nPull; ++i)
643 window->bsWeight[i] = 1.;
644 snew(window->Histo[i], bins);
645 window->k[i] = header->UmbCons[i][0];
646 window->pos[i] = header->UmbPos[i][0];
650 if (opt->bCalcTauInt)
652 window->ztime[i] = nullptr;
656 /* Done with setup */
662 minval = maxval = bins = 0; /* Get rid of warnings */
667 while ((ptr = fgets3(file, tmpbuf, &len)) != nullptr)
671 if (ptr[0] == '#' || std::strlen(ptr) < 2)
676 /* Initiate format string */
678 std::strcat(fmtign, "%*s");
680 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
681 /* Round time to fs */
682 time = 1.0 / 1000 * (gmx::roundToInt64(time * 1000));
684 /* get time step of pdo file */
694 dstep = gmx::roundToInt(opt->dt / dt);
702 window->dt = dt * dstep;
707 dt_ok = ((count - 1) % dstep == 0);
708 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
710 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
711 time,opt->tmin, opt->tmax, dt_ok,timeok); */
715 for (i = 0; i < header->nPull; ++i)
717 std::strcpy(fmt, fmtign);
718 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
719 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
720 if (sscanf(ptr, fmt, &temp))
722 temp += header->UmbPos[i][0];
736 if (opt->bCalcTauInt)
738 /* save time series for autocorrelation analysis */
739 ntot = window->Ntot[i];
740 if (ntot >= lennow[i])
742 lennow[i] += blocklen;
743 srenew(window->ztime[i], lennow[i]);
745 window->ztime[i][ntot] = temp;
749 temp /= (maxval - minval);
751 temp = std::floor(temp);
753 inttemp = static_cast<int>(temp);
760 else if (inttemp >= bins)
766 if (inttemp >= 0 && inttemp < bins)
768 window->Histo[i][inttemp] += 1.;
776 if (time > opt->tmax)
780 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
796 /*! \brief Set identical weights for all histograms
798 * Normally, the weight is given by the number data points in each
799 * histogram, together with the autocorrelation time. This can be overwritten
800 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
801 * an appropriate autocorrelation times instead of using this function.
803 static void enforceEqualWeights(t_UmbrellaWindow* window, int nWindows)
805 int i, k, j, NEnforced;
808 NEnforced = window[0].Ntot[0];
809 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
810 "non-weighted histogram analysis method. Ndata = %d\n",
812 /* enforce all histograms to have the same weight as the very first histogram */
814 for (j = 0; j < nWindows; ++j)
816 for (k = 0; k < window[j].nPull; ++k)
818 ratio = 1.0 * NEnforced / window[j].Ntot[k];
819 for (i = 0; i < window[0].nBin; ++i)
821 window[j].Histo[k][i] *= ratio;
823 window[j].N[k] = gmx::roundToInt(ratio * window[j].N[k]);
828 /*! \brief Simple linear interpolation between two given tabulated points
830 static double tabulated_pot(double dist, t_UmbrellaOptions* opt)
833 double pl, pu, dz, dp;
835 jl = static_cast<int>(std::floor((dist - opt->tabMin) / opt->tabDz));
837 if (jl < 0 || ju >= opt->tabNbins)
840 "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
841 "Provide an extended table.",
846 dz = dist - opt->tabX[jl];
847 dp = (pu - pl) * dz / opt->tabDz;
853 * Check which bins substiantially contribute (accelerates WHAM)
855 * Don't worry, that routine does not mean we compute the PMF in limited precision.
856 * After rapid convergence (using only substiantal contributions), we always switch to
859 static void setup_acc_wham(const double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt)
861 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
862 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
863 gmx_bool bAnyContrib;
864 static int bFirst = 1;
865 static double wham_contrib_lim;
869 for (i = 0; i < nWindows; ++i)
871 nGrptot += window[i].nPull;
873 wham_contrib_lim = opt->Tolerance / nGrptot;
876 ztot = opt->max - opt->min;
877 ztot_half = ztot / 2;
879 for (i = 0; i < nWindows; ++i)
881 if (!window[i].bContrib)
883 snew(window[i].bContrib, window[i].nPull);
885 for (j = 0; j < window[i].nPull; ++j)
887 if (!window[i].bContrib[j])
889 snew(window[i].bContrib[j], opt->bins);
892 for (k = 0; k < opt->bins; ++k)
894 temp = (1.0 * k + 0.5) * dz + min;
895 distance = temp - window[i].pos[j]; /* distance to umbrella center */
897 { /* in cyclic wham: */
898 if (distance > ztot_half) /* |distance| < ztot_half */
902 else if (distance < -ztot_half)
907 /* Note: there are two contributions to bin k in the wham equations:
908 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
909 ii) exp(- U/(BOLTZ*opt->Temperature))
910 where U is the umbrella potential
911 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
916 U = 0.5 * window[i].k[j] * gmx::square(distance); /* harmonic potential assumed. */
920 U = tabulated_pot(distance, opt); /* Use tabulated potential */
922 contrib1 = profile[k] * std::exp(-U / (BOLTZ * opt->Temperature));
923 contrib2 = window[i].N[j] * std::exp(-U / (BOLTZ * opt->Temperature) + window[i].z[j]);
924 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
925 bAnyContrib = bAnyContrib || window[i].bContrib[j][k];
926 if (window[i].bContrib[j][k])
932 /* If this histo is far outside min and max all bContrib may be FALSE,
933 causing a floating point exception later on. To avoid that, switch
937 for (k = 0; k < opt->bins; ++k)
939 window[i].bContrib[j][k] = TRUE;
946 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
947 "Evaluating only %d of %d expressions.\n\n",
948 wham_contrib_lim, nContrib, nTot);
953 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n", nContrib, nTot);
958 //! Compute the PMF (one of the two main WHAM routines)
959 static void calc_profile(double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, gmx_bool bExact)
961 double ztot_half, ztot, min = opt->min, dz = opt->dz;
963 ztot = opt->max - opt->min;
964 ztot_half = ztot / 2;
970 int nthreads = gmx_omp_get_max_threads();
971 int thread_id = gmx_omp_get_thread_num();
973 int i0 = thread_id * opt->bins / nthreads;
974 int i1 = std::min(opt->bins, ((thread_id + 1) * opt->bins) / nthreads);
976 for (i = i0; i < i1; ++i)
979 double num, denom, invg, temp = 0, distance, U = 0;
981 for (j = 0; j < nWindows; ++j)
983 for (k = 0; k < window[j].nPull; ++k)
985 invg = 1.0 / window[j].g[k] * window[j].bsWeight[k];
986 temp = (1.0 * i + 0.5) * dz + min;
987 num += invg * window[j].Histo[k][i];
989 if (!(bExact || window[j].bContrib[k][i]))
993 distance = temp - window[j].pos[k]; /* distance to umbrella center */
995 { /* in cyclic wham: */
996 if (distance > ztot_half) /* |distance| < ztot_half */
1000 else if (distance < -ztot_half)
1008 U = 0.5 * window[j].k[k] * gmx::square(distance); /* harmonic potential assumed. */
1012 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1014 denom += invg * window[j].N[k]
1015 * std::exp(-U / (BOLTZ * opt->Temperature) + window[j].z[k]);
1018 profile[i] = num / denom;
1021 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1025 //! Compute the free energy offsets z (one of the two main WHAM routines)
1026 static double calc_z(const double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, gmx_bool bExact)
1028 double min = opt->min, dz = opt->dz, ztot_half, ztot;
1029 double maxglob = -1e20;
1031 ztot = opt->max - opt->min;
1032 ztot_half = ztot / 2;
1034 #pragma omp parallel
1038 int nthreads = gmx_omp_get_max_threads();
1039 int thread_id = gmx_omp_get_thread_num();
1041 int i0 = thread_id * nWindows / nthreads;
1042 int i1 = std::min(nWindows, ((thread_id + 1) * nWindows) / nthreads);
1043 double maxloc = -1e20;
1045 for (i = i0; i < i1; ++i)
1047 double total = 0, temp, distance, U = 0;
1050 for (j = 0; j < window[i].nPull; ++j)
1053 for (k = 0; k < window[i].nBin; ++k)
1055 if (!(bExact || window[i].bContrib[j][k]))
1059 temp = (1.0 * k + 0.5) * dz + min;
1060 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1062 { /* in cyclic wham: */
1063 if (distance > ztot_half) /* |distance| < ztot_half */
1067 else if (distance < -ztot_half)
1075 U = 0.5 * window[i].k[j] * gmx::square(distance); /* harmonic potential assumed. */
1079 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1081 total += profile[k] * std::exp(-U / (BOLTZ * opt->Temperature));
1083 /* Avoid floating point exception if window is far outside min and max */
1086 total = -std::log(total);
1092 temp = std::abs(total - window[i].z[j]);
1097 window[i].z[j] = total;
1100 /* Now get maximum maxloc from the threads and put in maxglob */
1101 if (maxloc > maxglob)
1103 #pragma omp critical
1105 if (maxloc > maxglob)
1112 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1118 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1119 static void symmetrizeProfile(double* profile, t_UmbrellaOptions* opt)
1121 int i, j, bins = opt->bins;
1122 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1125 if (min > 0. || max < 0.)
1127 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n", opt->min,
1133 for (i = 0; i < bins; i++)
1135 z = min + (i + 0.5) * dz;
1137 /* bin left of zsym */
1138 j = gmx::roundToInt((zsym - min) / dz) - 1;
1139 if (j >= 0 && (j + 1) < bins)
1141 /* interpolate profile linearly between bins j and j+1 */
1142 z1 = min + (j + 0.5) * dz;
1144 profsym = profile[j] + (profile[j + 1] - profile[j]) / dz * deltaz;
1145 /* average between left and right */
1146 prof2[i] = 0.5 * (profsym + profile[i]);
1150 prof2[i] = profile[i];
1154 std::memcpy(profile, prof2, bins * sizeof(double));
1158 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1159 static void prof_normalization_and_unit(double* profile, t_UmbrellaOptions* opt)
1162 double unit_factor = 1., diff;
1166 /* Not log? Nothing to do! */
1172 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1173 if (opt->unit == en_kT)
1177 else if (opt->unit == en_kJ)
1179 unit_factor = BOLTZ * opt->Temperature;
1181 else if (opt->unit == en_kCal)
1183 unit_factor = (BOLTZ / CAL2JOULE) * opt->Temperature;
1187 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1190 for (i = 0; i < bins; i++)
1192 if (profile[i] > 0.0)
1194 profile[i] = -std::log(profile[i]) * unit_factor;
1198 /* shift to zero at z=opt->zProf0 */
1199 if (!opt->bProf0Set)
1205 /* Get bin with shortest distance to opt->zProf0
1206 (-0.5 from bin position and +0.5 from rounding cancel) */
1207 imin = static_cast<int>((opt->zProf0 - opt->min) / opt->dz);
1212 else if (imin >= bins)
1216 diff = profile[imin];
1220 for (i = 0; i < bins; i++)
1226 //! Make an array of random integers (used for bootstrapping)
1227 static void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine* rng)
1229 gmx::UniformIntDistribution<int> dist(0, blockLength - 1);
1231 int ipull, blockBase, nr, ipullRandom;
1233 if (blockLength == 0)
1235 blockLength = nPull;
1238 for (ipull = 0; ipull < nPull; ipull++)
1240 blockBase = (ipull / blockLength) * blockLength;
1242 { /* make sure nothing bad happens in the last block */
1243 nr = dist(*rng); // [0,blockLength-1]
1244 ipullRandom = blockBase + nr;
1245 } while (ipullRandom >= nPull);
1246 if (ipullRandom < 0 || ipullRandom >= nPull)
1249 "Ups, random iWin = %d, nPull = %d, nr = %d, "
1250 "blockLength = %d, blockBase = %d\n",
1251 ipullRandom, nPull, nr, blockLength, blockBase);
1253 randomArray[ipull] = ipullRandom;
1257 /*! \brief Set pull group information of a synthetic histogram
1259 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1260 * but it is not required if we bootstrap complete histograms.
1262 static void copy_pullgrp_to_synthwindow(t_UmbrellaWindow* synthWindow, t_UmbrellaWindow* thisWindow, int pullid)
1264 synthWindow->N[0] = thisWindow->N[pullid];
1265 synthWindow->Histo[0] = thisWindow->Histo[pullid];
1266 synthWindow->pos[0] = thisWindow->pos[pullid];
1267 synthWindow->z[0] = thisWindow->z[pullid];
1268 synthWindow->k[0] = thisWindow->k[pullid];
1269 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1270 synthWindow->g[0] = thisWindow->g[pullid];
1271 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1274 /*! \brief Calculate cumulative distribution function of of all histograms.
1276 * This allow to create random number sequences
1277 * which are distributed according to the histograms. Required to generate
1278 * the "synthetic" histograms for the Bootstrap method
1280 static void calc_cumulatives(t_UmbrellaWindow* window,
1282 t_UmbrellaOptions* opt,
1291 if (opt->bs_verbose)
1293 fn = gmx::Path::concatenateBeforeExtension(fnhist, "_cumul");
1294 fp = xvgropen(fn.c_str(), "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1298 for (i = 0; i < nWindows; i++)
1300 snew(window[i].cum, window[i].nPull);
1301 for (j = 0; j < window[i].nPull; j++)
1303 snew(window[i].cum[j], nbin + 1);
1304 window[i].cum[j][0] = 0.;
1305 for (k = 1; k <= nbin; k++)
1307 window[i].cum[j][k] = window[i].cum[j][k - 1] + window[i].Histo[j][k - 1];
1310 /* normalize CDFs. Ensure cum[nbin]==1 */
1311 last = window[i].cum[j][nbin];
1312 for (k = 0; k <= nbin; k++)
1314 window[i].cum[j][k] /= last;
1319 printf("Cumulative distribution functions of all histograms created.\n");
1320 if (opt->bs_verbose)
1322 for (k = 0; k <= nbin; k++)
1324 fprintf(fp, "%g\t", opt->min + k * opt->dz);
1325 for (i = 0; i < nWindows; i++)
1327 for (j = 0; j < window[i].nPull; j++)
1329 fprintf(fp, "%g\t", window[i].cum[j][k]);
1334 printf("Wrote cumulative distribution functions to %s\n", fn.c_str());
1340 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1342 * This is used to generate a random sequence distributed according to a histogram
1344 static void searchCumulative(const double xx[], int n, double x, int* j)
1352 jm = (ju + jl) >> 1;
1366 else if (x == xx[n - 1])
1376 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1377 static void create_synthetic_histo(t_UmbrellaWindow* synthWindow,
1378 t_UmbrellaWindow* thisWindow,
1380 t_UmbrellaOptions* opt)
1382 int N, i, nbins, r_index, ibin;
1383 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1386 N = thisWindow->N[pullid];
1387 dt = thisWindow->dt;
1388 nbins = thisWindow->nBin;
1390 /* tau = autocorrelation time */
1391 if (opt->tauBootStrap > 0.0)
1393 tausteps = opt->tauBootStrap / dt;
1395 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1397 /* calc tausteps from g=1+2tausteps */
1398 g = thisWindow->g[pullid];
1399 tausteps = (g - 1) / 2;
1404 "When generating hypothetical trajectories from given umbrella histograms,\n"
1405 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1406 "cannot be predicted. You have 3 options:\n"
1407 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1408 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1410 " with option -iiact for all umbrella windows.\n"
1411 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1412 " Use option (3) only if you are sure what you're doing, you may severely\n"
1413 " underestimate the error if a too small ACT is given.\n");
1414 gmx_fatal(FARGS, "%s", errstr);
1417 synthWindow->N[0] = N;
1418 synthWindow->pos[0] = thisWindow->pos[pullid];
1419 synthWindow->z[0] = thisWindow->z[pullid];
1420 synthWindow->k[0] = thisWindow->k[pullid];
1421 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1422 synthWindow->g[0] = thisWindow->g[pullid];
1423 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1425 for (i = 0; i < nbins; i++)
1427 synthWindow->Histo[0][i] = 0.;
1430 if (opt->bsMethod == bsMethod_trajGauss)
1432 sig = thisWindow->sigma[pullid];
1433 mu = thisWindow->aver[pullid];
1436 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1438 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1440 z = a*x + sqrt(1-a^2)*y
1441 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1442 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1444 C(t) = exp(-t/tau) with tau=-1/ln(a)
1446 Then, use error function to turn the Gaussian random variable into a uniformly
1447 distributed one in [0,1]. Eventually, use cumulative distribution function of
1448 histogram to get random variables distributed according to histogram.
1449 Note: The ACT of the flat distribution and of the generated histogram is not
1450 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1452 a = std::exp(-1.0 / tausteps);
1453 ap = std::sqrt(1.0 - a * a);
1454 invsqrt2 = 1.0 / std::sqrt(2.0);
1456 /* init random sequence */
1457 x = opt->normalDistribution(opt->rng);
1459 if (opt->bsMethod == bsMethod_traj)
1461 /* bootstrap points from the umbrella histograms */
1462 for (i = 0; i < N; i++)
1464 y = opt->normalDistribution(opt->rng);
1466 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1467 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1469 r = 0.5 * (1 + std::erf(x * invsqrt2));
1470 searchCumulative(thisWindow->cum[pullid], nbins + 1, r, &r_index);
1471 synthWindow->Histo[0][r_index] += 1.;
1474 else if (opt->bsMethod == bsMethod_trajGauss)
1476 /* bootstrap points from a Gaussian with the same average and sigma
1477 as the respective umbrella histogram. The idea was, that -given
1478 limited sampling- the bootstrapped histograms are otherwise biased
1479 from the limited sampling of the US histos. However, bootstrapping from
1480 the Gaussian seems to yield a similar estimate. */
1484 y = opt->normalDistribution(opt->rng);
1487 ibin = static_cast<int>(std::floor((z - opt->min) / opt->dz));
1492 while ((ibin += nbins) < 0) {}
1494 else if (ibin >= nbins)
1496 while ((ibin -= nbins) >= nbins) {}
1500 if (ibin >= 0 && ibin < nbins)
1502 synthWindow->Histo[0][ibin] += 1.;
1509 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1513 /*! \brief Write all histograms to a file
1515 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1516 * sets of bootstrapped histograms.
1518 static void print_histograms(const char* fnhist,
1519 t_UmbrellaWindow* window,
1522 t_UmbrellaOptions* opt,
1525 std::string fn, title;
1531 fn = gmx::Path::concatenateBeforeExtension(fnhist, gmx::formatString("_bs%d", bs_index));
1532 title = gmx::formatString("Umbrella histograms. Bootstrap #%d", bs_index);
1536 fn = gmx_strdup(fnhist);
1537 title = gmx::formatString("Umbrella histograms");
1540 fp = xvgropen(fn.c_str(), title.c_str(), xlabel, "count", opt->oenv);
1543 /* Write histograms */
1544 for (l = 0; l < bins; ++l)
1546 fprintf(fp, "%e\t", (l + 0.5) * opt->dz + opt->min);
1547 for (i = 0; i < nWindows; ++i)
1549 for (j = 0; j < window[i].nPull; ++j)
1551 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1558 printf("Wrote %s\n", fn.c_str());
1561 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1562 static void setRandomBsWeights(t_UmbrellaWindow* synthwin, int nAllPull, t_UmbrellaOptions* opt)
1566 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1570 /* generate ordered random numbers between 0 and nAllPull */
1571 for (i = 0; i < nAllPull - 1; i++)
1573 r[i] = dist(opt->rng);
1575 std::sort(r, r + nAllPull - 1);
1576 r[nAllPull - 1] = 1.0 * nAllPull;
1578 synthwin[0].bsWeight[0] = r[0];
1579 for (i = 1; i < nAllPull; i++)
1581 synthwin[i].bsWeight[0] = r[i] - r[i - 1];
1584 /* avoid to have zero weight by adding a tiny value */
1585 for (i = 0; i < nAllPull; i++)
1587 if (synthwin[i].bsWeight[0] < 1e-5)
1589 synthwin[i].bsWeight[0] = 1e-5;
1596 //! The main bootstrapping routine
1597 static void do_bootstrapping(const char* fnres,
1603 t_UmbrellaWindow* window,
1605 t_UmbrellaOptions* opt)
1607 t_UmbrellaWindow* synthWindow;
1608 double * bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1609 int i, j, *randomArray = nullptr, winid, pullid, ib;
1610 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1612 gmx_bool bExact = FALSE;
1614 /* init random generator */
1615 if (opt->bsSeed == 0)
1617 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1619 opt->rng.seed(opt->bsSeed);
1621 snew(bsProfile, opt->bins);
1622 snew(bsProfiles_av, opt->bins);
1623 snew(bsProfiles_av2, opt->bins);
1625 /* Create array of all pull groups. Note that different windows
1626 may have different nr of pull groups
1627 First: Get total nr of pull groups */
1629 for (i = 0; i < nWindows; i++)
1631 nAllPull += window[i].nPull;
1633 snew(allPull_winId, nAllPull);
1634 snew(allPull_pullId, nAllPull);
1636 /* Setup one array of all pull groups */
1637 for (i = 0; i < nWindows; i++)
1639 for (j = 0; j < window[i].nPull; j++)
1641 allPull_winId[iAllPull] = i;
1642 allPull_pullId[iAllPull] = j;
1647 /* setup stuff for synthetic windows */
1648 snew(synthWindow, nAllPull);
1649 for (i = 0; i < nAllPull; i++)
1651 synthWindow[i].nPull = 1;
1652 synthWindow[i].nBin = opt->bins;
1653 snew(synthWindow[i].Histo, 1);
1654 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1656 snew(synthWindow[i].Histo[0], opt->bins);
1658 snew(synthWindow[i].N, 1);
1659 snew(synthWindow[i].pos, 1);
1660 snew(synthWindow[i].z, 1);
1661 snew(synthWindow[i].k, 1);
1662 snew(synthWindow[i].bContrib, 1);
1663 snew(synthWindow[i].g, 1);
1664 snew(synthWindow[i].bsWeight, 1);
1667 switch (opt->bsMethod)
1670 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1671 please_cite(stdout, "Hub2006");
1673 case bsMethod_BayesianHist:
1674 /* just copy all histogams into synthWindow array */
1675 for (i = 0; i < nAllPull; i++)
1677 winid = allPull_winId[i];
1678 pullid = allPull_pullId[i];
1679 copy_pullgrp_to_synthwindow(synthWindow + i, window + winid, pullid);
1683 case bsMethod_trajGauss: calc_cumulatives(window, nWindows, opt, fnhist, xlabel); break;
1684 default: gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1687 /* do bootstrapping */
1688 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1689 for (ib = 0; ib < opt->nBootStrap; ib++)
1691 printf(" *******************************************\n"
1692 " ******** Start bootstrap nr %d ************\n"
1693 " *******************************************\n",
1696 switch (opt->bsMethod)
1699 /* bootstrap complete histograms from given histograms */
1700 srenew(randomArray, nAllPull);
1701 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1702 for (i = 0; i < nAllPull; i++)
1704 winid = allPull_winId[randomArray[i]];
1705 pullid = allPull_pullId[randomArray[i]];
1706 copy_pullgrp_to_synthwindow(synthWindow + i, window + winid, pullid);
1709 case bsMethod_BayesianHist:
1710 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1711 setRandomBsWeights(synthWindow, nAllPull, opt);
1714 case bsMethod_trajGauss:
1715 /* create new histos from given histos, that is generate new hypothetical
1717 for (i = 0; i < nAllPull; i++)
1719 winid = allPull_winId[i];
1720 pullid = allPull_pullId[i];
1721 create_synthetic_histo(synthWindow + i, window + winid, pullid, opt);
1726 /* write histos in case of verbose output */
1727 if (opt->bs_verbose)
1729 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1736 std::memcpy(bsProfile, profile, opt->bins * sizeof(double)); /* use profile as guess */
1739 if ((i % opt->stepUpdateContrib) == 0)
1741 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1743 if (maxchange < opt->Tolerance)
1747 if (((i % opt->stepchange) == 0 || i == 1) && i != 0)
1749 printf("\t%4d) Maximum change %e\n", i, maxchange);
1751 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1753 } while ((maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance
1755 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1759 prof_normalization_and_unit(bsProfile, opt);
1762 /* symmetrize profile around z=0 */
1765 symmetrizeProfile(bsProfile, opt);
1768 /* save stuff to get average and stddev */
1769 for (i = 0; i < opt->bins; i++)
1772 bsProfiles_av[i] += tmp;
1773 bsProfiles_av2[i] += tmp * tmp;
1774 fprintf(fp, "%e\t%e\n", (i + 0.5) * opt->dz + opt->min, tmp);
1776 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1780 /* write average and stddev */
1781 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1782 if (output_env_get_print_xvgr_codes(opt->oenv))
1784 fprintf(fp, "@TYPE xydy\n");
1786 for (i = 0; i < opt->bins; i++)
1788 bsProfiles_av[i] /= opt->nBootStrap;
1789 bsProfiles_av2[i] /= opt->nBootStrap;
1790 tmp = bsProfiles_av2[i] - gmx::square(bsProfiles_av[i]);
1791 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1792 fprintf(fp, "%e\t%e\t%e\n", (i + 0.5) * opt->dz + opt->min, bsProfiles_av[i], stddev);
1795 printf("Wrote boot strap result to %s\n", fnres);
1798 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1799 static int whaminFileType(char* fn)
1802 len = std::strlen(fn);
1803 if (std::strcmp(fn + len - 3, "tpr") == 0)
1807 else if (std::strcmp(fn + len - 3, "xvg") == 0 || std::strcmp(fn + len - 6, "xvg.gz") == 0)
1809 return whamin_pullxf;
1811 else if (std::strcmp(fn + len - 3, "pdo") == 0 || std::strcmp(fn + len - 6, "pdo.gz") == 0)
1817 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1821 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1822 static void read_wham_in(const char* fn, char*** filenamesRet, int* nfilesRet, t_UmbrellaOptions* opt)
1824 char **filename = nullptr, tmp[WHAM_MAXFILELEN + 2];
1825 int nread, sizenow, i, block = 1;
1828 fp = gmx_ffopen(fn, "r");
1831 while (fgets(tmp, sizeof(tmp), fp) != nullptr)
1833 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1835 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1837 if (nread >= sizenow)
1840 srenew(filename, sizenow);
1841 for (i = sizenow - block; i < sizenow; i++)
1843 snew(filename[i], WHAM_MAXFILELEN);
1846 /* remove newline if there is one */
1847 if (tmp[std::strlen(tmp) - 1] == '\n')
1849 tmp[std::strlen(tmp) - 1] = '\0';
1851 std::strcpy(filename[nread], tmp);
1854 printf("Found file %s in %s\n", filename[nread], fn);
1858 *filenamesRet = filename;
1862 //! Open a file or a pipe to a gzipped file
1863 static FILE* open_pdo_pipe(const char* fn, t_UmbrellaOptions* opt, gmx_bool* bPipeOpen)
1865 char Buffer[2048], gunzip[1024], *Path = nullptr;
1866 FILE* pipe = nullptr;
1867 static gmx_bool bFirst = true;
1869 /* gzipped pdo file? */
1870 if ((std::strcmp(fn + std::strlen(fn) - 3, ".gz") == 0))
1872 /* search gunzip executable */
1873 if (!(Path = getenv("GMX_PATH_GZIP")))
1875 if (gmx_fexist("/bin/gunzip"))
1877 sprintf(gunzip, "%s", "/bin/gunzip");
1879 else if (gmx_fexist("/usr/bin/gunzip"))
1881 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1886 "Cannot find executable gunzip in /bin or /usr/bin.\n"
1887 "You may want to define the path to gunzip "
1888 "with the environment variable GMX_PATH_GZIP.");
1893 sprintf(gunzip, "%s/gunzip", Path);
1894 if (!gmx_fexist(gunzip))
1897 "Cannot find executable %s. Please define the path to gunzip"
1898 " in the environmental varialbe GMX_PATH_GZIP.",
1904 printf("Using gunzip executable %s\n", gunzip);
1907 if (!gmx_fexist(fn))
1909 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1911 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1914 printf("Executing command '%s'\n", Buffer);
1917 if ((pipe = popen(Buffer, "r")) == nullptr)
1919 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1922 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1928 pipe = gmx_ffopen(fn, "r");
1935 //! Close file or pipe
1936 static void pdo_close_file(FILE* fp)
1945 //! Reading all pdo files
1946 static void read_pdo_files(char** fn, int nfiles, t_UmbrellaHeader* header, t_UmbrellaWindow* window, t_UmbrellaOptions* opt)
1949 real mintmp, maxtmp, done = 0.;
1952 /* char Buffer0[1000]; */
1956 gmx_fatal(FARGS, "No files found. Hick.");
1959 /* if min and max are not given, get min and max from the input files */
1962 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1965 for (i = 0; i < nfiles; ++i)
1967 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1968 /*fgets(Buffer0,999,file);
1969 fprintf(stderr,"First line '%s'\n",Buffer0); */
1970 done = 100.0 * (i + 1) / nfiles;
1971 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done);
1977 read_pdo_header(file, header, opt);
1978 /* here only determine min and max of this window */
1979 read_pdo_data(file, header, i, nullptr, opt, TRUE, &mintmp, &maxtmp);
1980 if (maxtmp > opt->max)
1984 if (mintmp < opt->min)
1990 pdo_close_file(file);
1998 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1999 if (opt->bBoundsOnly)
2001 printf("Found option -boundsonly, now exiting.\n");
2005 /* store stepsize in profile */
2006 opt->dz = (opt->max - opt->min) / opt->bins;
2008 /* Having min and max, we read in all files */
2009 /* Loop over all files */
2010 for (i = 0; i < nfiles; ++i)
2012 done = 100.0 * (i + 1) / nfiles;
2013 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done);
2019 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2020 read_pdo_header(file, header, opt);
2021 /* load data into window */
2022 read_pdo_data(file, header, i, window, opt, FALSE, nullptr, nullptr);
2023 if ((window + i)->Ntot[0] == 0)
2025 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2029 pdo_close_file(file);
2037 for (i = 0; i < nfiles; ++i)
2044 //! translate 0/1 to N/Y to write pull dimensions
2045 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2047 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2048 static void read_tpr_header(const char* fn, t_UmbrellaHeader* header, t_UmbrellaOptions* opt, t_coordselection* coordsel)
2050 t_inputrec irInstance;
2051 t_inputrec* ir = &irInstance;
2053 static int first = 1;
2055 /* printf("Reading %s \n",fn); */
2056 read_tpx_state(fn, ir, &state, nullptr);
2060 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2062 if (ir->pull->ncoord == 0)
2064 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
2067 /* Read overall pull info */
2068 header->npullcrds = ir->pull->ncoord;
2069 header->bPrintCOM = ir->pull->bPrintCOM;
2070 header->bPrintRefValue = ir->pull->bPrintRefValue;
2071 header->bPrintComp = ir->pull->bPrintComp;
2073 /* Read pull coordinates */
2074 snew(header->pcrd, header->npullcrds);
2075 for (int i = 0; i < ir->pull->ncoord; i++)
2077 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
2078 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
2079 header->pcrd[i].ngroup = ir->pull->coord[i].ngroup;
2080 /* Angle type coordinates are handled fully in degrees in gmx wham.
2081 * The pull "distance" appears with a power of -2 in the force constant,
2082 * so we need to multiply with the internal units (radians for angle)
2083 * to user units (degrees for an angle) with the same power.
2086 ir->pull->coord[i].k
2087 / gmx::square(pull_conversion_factor_internal2userinput(&ir->pull->coord[i]));
2088 header->pcrd[i].init_dist = ir->pull->coord[i].init;
2090 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
2091 header->pcrd[i].ndim =
2092 header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
2094 std::strcpy(header->pcrd[i].coord_unit, pull_coordinate_units(&ir->pull->coord[i]));
2096 if (ir->efep != efepNO && ir->pull->coord[i].k != ir->pull->coord[i].kB)
2099 "Seems like you did free-energy perturbation, and you perturbed the force "
2101 " This is not supported.\n");
2103 if (coordsel && (coordsel->n != ir->pull->ncoord))
2106 "Found %d pull coordinates in %s, but %d columns in the respective line\n"
2107 "coordinate selection file (option -is)\n",
2108 ir->pull->ncoord, fn, coordsel->n);
2112 /* Check pull coords for consistency */
2114 ivec thedim = { 0, 0, 0 };
2115 bool geometryIsSet = false;
2116 for (int i = 0; i < ir->pull->ncoord; i++)
2118 if (coordsel == nullptr || coordsel->bUse[i])
2120 if (header->pcrd[i].pull_type != epullUMBRELLA)
2123 "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only "
2124 "umbrella coodinates can enter WHAM.\n"
2125 "If you have umrella and non-umbrella coordinates, you can select the "
2126 "umbrella coordinates with gmx wham -is\n",
2127 fn, i + 1, epull_names[header->pcrd[i].pull_type]);
2131 geom = header->pcrd[i].geometry;
2132 copy_ivec(header->pcrd[i].dim, thedim);
2133 geometryIsSet = true;
2135 if (geom != header->pcrd[i].geometry)
2138 "%s: Your pull coordinates have different pull geometry (coordinate 1: "
2139 "%s, coordinate %d: %s)\n"
2140 "If you want to use only some pull coordinates in WHAM, please select "
2141 "them with option gmx wham -is\n",
2142 fn, epullg_names[geom], i + 1, epullg_names[header->pcrd[i].geometry]);
2144 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY]
2145 || thedim[ZZ] != header->pcrd[i].dim[ZZ])
2148 "%s: Your pull coordinates have different pull dimensions (coordinate 1: "
2149 "%s %s %s, coordinate %d: %s %s %s)\n"
2150 "If you want to use only some pull coordinates in WHAM, please select "
2151 "them with option gmx wham -is\n",
2152 fn, int2YN(thedim[XX]), int2YN(thedim[YY]), int2YN(thedim[ZZ]), i + 1,
2153 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2154 int2YN(header->pcrd[i].dim[ZZ]));
2156 if (header->pcrd[i].geometry == epullgCYL)
2158 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
2162 "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2163 "However, found dimensions [%s %s %s]\n",
2164 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2165 int2YN(header->pcrd[i].dim[ZZ]));
2168 if (header->pcrd[i].k <= 0.0)
2171 "%s: Pull coordinate %d has force constant of of %g.\n"
2172 "That doesn't seem to be an Umbrella tpr.\n",
2173 fn, i + 1, header->pcrd[i].k);
2178 if (opt->verbose || first)
2180 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
2182 for (int i = 0; i < ir->pull->ncoord; i++)
2184 int lentmp = strlen(epullg_names[header->pcrd[i].geometry]);
2185 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
2189 "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d "
2190 "dimensions). Used: %%s\n",
2192 for (int i = 0; i < ir->pull->ncoord; i++)
2194 bool use = (coordsel == nullptr || coordsel->bUse[i]);
2195 printf(fmt, epullg_names[header->pcrd[i].geometry], header->pcrd[i].k, header->pcrd[i].init_dist,
2196 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2197 int2YN(header->pcrd[i].dim[ZZ]), header->pcrd[i].ndim, use ? "Yes" : "No");
2198 printf("\tPull group coordinates of %d groups expected in pullx files.\n",
2199 ir->pull->bPrintCOM ? header->pcrd[i].ngroup : 0);
2201 printf("\tReference value of the coordinate%s expected in pullx files.\n",
2202 header->bPrintRefValue ? "" : " not");
2204 if (!opt->verbose && first)
2206 printf("\tUse option -v to see this output for all input tpr files\n\n");
2212 //! Read pullx.xvg or pullf.xvg
2213 static void read_pull_xf(const char* fn,
2214 t_UmbrellaHeader* header,
2215 t_UmbrellaWindow* window,
2216 t_UmbrellaOptions* opt,
2217 gmx_bool bGetMinMax,
2220 t_coordselection* coordsel)
2222 double ** y = nullptr, pos = 0., t, force, time0 = 0., dt;
2223 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
2224 int nColExpect, ntot, column;
2225 real min, max, minfound = 1e20, maxfound = -1e20;
2226 gmx_bool dt_ok, timeok;
2227 const char* quantity;
2228 const int blocklen = 4096;
2229 int* lennow = nullptr;
2230 static gmx_bool bFirst = TRUE;
2233 * Data columns in pull output:
2234 * - in force output pullf.xvg:
2235 * No reference columns, one column per pull coordinate
2237 * - in position output pullx.xvg:
2238 * * optionally, ndim columns for COMs of all groups (depending on on mdp options pull-print-com);
2239 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
2240 * be written separately into pullx file, but this is not supported and throws an error below;
2241 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
2244 if (header->bPrintComp && opt->bPullx)
2248 "gmx wham cannot read pullx files if the components of the coordinate was written\n"
2249 "(mdp option pull-print-components). Provide the pull force files instead (with "
2253 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
2254 snew(nColThisCrd, header->npullcrds);
2255 snew(nColCOMCrd, header->npullcrds);
2256 snew(nColRefCrd, header->npullcrds);
2260 /* pullf reading: simply one column per coordinate */
2261 for (g = 0; g < header->npullcrds; g++)
2270 /* pullx reading. Note explanation above. */
2271 for (g = 0; g < header->npullcrds; g++)
2273 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
2274 nColCOMCrd[g] = (header->bPrintCOM ? header->pcrd[g].ndim * header->pcrd[g].ngroup : 0);
2275 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
2279 nColExpect = 1; /* time column */
2280 for (g = 0; g < header->npullcrds; g++)
2282 nColExpect += nColThisCrd[g];
2285 /* read pullf or pullx. Could be optimized if min and max are given. */
2286 nt = read_xvg(fn, &y, &ny);
2288 /* Check consistency */
2289 quantity = opt->bPullx ? "position" : "force";
2292 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2294 if (bFirst || opt->verbose)
2296 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
2297 for (i = 0; i < header->npullcrds; i++)
2299 printf("\tColumns for pull coordinate %d\n", i + 1);
2300 printf("\t\treaction coordinate: %d\n"
2301 "\t\tcenter-of-mass of groups: %d\n"
2302 "\t\treference position column: %s\n",
2303 1, nColCOMCrd[i], (header->bPrintRefValue ? "Yes" : "No"));
2305 printf("\tFound %d times in %s\n", nt, fn);
2308 if (nColExpect != ny)
2311 "Expected %d columns (including time column) in %s, but found %d."
2312 " Maybe you confused options -if and -ix ?",
2313 nColExpect, fn, ny);
2324 window->dt = y[0][1] - y[0][0];
2326 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2328 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2331 /* Need to alocate memory and set up structure for windows */
2334 /* Use only groups selected with option -is file */
2335 if (header->npullcrds != coordsel->n)
2338 "tpr file contains %d pull groups, but expected %d from group selection "
2340 header->npullcrds, coordsel->n);
2342 window->nPull = coordsel->nUse;
2346 window->nPull = header->npullcrds;
2349 window->nBin = bins;
2350 snew(window->Histo, window->nPull);
2351 snew(window->z, window->nPull);
2352 snew(window->k, window->nPull);
2353 snew(window->pos, window->nPull);
2354 snew(window->N, window->nPull);
2355 snew(window->Ntot, window->nPull);
2356 snew(window->g, window->nPull);
2357 snew(window->bsWeight, window->nPull);
2358 window->bContrib = nullptr;
2360 if (opt->bCalcTauInt)
2362 snew(window->ztime, window->nPull);
2366 window->ztime = nullptr;
2368 snew(lennow, window->nPull);
2370 for (g = 0; g < window->nPull; ++g)
2373 window->bsWeight[g] = 1.;
2375 window->Ntot[g] = 0;
2377 snew(window->Histo[g], bins);
2379 if (opt->bCalcTauInt)
2381 window->ztime[g] = nullptr;
2385 /* Copying umbrella center and force const is more involved since not
2386 all pull groups from header (tpr file) may be used in window variable */
2387 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2389 if (coordsel && !coordsel->bUse[g])
2393 window->k[gUsed] = header->pcrd[g].k;
2394 window->pos[gUsed] = header->pcrd[g].init_dist;
2399 { /* only determine min and max */
2402 min = max = bins = 0; /* Get rid of warnings */
2406 for (i = 0; i < nt; i++)
2408 /* Do you want that time frame? */
2409 t = 1.0 / 1000 * (gmx::roundToInt64((y[0][i] * 1000))); /* round time to fs */
2411 /* get time step of pdo file and get dstep from opt->dt */
2421 dstep = gmx::roundToInt(opt->dt / dt);
2429 window->dt = dt * dstep;
2433 dt_ok = (i % dstep == 0);
2434 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2436 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2437 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2440 /* Note: if coordsel == NULL:
2441 * all groups in pullf/x file are stored in this window, and gUsed == g
2442 * if coordsel != NULL:
2443 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2446 for (g = 0; g < header->npullcrds; ++g)
2448 /* was this group selected for application in WHAM? */
2449 if (coordsel && !coordsel->bUse[g])
2457 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2458 force = y[g + 1][i];
2459 pos = -force / header->pcrd[g].k + header->pcrd[g].init_dist;
2463 /* Pick the correct column index.
2464 Note that there is always exactly one displacement column.
2467 for (int j = 0; j < g; j++)
2469 column += nColThisCrd[j];
2474 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2488 if (gUsed >= window->nPull)
2491 "gUsed too large (%d, nPull=%d). This error should have been "
2493 gUsed, window->nPull);
2496 if (opt->bCalcTauInt && !bGetMinMax)
2498 /* save time series for autocorrelation analysis */
2499 ntot = window->Ntot[gUsed];
2500 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2501 if (ntot >= lennow[gUsed])
2503 lennow[gUsed] += blocklen;
2504 srenew(window->ztime[gUsed], lennow[gUsed]);
2506 window->ztime[gUsed][ntot] = pos;
2509 ibin = static_cast<int>(std::floor((pos - min) / (max - min) * bins));
2514 while ((ibin += bins) < 0) {}
2516 else if (ibin >= bins)
2518 while ((ibin -= bins) >= bins) {}
2521 if (ibin >= 0 && ibin < bins)
2523 window->Histo[gUsed][ibin] += 1.;
2526 window->Ntot[gUsed]++;
2530 else if (t > opt->tmax)
2534 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2546 for (i = 0; i < ny; i++)
2552 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2553 static void read_tpr_pullxf_files(char** fnTprs,
2556 t_UmbrellaHeader* header,
2557 t_UmbrellaWindow* window,
2558 t_UmbrellaOptions* opt)
2561 real mintmp, maxtmp;
2563 printf("Reading %d tpr and pullf files\n", nfiles);
2565 /* min and max not given? */
2568 printf("Automatic determination of boundaries...\n");
2571 for (i = 0; i < nfiles; i++)
2573 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2575 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2577 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2578 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2581 "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2583 read_pull_xf(fnPull[i], header, nullptr, opt, TRUE, &mintmp, &maxtmp,
2584 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2585 if (maxtmp > opt->max)
2589 if (mintmp < opt->min)
2594 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2595 if (opt->bBoundsOnly)
2597 printf("Found option -boundsonly, now exiting.\n");
2601 /* store stepsize in profile */
2602 opt->dz = (opt->max - opt->min) / opt->bins;
2604 bool foundData = false;
2605 for (i = 0; i < nfiles; i++)
2607 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2609 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2611 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2612 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2615 "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2617 read_pull_xf(fnPull[i], header, window + i, opt, FALSE, nullptr, nullptr,
2618 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2619 if (window[i].Ntot[0] == 0)
2621 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2631 "No data points were found in pullf/pullx files. Maybe you need to specify the "
2635 for (i = 0; i < nfiles; i++)
2644 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2646 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2647 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2649 static void readIntegratedAutocorrelationTimes(t_UmbrellaWindow* window, int nwins, const char* fn)
2651 int nlines, ny, i, ig;
2654 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2655 nlines = read_xvg(fn, &iact, &ny);
2656 if (nlines != nwins)
2658 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2661 for (i = 0; i < nlines; i++)
2663 if (window[i].nPull != ny)
2666 "You are providing autocorrelation times with option -iiact and the\n"
2667 "number of pull groups is different in different simulations. That is not\n"
2668 "supported yet. Sorry.\n");
2670 for (ig = 0; ig < window[i].nPull; ig++)
2672 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2673 window[i].g[ig] = 1 + 2 * iact[ig][i] / window[i].dt;
2675 if (iact[ig][i] <= 0.0)
2677 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2684 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2687 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2688 * that -in case of limited sampling- the ACT may be severely underestimated.
2689 * Note: the g=1+2tau are overwritten.
2690 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2693 static void smoothIact(t_UmbrellaWindow* window, int nwins, t_UmbrellaOptions* opt)
2696 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2698 /* only evaluate within +- 3sigma of the Gausian */
2699 siglim = 3.0 * opt->sigSmoothIact;
2700 siglim2 = gmx::square(siglim);
2701 /* pre-factor of Gaussian */
2702 gaufact = 1.0 / (std::sqrt(2 * M_PI) * opt->sigSmoothIact);
2703 invtwosig2 = 0.5 / gmx::square(opt->sigSmoothIact);
2705 for (i = 0; i < nwins; i++)
2707 snew(window[i].tausmooth, window[i].nPull);
2708 for (ig = 0; ig < window[i].nPull; ig++)
2712 pos = window[i].pos[ig];
2713 for (j = 0; j < nwins; j++)
2715 for (jg = 0; jg < window[j].nPull; jg++)
2717 dpos2 = gmx::square(window[j].pos[jg] - pos);
2718 if (dpos2 < siglim2)
2720 w = gaufact * std::exp(-dpos2 * invtwosig2);
2722 tausm += w * window[j].tau[jg];
2723 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2724 w,dpos2,pos,gaufact,invtwosig2); */
2729 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2731 window[i].tausmooth[ig] = tausm;
2735 window[i].tausmooth[ig] = window[i].tau[ig];
2737 window[i].g[ig] = 1 + 2 * tausm / window[i].dt;
2742 //! Stop integrating autoccorelation function when ACF drops under this value
2743 #define WHAM_AC_ZERO_LIMIT 0.05
2745 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2747 static void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow* window,
2749 t_UmbrellaOptions* opt,
2753 int i, ig, ncorr, ntot, j, k, *count, restart;
2754 real *corr, c0, dt, tmp;
2755 real *ztime, av, tausteps;
2756 FILE *fp, *fpcorr = nullptr;
2760 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2761 "time [ps]", "autocorrelation function", opt->oenv);
2765 for (i = 0; i < nwins; i++)
2767 fprintf(stdout, "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...",
2768 100. * (i + 1) / nwins);
2770 ntot = window[i].Ntot[0];
2772 /* using half the maximum time as length of autocorrelation function */
2777 "Tryig to estimtate autocorrelation time from only %d"
2778 " points. Provide more pull data!",
2782 /* snew(corrSq,ncorr); */
2785 snew(window[i].tau, window[i].nPull);
2786 restart = gmx::roundToInt(opt->acTrestart / dt);
2792 for (ig = 0; ig < window[i].nPull; ig++)
2794 if (ntot != window[i].Ntot[ig])
2797 "Encountered different nr of frames in different pull groups.\n"
2798 "That should not happen. (%d and %d)\n",
2799 ntot, window[i].Ntot[ig]);
2801 ztime = window[i].ztime[ig];
2803 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2804 for (j = 0, av = 0; (j < ntot); j++)
2809 for (k = 0; (k < ncorr); k++)
2814 for (j = 0; (j < ntot); j += restart)
2816 for (k = 0; (k < ncorr) && (j + k < ntot); k++)
2818 tmp = (ztime[j] - av) * (ztime[j + k] - av);
2820 /* corrSq[k] += tmp*tmp; */
2824 /* divide by nr of frames for each time displacement */
2825 for (k = 0; (k < ncorr); k++)
2827 /* count probably = (ncorr-k+(restart-1))/restart; */
2828 corr[k] = corr[k] / count[k];
2829 /* variance of autocorrelation function */
2830 /* corrSq[k]=corrSq[k]/count[k]; */
2832 /* normalize such that corr[0] == 0 */
2834 for (k = 0; (k < ncorr); k++)
2837 /* corrSq[k]*=c0*c0; */
2840 /* write ACFs in verbose mode */
2843 for (k = 0; (k < ncorr); k++)
2845 fprintf(fpcorr, "%g %g\n", k * dt, corr[k]);
2847 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2850 /* esimate integrated correlation time, fitting is too unstable */
2851 tausteps = 0.5 * corr[0];
2852 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2853 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2855 tausteps += corr[j];
2858 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2859 Kumar et al, eq. 28 ff. */
2860 window[i].tau[ig] = tausteps * dt;
2861 window[i].g[ig] = 1 + 2 * tausteps;
2862 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2873 /* plot IACT along reaction coordinate */
2874 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2875 if (output_env_get_print_xvgr_codes(opt->oenv))
2877 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2878 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2879 for (i = 0; i < nwins; i++)
2881 fprintf(fp, "# %3d ", i);
2882 for (ig = 0; ig < window[i].nPull; ig++)
2884 fprintf(fp, " %11g", window[i].tau[ig]);
2889 for (i = 0; i < nwins; i++)
2891 for (ig = 0; ig < window[i].nPull; ig++)
2893 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2896 if (opt->sigSmoothIact > 0.0)
2898 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = "
2900 opt->sigSmoothIact);
2901 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2902 smoothIact(window, nwins, opt);
2903 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2904 if (output_env_get_print_xvgr_codes(opt->oenv))
2906 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2907 fprintf(fp, "@ s1 symbol color 2\n");
2909 for (i = 0; i < nwins; i++)
2911 for (ig = 0; ig < window[i].nPull; ig++)
2913 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2918 printf("Wrote %s\n", fn);
2922 * compute average and sigma of each umbrella histogram
2924 static void averageSigma(t_UmbrellaWindow* window, int nwins)
2927 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2929 for (i = 0; i < nwins; i++)
2931 snew(window[i].aver, window[i].nPull);
2932 snew(window[i].sigma, window[i].nPull);
2934 ntot = window[i].Ntot[0];
2935 for (ig = 0; ig < window[i].nPull; ig++)
2937 ztime = window[i].ztime[ig];
2938 for (k = 0, av = 0.; k < ntot; k++)
2943 for (k = 0, sum2 = 0.; k < ntot; k++)
2945 diff = ztime[k] - av;
2946 sum2 += diff * diff;
2948 sig = std::sqrt(sum2 / ntot);
2949 window[i].aver[ig] = av;
2951 /* Note: This estimate for sigma is biased from the limited sampling.
2952 Correct sigma by n/(n-1) where n = number of independent
2953 samples. Only possible if IACT is known.
2957 nSamplesIndep = window[i].N[ig] / (window[i].tau[ig] / window[i].dt);
2958 window[i].sigma[ig] = sig * nSamplesIndep / (nSamplesIndep - 1);
2962 window[i].sigma[ig] = sig;
2964 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2971 * Use histograms to compute average force on pull group.
2973 static void computeAverageForce(t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt)
2975 int i, j, bins = opt->bins, k;
2976 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2979 dz = (max - min) / bins;
2980 ztot = opt->max - min;
2981 ztot_half = ztot / 2;
2983 /* Compute average displacement from histograms */
2984 for (j = 0; j < nWindows; ++j)
2986 snew(window[j].forceAv, window[j].nPull);
2987 for (k = 0; k < window[j].nPull; ++k)
2991 for (i = 0; i < opt->bins; ++i)
2993 temp = (1.0 * i + 0.5) * dz + min;
2994 distance = temp - window[j].pos[k];
2996 { /* in cyclic wham: */
2997 if (distance > ztot_half) /* |distance| < ztot_half */
3001 else if (distance < -ztot_half)
3006 w = window[j].Histo[k][i] / window[j].g[k];
3007 displAv += w * distance;
3009 /* Are we near min or max? We are getting wrong forces from the histgrams since
3010 the histograms are zero outside [min,max). Therefore, assume that the position
3011 on the other side of the histomgram center is equally likely. */
3014 posmirrored = window[j].pos[k] - distance;
3015 if (posmirrored >= max || posmirrored < min)
3017 displAv += -w * distance;
3024 /* average force from average displacement */
3025 window[j].forceAv[k] = displAv * window[j].k[k];
3026 /* sigma from average square displacement */
3027 /* window[j].sigma [k] = sqrt(displAv2); */
3028 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
3034 * Check if the complete reaction coordinate is covered by the histograms
3036 static void checkReactionCoordinateCovered(t_UmbrellaWindow* window, int nwins, t_UmbrellaOptions* opt)
3038 int i, ig, j, bins = opt->bins;
3040 real avcount = 0, z, relcount, *count;
3041 snew(count, opt->bins);
3043 for (j = 0; j < opt->bins; ++j)
3045 for (i = 0; i < nwins; i++)
3047 for (ig = 0; ig < window[i].nPull; ig++)
3049 count[j] += window[i].Histo[ig][j];
3052 avcount += 1.0 * count[j];
3055 for (j = 0; j < bins; ++j)
3057 relcount = count[j] / avcount;
3058 z = (j + 0.5) * opt->dz + opt->min;
3059 bBoundary = j < bins / 20 || (bins - j) > bins / 20;
3060 /* check for bins with no data */
3064 "\nWARNING, no data point in bin %d (z=%g) !\n"
3065 "You may not get a reasonable profile. Check your histograms!\n",
3068 /* and check for poor sampling */
3069 else if (relcount < 0.005 && !bBoundary)
3071 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3077 /*! \brief Compute initial potential by integrating the average force
3079 * This speeds up the convergence by roughly a factor of 2
3081 static void guessPotByIntegration(t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, const char* xlabel)
3083 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3084 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3087 dz = (opt->max - min) / bins;
3089 printf("Getting initial potential by integration.\n");
3091 /* Compute average displacement from histograms */
3092 computeAverageForce(window, nWindows, opt);
3094 /* Get force for each bin from all histograms in this bin, or, alternatively,
3095 if no histograms are inside this bin, from the closest histogram */
3098 for (j = 0; j < opt->bins; ++j)
3100 pos = (1.0 * j + 0.5) * dz + min;
3104 groupmin = winmin = 0;
3105 for (i = 0; i < nWindows; i++)
3107 for (ig = 0; ig < window[i].nPull; ig++)
3109 hispos = window[i].pos[ig];
3110 dist = std::abs(hispos - pos);
3111 /* average force within bin */
3115 fAv += window[i].forceAv[ig];
3117 /* at the same time, remember closest histogram */
3126 /* if no histogram found in this bin, use closest histogram */
3133 fAv = window[winmin].forceAv[groupmin];
3137 for (j = 1; j < opt->bins; ++j)
3139 pot[j] = pot[j - 1] - 0.5 * dz * (f[j - 1] + f[j]);
3142 /* cyclic wham: linearly correct possible offset */
3145 diff = (pot[bins - 1] - pot[0]) / (bins - 1);
3146 for (j = 1; j < opt->bins; ++j)
3153 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3154 for (j = 0; j < opt->bins; ++j)
3156 fprintf(fp, "%g %g\n", (j + 0.5) * dz + opt->min, pot[j]);
3159 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3162 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3163 energy offsets which are usually determined by wham
3164 First: turn pot into probabilities:
3166 for (j = 0; j < opt->bins; ++j)
3168 pot[j] = std::exp(-pot[j] / (BOLTZ * opt->Temperature));
3170 calc_z(pot, window, nWindows, opt, TRUE);
3176 //! Count number of words in an line
3177 static int wordcount(char* ptr)
3182 if (std::strlen(ptr) == 0)
3186 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3188 for (i = 0; (ptr[i] != '\0'); i++)
3190 is[cur] = isspace(ptr[i]);
3191 if ((i > 0) && (is[cur] && !is[1 - cur]))
3200 /*! \brief Read input file for pull group selection (option -is)
3202 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3204 static void readPullCoordSelection(t_UmbrellaOptions* opt, char** fnTpr, int nTpr)
3207 int i, iline, n, len = STRLEN, temp;
3208 char *ptr = nullptr, *tmpbuf = nullptr;
3209 char fmt[1024], fmtign[1024];
3210 int block = 1, sizenow;
3212 fp = gmx_ffopen(opt->fnCoordSel, "r");
3213 opt->coordsel = nullptr;
3218 while ((ptr = fgets3(fp, tmpbuf, &len)) != nullptr)
3223 if (iline >= sizenow)
3226 srenew(opt->coordsel, sizenow);
3228 opt->coordsel[iline].n = n;
3229 opt->coordsel[iline].nUse = 0;
3230 snew(opt->coordsel[iline].bUse, n);
3233 for (i = 0; i < n; i++)
3235 std::strcpy(fmt, fmtign);
3236 std::strcat(fmt, "%d");
3237 if (sscanf(ptr, fmt, &temp))
3239 opt->coordsel[iline].bUse[i] = (temp > 0);
3240 if (opt->coordsel[iline].bUse[i])
3242 opt->coordsel[iline].nUse++;
3245 std::strcat(fmtign, "%*s");
3249 opt->nCoordsel = iline;
3250 if (nTpr != opt->nCoordsel)
3252 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel, opt->fnCoordSel);
3255 printf("\nUse only these pull coordinates:\n");
3256 for (iline = 0; iline < nTpr; iline++)
3258 printf("%s (%d of %d coordinates):", fnTpr[iline], opt->coordsel[iline].nUse,
3259 opt->coordsel[iline].n);
3260 for (i = 0; i < opt->coordsel[iline].n; i++)
3262 if (opt->coordsel[iline].bUse[i])
3264 printf(" %d", i + 1);
3275 #define WHAMBOOLXOR(a, b) (((!(a)) && (b)) || ((a) && (!(b))))
3277 //! Number of elements in fnm (used for command line parsing)
3278 #define NFILE asize(fnm)
3280 //! The main gmx wham routine
3281 int gmx_wham(int argc, char* argv[])
3283 const char* desc[] = {
3284 "[THISMODULE] is an analysis program that implements the Weighted",
3285 "Histogram Analysis Method (WHAM). It is intended to analyze",
3286 "output files generated by umbrella sampling simulations to ",
3287 "compute a potential of mean force (PMF).[PAR]",
3289 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3290 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3291 "and, if multiple coordinates need to be analyzed, all used the same",
3292 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3293 "At present, three input modes are supported.",
3295 "* With option [TT]-it[tt], the user provides a file which contains the",
3296 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3297 " AND, with option [TT]-ix[tt], a file which contains file names of",
3298 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3299 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3300 " first pullx, etc.",
3301 "* Same as the previous input mode, except that the user",
3302 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3303 " From the pull force the position in the umbrella potential is",
3304 " computed. This does not work with tabulated umbrella potentials.",
3305 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] ",
3307 " the GROMACS 3.3 umbrella output files. If you have some unusual",
3308 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3309 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file ",
3310 " header must be similar to the following::",
3313 " # Component selection: 0 0 1",
3315 " # Ref. Group 'TestAtom'",
3316 " # Nr. of pull groups 2",
3317 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3318 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3321 " The number of pull groups, umbrella positions, force constants, and names ",
3322 " may (of course) differ. Following the header, a time column and ",
3323 " a data column for each pull group follows (i.e. the displacement",
3324 " with respect to the umbrella center). Up to four pull groups are possible ",
3325 " per [REF].pdo[ref] file at present.[PAR]",
3326 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If ",
3328 "some of the pull coordinates should be used, a pull coordinate selection file (option ",
3329 "[TT]-is[tt]) can ",
3330 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3331 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr ",
3333 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. ",
3335 "If you have three tpr files, each containing 4 pull coordinates, but only pull ",
3336 "coordinates 1 and 2 should be ",
3337 "used, coordsel.dat looks like this::",
3343 "By default, the output files are::",
3345 " [TT]-o[tt] PMF output file",
3346 " [TT]-hist[tt] Histograms output file",
3348 "Always check whether the histograms sufficiently overlap.[PAR]",
3349 "The umbrella potential is assumed to be harmonic and the force constants are ",
3350 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force ",
3352 "a tabulated potential can be provided with [TT]-tab[tt].",
3357 "* [TT]-bins[tt] Number of bins used in analysis",
3358 "* [TT]-temp[tt] Temperature in the simulations",
3359 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3360 "* [TT]-auto[tt] Automatic determination of boundaries",
3361 "* [TT]-min,-max[tt] Boundaries of the profile",
3363 "The data points that are used to compute the profile",
3364 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3365 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3366 "umbrella window.[PAR]",
3367 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3368 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3369 "With energy output, the energy in the first bin is defined to be zero. ",
3370 "If you want the free energy at a different ",
3371 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3372 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3373 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3374 "[THISMODULE] will make use of the",
3375 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3376 "reaction coordinate will assumed be be neighbors.[PAR]",
3377 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3378 "which may be useful for, e.g. membranes.",
3383 "If available, the number of OpenMP threads used by gmx wham can be controlled by setting",
3384 "the [TT]OMP_NUM_THREADS[tt] environment variable.",
3389 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3390 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3391 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3392 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3393 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3394 "Because the IACTs can be severely underestimated in case of limited ",
3395 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3396 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3397 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3398 "integration of the ACFs while the ACFs are larger 0.05.",
3399 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3400 "less robust) method such as fitting to a double exponential, you can ",
3401 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3402 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3403 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull coordinate in the ",
3409 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3410 "otherwise the statistical error may be substantially underestimated. ",
3411 "More background and examples for the bootstrap technique can be found in ",
3412 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3413 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3414 "Four bootstrapping methods are supported and ",
3415 "selected with [TT]-bs-method[tt].",
3417 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3418 " data points, and the bootstrap is carried out by assigning random weights to the ",
3419 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3420 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3421 " statistical error is underestimated.",
3422 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3423 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3424 " (allowing duplication, i.e. sampling with replacement).",
3425 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3426 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3427 " divided into blocks and only histograms within each block are mixed. Note that ",
3428 " the histograms within each block must be representative for all possible histograms, ",
3429 " otherwise the statistical error is underestimated.",
3430 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3431 " such that the generated data points are distributed according the given histograms ",
3432 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3433 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3434 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3435 " Note that this method may severely underestimate the error in case of limited ",
3437 " that is if individual histograms do not represent the complete phase space at ",
3438 " the respective positions.",
3439 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3440 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3441 " and width of the umbrella histograms. That method yields similar error estimates ",
3442 " like method [TT]traj[tt].",
3444 "Bootstrapping output:",
3446 "* [TT]-bsres[tt] Average profile and standard deviations",
3447 "* [TT]-bsprof[tt] All bootstrapping profiles",
3449 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3450 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3454 const char* en_unit[] = { nullptr, "kJ", "kCal", "kT", nullptr };
3455 const char* en_unit_label[] = { "", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", nullptr };
3456 const char* en_bsMethod[] = { nullptr, "b-hist", "hist", "traj", "traj-gauss", nullptr };
3457 static t_UmbrellaOptions opt;
3460 { "-min", FALSE, etREAL, { &opt.min }, "Minimum coordinate in profile" },
3461 { "-max", FALSE, etREAL, { &opt.max }, "Maximum coordinate in profile" },
3462 { "-auto", FALSE, etBOOL, { &opt.bAuto }, "Determine min and max automatically" },
3463 { "-bins", FALSE, etINT, { &opt.bins }, "Number of bins in profile" },
3464 { "-temp", FALSE, etREAL, { &opt.Temperature }, "Temperature" },
3465 { "-tol", FALSE, etREAL, { &opt.Tolerance }, "Tolerance" },
3466 { "-v", FALSE, etBOOL, { &opt.verbose }, "Verbose mode" },
3467 { "-b", FALSE, etREAL, { &opt.tmin }, "First time to analyse (ps)" },
3468 { "-e", FALSE, etREAL, { &opt.tmax }, "Last time to analyse (ps)" },
3469 { "-dt", FALSE, etREAL, { &opt.dt }, "Analyse only every dt ps" },
3470 { "-histonly", FALSE, etBOOL, { &opt.bHistOnly }, "Write histograms and exit" },
3474 { &opt.bBoundsOnly },
3475 "Determine min and max and exit (with [TT]-auto[tt])" },
3476 { "-log", FALSE, etBOOL, { &opt.bLog }, "Calculate the log of the profile before printing" },
3477 { "-unit", FALSE, etENUM, { en_unit }, "Energy unit in case of log output" },
3482 "Define profile to 0.0 at this position (with [TT]-log[tt])" },
3487 "Create cyclic/periodic profile. Assumes min and max are the same point." },
3488 { "-sym", FALSE, etBOOL, { &opt.bSym }, "Symmetrize profile around z=0" },
3493 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)" },
3497 { &opt.bCalcTauInt },
3498 "Calculate integrated autocorrelation times and use in wham" },
3502 { &opt.sigSmoothIact },
3503 "Smooth autocorrelation times along reaction coordinate with Gaussian of this "
3504 "[GRK]sigma[grk]" },
3508 { &opt.acTrestart },
3509 "When computing autocorrelation functions, restart computing every .. (ps)" },
3513 { &opt.bAllowReduceIact },
3514 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3515 "during smoothing" },
3519 { &opt.nBootStrap },
3520 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3521 { "-bs-method", FALSE, etENUM, { en_bsMethod }, "Bootstrap method" },
3525 { &opt.tauBootStrap },
3526 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is "
3528 { "-bs-seed", FALSE, etINT, { &opt.bsSeed }, "Seed for bootstrapping. (-1 = use time)" },
3532 { &opt.histBootStrapBlockLength },
3533 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]." },
3537 { &opt.bs_verbose },
3538 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap." },
3542 { &opt.stepchange },
3543 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])" },
3547 { &opt.stepUpdateContrib },
3548 "HIDDENUpdate table with significan contributions to WHAM every ... iterations" },
3552 { efDAT, "-ix", "pullx-files", ffOPTRD }, /* wham input: pullf.xvg's and tprs */
3553 { efDAT, "-if", "pullf-files", ffOPTRD }, /* wham input: pullf.xvg's and tprs */
3554 { efDAT, "-it", "tpr-files", ffOPTRD }, /* wham input: tprs */
3555 { efDAT, "-ip", "pdo-files", ffOPTRD }, /* wham input: pdo files (gmx3 style) */
3556 { efDAT, "-is", "coordsel", ffOPTRD }, /* input: select pull coords to use */
3557 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3558 { efXVG, "-hist", "histo", ffWRITE }, /* output file for histograms */
3559 { efXVG, "-oiact", "iact", ffOPTWR }, /* writing integrated autocorrelation times */
3560 { efDAT, "-iiact", "iact-in", ffOPTRD }, /* reading integrated autocorrelation times */
3561 { efXVG, "-bsres", "bsResult", ffOPTWR }, /* average and errors of bootstrap analysis */
3562 { efXVG, "-bsprof", "bsProfs", ffOPTWR }, /* output file for bootstrap profiles */
3563 { efDAT, "-tab", "umb-pot", ffOPTRD }, /* Tabulated umbrella potential (if not harmonic) */
3566 int i, j, l, nfiles, nwins, nfiles2;
3567 t_UmbrellaHeader header;
3568 t_UmbrellaWindow* window = nullptr;
3569 double * profile, maxchange = 1e20;
3570 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3571 char ** fninTpr, **fninPull, **fninPdo;
3573 FILE * histout, *profout;
3574 char xlabel[STRLEN], ylabel[256], title[256];
3577 opt.verbose = FALSE;
3578 opt.bHistOnly = FALSE;
3587 opt.coordsel = nullptr;
3589 /* bootstrapping stuff */
3591 opt.bsMethod = bsMethod_hist;
3592 opt.tauBootStrap = 0.0;
3594 opt.histBootStrapBlockLength = 8;
3595 opt.bs_verbose = FALSE;
3600 opt.Temperature = 298;
3601 opt.Tolerance = 1e-6;
3602 opt.bBoundsOnly = FALSE;
3604 opt.bCalcTauInt = FALSE;
3605 opt.sigSmoothIact = 0.0;
3606 opt.bAllowReduceIact = TRUE;
3607 opt.bInitPotByIntegration = TRUE;
3608 opt.acTrestart = 1.0;
3609 opt.stepchange = 100;
3610 opt.stepUpdateContrib = 100;
3612 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr,
3618 opt.unit = nenum(en_unit);
3619 opt.bsMethod = nenum(en_bsMethod);
3621 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3623 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3624 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3625 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3626 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3627 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3628 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3629 if (opt.bTab && opt.bPullf)
3632 "Force input does not work with tabulated potentials. "
3633 "Provide pullx.xvg or pdo files!");
3636 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3638 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3640 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3643 "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3644 "\n\n Check gmx wham -h !");
3647 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3648 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3649 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3650 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3651 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3653 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3654 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3655 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3656 if ((bMinSet || bMaxSet) && bAutoSet)
3658 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3661 if ((bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3663 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3666 if (bMinSet && opt.bAuto)
3668 printf("Note: min and max given, switching off -auto.\n");
3672 if (opt.bTauIntGiven && opt.bCalcTauInt)
3675 "Either read (option -iiact) or calculate (option -ac) the\n"
3676 "the autocorrelation times. Not both.");
3679 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3682 "Either compute autocorrelation times (ACTs) (option -ac) or "
3683 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3685 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3688 "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3689 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3692 /* Reading gmx4/gmx5 pull output and tpr files */
3693 if (opt.bTpr || opt.bPullf || opt.bPullx)
3695 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3697 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3698 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3699 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n", nfiles, nfiles2,
3700 opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3701 if (nfiles != nfiles2)
3703 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles, opt.fnTpr, nfiles2, fnPull);
3706 /* Read file that selects the pull group to be used */
3707 if (opt.fnCoordSel != nullptr)
3709 readPullCoordSelection(&opt, fninTpr, nfiles);
3712 window = initUmbrellaWindows(nfiles);
3713 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3716 { /* reading pdo files */
3717 if (opt.fnCoordSel != nullptr)
3720 "Reading a -is file is not supported with PDO input files.\n"
3721 "Use awk or a similar tool to pick the required pull groups from your PDO "
3724 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3725 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3726 window = initUmbrellaWindows(nfiles);
3727 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3730 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3731 We can therefore get the units for the xlabel from the first coordinate. */
3732 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3736 /* enforce equal weight for all histograms? */
3739 enforceEqualWeights(window, nwins);
3742 /* write histograms */
3743 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms", xlabel, "count", opt.oenv);
3744 for (l = 0; l < opt.bins; ++l)
3746 fprintf(histout, "%e\t", (l + 0.5) / opt.bins * (opt.max - opt.min) + opt.min);
3747 for (i = 0; i < nwins; ++i)
3749 for (j = 0; j < window[i].nPull; ++j)
3751 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3754 fprintf(histout, "\n");
3757 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3760 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3764 /* Using tabulated umbrella potential */
3767 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3770 /* Integrated autocorrelation times provided ? */
3771 if (opt.bTauIntGiven)
3773 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3776 /* Compute integrated autocorrelation times */
3777 if (opt.bCalcTauInt)
3779 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3782 /* calc average and sigma for each histogram
3783 (maybe required for bootstrapping. If not, this is fast anyhow) */
3784 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3786 averageSigma(window, nwins);
3789 /* Get initial potential by simple integration */
3790 if (opt.bInitPotByIntegration)
3792 guessPotByIntegration(window, nwins, &opt, xlabel);
3795 /* Check if complete reaction coordinate is covered */
3796 checkReactionCoordinateCovered(window, nwins, &opt);
3798 /* Calculate profile */
3799 snew(profile, opt.bins);
3807 if ((i % opt.stepUpdateContrib) == 0)
3809 setup_acc_wham(profile, window, nwins, &opt);
3811 if (maxchange < opt.Tolerance)
3814 /* if (opt.verbose) */
3815 printf("Switched to exact iteration in iteration %d\n", i);
3817 calc_profile(profile, window, nwins, &opt, bExact);
3818 if (((i % opt.stepchange) == 0 || i == 1) && i != 0)
3820 printf("\t%4d) Maximum change %e\n", i, maxchange);
3823 } while ((maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3824 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3826 /* calc error from Kumar's formula */
3827 /* Unclear how the error propagates along reaction coordinate, therefore
3829 /* calc_error_kumar(profile,window, nwins,&opt); */
3831 /* Write profile in energy units? */
3834 prof_normalization_and_unit(profile, &opt);
3835 std::strcpy(ylabel, en_unit_label[opt.unit]);
3836 std::strcpy(title, "Umbrella potential");
3840 std::strcpy(ylabel, "Density of states");
3841 std::strcpy(title, "Density of states");
3844 /* symmetrize profile around z=0? */
3847 symmetrizeProfile(profile, &opt);
3850 /* write profile or density of states */
3851 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3852 for (i = 0; i < opt.bins; ++i)
3854 fprintf(profout, "%e\t%e\n", (i + 0.5) / opt.bins * (opt.max - opt.min) + opt.min, profile[i]);
3857 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3859 /* Bootstrap Method */
3862 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3863 opt2fn("-hist", NFILE, fnm), xlabel, ylabel, profile, window, nwins, &opt);
3867 freeUmbrellaWindows(window, nfiles);
3869 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3870 please_cite(stdout, "Hub2010");