2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "gromacs/commandline/pargs.h"
45 #include "gromacs/commandline/viewit.h"
46 #include "gromacs/correlationfunctions/autocorr.h"
47 #include "gromacs/correlationfunctions/expfit.h"
48 #include "gromacs/correlationfunctions/integrate.h"
49 #include "gromacs/fileio/xvgr.h"
50 #include "gromacs/gmxana/gmx_ana.h"
51 #include "gromacs/gmxana/gstat.h"
52 #include "gromacs/linearalgebra/matrix.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/statistics/statistics.h"
57 #include "gromacs/utility/arraysize.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/futil.h"
60 #include "gromacs/utility/pleasecite.h"
61 #include "gromacs/utility/smalloc.h"
62 #include "gromacs/utility/snprintf.h"
64 /* must correspond to char *avbar_opt[] declared in main() */
75 static void power_fit(int n, int nset, real** val, real* t)
77 real *x, *y, quality, a, b, r;
85 for (i = 0; i < n; i++)
89 x[i] = std::log(t[i]);
96 "First time is not larger than 0, using index number as time for power fit\n");
97 for (i = 0; i < n; i++)
99 x[i] = std::log1p(static_cast<real>(i));
103 for (s = 0; s < nset; s++)
105 for (i = 0; i < n && val[s][i] >= 0; i++)
107 y[i] = std::log(val[s][i]);
111 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
113 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
114 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n", s + 1, quality, a, std::exp(b));
121 static real cosine_content(int nhp, int n, const real* y)
122 /* Assumes n equidistant points */
124 double fac, cosyint, yyint;
132 fac = M_PI * nhp / (n - 1);
136 for (i = 0; i < n; i++)
138 cosyint += std::cos(fac * i) * y[i];
139 yyint += y[i] * y[i];
142 return 2 * cosyint * cosyint / (n * yyint);
145 static void plot_coscont(const char* ccfile, int n, int nset, real** val, const gmx_output_env_t* oenv)
151 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content", oenv);
153 for (s = 0; s < nset; s++)
155 cc = cosine_content(s + 1, n, val[s]);
156 fprintf(fp, " %d %g\n", s + 1, cc);
157 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n", s + 1, 0.5 * (s + 1), cc);
159 fprintf(stdout, "\n");
164 static void regression_analysis(int n, gmx_bool bXYdy, real* x, int nset, real** val)
166 real S, chi2, a, b, da, db, r = 0;
169 if (bXYdy || (nset == 1))
171 printf("Fitting data to a function f(x) = ax + b\n");
172 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
173 printf("Error estimates will be given if w_i (sigma) values are given\n");
174 printf("(use option -xydy).\n\n");
177 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
179 gmx_fatal(FARGS, "Error fitting the data: %s", gmx_stats_message(ok));
184 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
186 gmx_fatal(FARGS, "Error fitting the data: %s", gmx_stats_message(ok));
189 chi2 = gmx::square((n - 2) * S);
190 printf("Chi2 = %g\n", chi2);
191 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
192 printf("Correlation coefficient = %.1f%%\n", 100 * r);
196 printf("a = %g +/- %g\n", a, da);
197 printf("b = %g +/- %g\n", b, db);
201 printf("a = %g\n", a);
202 printf("b = %g\n", b);
207 double chi2, *a, **xx, *y;
212 for (j = 0; (j < nset - 1); j++)
216 for (i = 0; (i < n); i++)
219 for (j = 1; (j < nset); j++)
221 xx[j - 1][i] = val[j][i];
225 chi2 = multi_regression(nullptr, n, y, nset - 1, xx, a);
226 printf("Fitting %d data points in %d sets\n", n, nset - 1);
227 printf("chi2 = %g\n", chi2);
229 for (i = 0; (i < nset - 1); i++)
241 static void histogram(const char* distfile, real binwidth, int n, int nset, real** val, const gmx_output_env_t* oenv)
245 double minval, maxval;
251 for (s = 0; s < nset; s++)
253 for (i = 0; i < n; i++)
255 if (val[s][i] < minval)
259 else if (val[s][i] > maxval)
266 minval = binwidth * std::floor(minval / binwidth);
267 maxval = binwidth * std::ceil(maxval / binwidth);
274 nbin = gmx::roundToInt(((maxval - minval) / binwidth) + 1);
275 fprintf(stderr, "Making distributions with %d bins\n", nbin);
277 fp = xvgropen(distfile, "Distribution", "", "", oenv);
278 for (s = 0; s < nset; s++)
280 for (i = 0; i < nbin; i++)
284 for (i = 0; i < n; i++)
286 histo[gmx::roundToInt((val[s][i] - minval) / binwidth)]++;
288 for (i = 0; i < nbin; i++)
290 fprintf(fp, " %g %g\n", minval + i * binwidth, static_cast<double>(histo[i]) / (n * binwidth));
294 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
300 static int real_comp(const void* a, const void* b)
302 real dif = *reinterpret_cast<const real*>(a) - *reinterpret_cast<const real*>(b);
318 static void average(const char* avfile, int avbar_opt, int n, int nset, real** val, real* t)
325 fp = gmx_ffopen(avfile, "w");
326 if ((avbar_opt == avbarERROR) && (nset == 1))
328 avbar_opt = avbarNONE;
330 if (avbar_opt != avbarNONE)
332 if (avbar_opt == avbar90)
335 fprintf(fp, "@TYPE xydydy\n");
336 edge = gmx::roundToInt(nset * 0.05);
338 "Errorbars: discarding %d points on both sides: %d%%"
340 edge, gmx::roundToInt(100. * (nset - 2 * edge) / nset));
344 fprintf(fp, "@TYPE xydy\n");
348 for (i = 0; i < n; i++)
351 for (s = 0; s < nset; s++)
356 fprintf(fp, " %g %g", t[i], av);
358 if (avbar_opt != avbarNONE)
360 if (avbar_opt == avbar90)
362 for (s = 0; s < nset; s++)
366 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
367 fprintf(fp, " %g %g", tmp[nset - 1 - edge] - av, av - tmp[edge]);
371 for (s = 0; s < nset; s++)
373 var += gmx::square(val[s][i] - av);
375 if (avbar_opt == avbarSTDDEV)
377 err = std::sqrt(var / nset);
381 err = std::sqrt(var / (nset * (nset - 1)));
383 fprintf(fp, " %g", err);
390 if (avbar_opt == avbar90)
396 /*! \brief Compute final error estimate.
398 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
400 static real optimal_error_estimate(double sigma, const double fitparm[], real tTotal)
402 // When sigma is zero, the fitparm data can be uninitialized
407 double ss = fitparm[1] * fitparm[0] + (1 - fitparm[1]) * fitparm[2];
408 if ((tTotal <= 0) || (ss <= 0))
410 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n", tTotal, ss);
414 return sigma * std::sqrt(2 * ss / tTotal);
417 static void estimate_error(const char* eefile,
427 gmx_bool bSingleExpFit,
428 gmx_bool bAllowNegLTCorr,
429 const gmx_output_env_t* oenv)
432 int bs, prev_bs, nbs, nb;
437 real * tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
439 real ee, a, tau1, tau2;
443 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
448 fp = xvgropen(eefile, "Error estimates", "Block size (time)", "Error estimate", oenv);
449 if (output_env_get_print_xvgr_codes(oenv))
451 fprintf(fp, "@ subtitle \"using block averaging, total time %g (%d points)\"\n", (n - 1) * dt, n);
454 xvgr_legend(fp, 2 * nset, leg, oenv);
457 spacing = std::pow(2.0, 1.0 / resol);
461 for (s = 0; s < nset; s++)
468 bs = n / static_cast<int>(nbr);
473 for (i = 0; i < nb; i++)
476 for (j = 0; j < bs; j++)
478 blav += val[s][bs * i + j];
480 var += gmx::square(av[s] - blav / bs);
489 ybs[nbs] = var / (nb * (nb - 1.0)) * (n * dt) / (sig[s] * sig[s]);
508 for (i = 0; i < nbs / 2; i++)
511 tbs[i] = tbs[nbs - 1 - i];
512 tbs[nbs - 1 - i] = rtmp;
514 ybs[i] = ybs[nbs - 1 - i];
515 ybs[nbs - 1 - i] = rtmp;
517 /* The initial slope of the normalized ybs^2 is 1.
518 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
519 * From this we take our initial guess for tau1.
521 twooe = 2.0 / std::exp(1.0);
527 } while (i < nbs - 1 && (ybs[i] > ybs[i + 1] || ybs[i] > twooe * tau1_est));
532 "Data set %d has strange time correlations:\n"
533 "the std. error using single points is larger than that of blocks of 2 "
535 "The error estimate might be inaccurate, check the fit\n",
537 /* Use the total time as tau for the fitting weights */
538 tau_sig = (n - 1) * dt;
547 fprintf(debug, "set %d tau1 estimate %f\n", s + 1, tau1_est);
550 /* Generate more or less appropriate sigma's,
551 * also taking the density of points into account.
553 for (i = 0; i < nbs; i++)
557 dens = tbs[1] / tbs[0] - 1;
559 else if (i == nbs - 1)
561 dens = tbs[nbs - 1] / tbs[nbs - 2] - 1;
565 dens = 0.5 * (tbs[i + 1] / tbs[i - 1] - 1);
567 fitsig[i] = std::sqrt((tau_sig + tbs[i]) / dens);
572 fitparm[0] = tau1_est;
574 /* We set the initial guess for tau2
575 * to halfway between tau1_est and the total time (on log scale).
577 fitparm[2] = std::sqrt(tau1_est * (n - 1) * dt);
578 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST,
579 fitparm, 0, nullptr);
581 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
582 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n - 1) * dt)
586 if (fitparm[2] > (n - 1) * dt)
589 "Warning: tau2 is longer than the length of the data (%g)\n"
590 " the statistics might be bad\n",
595 fprintf(stdout, "a fitted parameter is negative\n");
597 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
598 optimal_error_estimate(sig[s], fitparm, n * dt), fitparm[1], fitparm[0],
600 /* Do a fit with tau2 fixed at the total time.
601 * One could also choose any other large value for tau2.
603 fitparm[0] = tau1_est;
605 fitparm[2] = (n - 1) * dt;
606 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
607 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST,
608 fitparm, 4, nullptr);
610 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0 || (fitparm[1] > 1 && !bAllowNegLTCorr))
614 fprintf(stdout, "a fitted parameter is negative\n");
615 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
616 optimal_error_estimate(sig[s], fitparm, n * dt), fitparm[1],
617 fitparm[0], fitparm[2]);
619 /* Do a single exponential fit */
620 fprintf(stderr, "Will use a single exponential fit for set %d\n", s + 1);
621 fitparm[0] = tau1_est;
624 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST,
625 fitparm, 6, nullptr);
628 ee = optimal_error_estimate(sig[s], fitparm, n * dt);
633 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n", s + 1, ee, a, tau1, tau2);
634 if (output_env_get_xvg_format(oenv) == XvgFormat::Xmgr)
636 fprintf(fp, "@ legend string %d \"av %f\"\n", 2 * s, av[s]);
637 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2 * s + 1,
638 optimal_error_estimate(sig[s], fitparm, n * dt));
640 else if (output_env_get_xvg_format(oenv) == XvgFormat::Xmgrace)
642 fprintf(fp, "@ s%d legend \"av %f\"\n", 2 * s, av[s]);
643 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2 * s + 1,
644 optimal_error_estimate(sig[s], fitparm, n * dt));
646 for (i = 0; i < nbs; i++)
648 fprintf(fp, "%g %g %g\n", tbs[i], sig[s] * std::sqrt(ybs[i] / (n * dt)),
649 sig[s] * std::sqrt(fit_function(effnERREST, fitparm, tbs[i]) / (n * dt)));
659 for (i = 0; i < n; i++)
661 ac[i] = val[s][i] - av[s];
664 fitsig[i] = std::sqrt(static_cast<real>(i));
671 low_do_autocorr(nullptr, oenv, nullptr, n, 1, -1, &ac, dt, eacNormal, 1, FALSE, TRUE,
672 FALSE, 0, 0, effnNONE);
676 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
678 for (i = 1; i <= fitlen / 2; i++)
684 /* Generate more or less appropriate sigma's */
685 for (i = 0; i <= fitlen; i++)
687 fitsig[i] = std::sqrt(acint + dt * i);
690 ac_fit[0] = 0.5 * acint;
692 ac_fit[2] = 10 * acint;
693 do_lmfit(n / nb_min, ac, fitsig, dt, nullptr, 0, fitlen * dt, oenv, bDebugMode(),
694 effnEXPEXP, ac_fit, 0, nullptr);
695 ac_fit[3] = 1 - ac_fit[1];
697 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n", s + 1,
698 optimal_error_estimate(sig[s], ac_fit, n * dt), ac_fit[1], ac_fit[0], ac_fit[2]);
700 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
701 for (i = 0; i < nbs; i++)
703 fprintf(fp, "%g %g\n", tbs[i],
704 sig[s] * std::sqrt(fit_function(effnERREST, ac_fit, tbs[i])) / (n * dt));
711 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
720 static void luzar_correl(int nn, real* time, int nset, real** val, real temp, gmx_bool bError, real fit_start)
726 please_cite(stdout, "Spoel2006b");
728 /* Compute negative derivative k(t) = -dc(t)/dt */
732 compute_derivative(nn, time, val[0], kt);
733 for (j = 0; (j < nn); j++)
740 for (j = 0; (j < nn); j++)
742 d2 += gmx::square(kt[j] - val[3][j]);
744 fprintf(debug, "RMS difference in derivatives is %g\n", std::sqrt(d2 / nn));
746 analyse_corr(nn, time, val[0], val[2], kt, nullptr, nullptr, nullptr, fit_start, temp);
751 analyse_corr(nn, time, val[0], val[2], val[4], val[1], val[3], val[5], fit_start, temp);
755 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
756 printf("Not doing anything. Sorry.\n");
760 static void filter(real flen, int n, int nset, real** val, real dt)
763 double *filt, sum, vf, fluc, fluctot;
765 f = static_cast<int>(flen / (2 * dt));
769 for (i = 1; i <= f; i++)
771 filt[i] = std::cos(M_PI * dt * i / flen);
774 for (i = 0; i <= f; i++)
778 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n - 2 * f);
779 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2 * f + 1);
781 for (s = 0; s < nset; s++)
784 for (i = f; i < n - f; i++)
786 vf = filt[0] * val[s][i];
787 for (j = 1; j <= f; j++)
789 vf += filt[j] * (val[s][i - f] + val[s][i + f]);
791 fluc += gmx::square(val[s][i] - vf);
795 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s + 1, std::sqrt(fluc));
797 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", std::sqrt(fluctot / nset));
798 fprintf(stdout, "\n");
803 static void do_fit(FILE* out,
811 const gmx_output_env_t* oenv,
812 const char* fn_fitted)
814 real * c1 = nullptr, *sig = nullptr;
816 real tendfit, tbeginfit;
817 int i, efitfn, nparm;
819 efitfn = get_acffitfn();
820 nparm = effnNparams(efitfn);
821 fprintf(out, "Will fit to the following function:\n");
822 fprintf(out, "%s\n", effnDescription(efitfn));
828 fprintf(out, "Using two columns as y and sigma values\n");
834 if (opt2parg_bSet("-beginfit", npargs, ppa))
836 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
842 if (opt2parg_bSet("-endfit", npargs, ppa))
844 tendfit = opt2parg_real("-endfit", npargs, ppa);
848 tendfit = x0[ny - 1];
851 snew(fitparm, nparm);
854 case effnEXP1: fitparm[0] = 0.5; break;
861 fitparm[1] = 0.5 * c1[0];
865 fitparm[0] = fitparm[2] = 0.5 * c1[0];
871 fitparm[0] = fitparm[2] = fitparm[4] = 0.33 * c1[0];
878 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25 * c1[0];
886 fprintf(out, "Warning: don't know how to initialize the parameters\n");
887 for (i = 0; (i < nparm); i++)
892 fprintf(out, "Starting parameters:\n");
893 for (i = 0; (i < nparm); i++)
895 fprintf(out, "a%-2d = %12.5e\n", i + 1, fitparm[i]);
897 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit, oenv, bDebugMode(), efitfn, fitparm, 0, fn_fitted) > 0)
899 for (i = 0; (i < nparm); i++)
901 fprintf(out, "a%-2d = %12.5e\n", i + 1, fitparm[i]);
906 fprintf(out, "No solution was found\n");
910 static void print_fitted_function(const char* fitfile,
911 const char* fn_fitted,
919 gmx_output_env_t* oenv)
921 FILE* out_fit = gmx_ffopen(fitfile, "w");
922 if (bXYdy && nset >= 2)
924 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv, fn_fitted);
928 char* buf2 = nullptr;
930 if (nullptr != fn_fitted)
932 buflen = std::strlen(fn_fitted) + 32;
934 std::strncpy(buf2, fn_fitted, buflen);
935 buf2[std::strlen(buf2) - 4] = '\0';
937 for (s = 0; s < nset; s++)
940 if (nullptr != fn_fitted)
943 snprintf(buf, n, "%s_%d.xvg", buf2, s);
945 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
950 gmx_ffclose(out_fit);
953 int gmx_analyze(int argc, char* argv[])
955 static const char* desc[] = {
956 "[THISMODULE] reads an ASCII file and analyzes data sets.",
957 "A line in the input file may start with a time",
958 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
959 "Multiple sets can also be",
960 "read when they are separated by & (option [TT]-n[tt]);",
961 "in this case only one [IT]y[it]-value is read from each line.",
962 "All lines starting with # and @ are skipped.",
963 "All analyses can also be done for the derivative of a set",
964 "(option [TT]-d[tt]).[PAR]",
966 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
967 "points are equidistant in time.[PAR]",
969 "[THISMODULE] always shows the average and standard deviation of each",
970 "set, as well as the relative deviation of the third",
971 "and fourth cumulant from those of a Gaussian distribution with the same",
972 "standard deviation.[PAR]",
974 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
975 "Be sure that the time interval between data points is",
976 "much shorter than the time scale of the autocorrelation.[PAR]",
978 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
979 "i/2 periods. The formula is::",
981 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 ",
982 " / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
984 "This is useful for principal components obtained from covariance",
985 "analysis, since the principal components of random diffusion are",
986 "pure cosines.[PAR]",
988 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
990 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
992 "Option [TT]-av[tt] produces the average over the sets.",
993 "Error bars can be added with the option [TT]-errbar[tt].",
994 "The errorbars can represent the standard deviation, the error",
995 "(assuming the points are independent) or the interval containing",
996 "90% of the points, by discarding 5% of the points at the top and",
999 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1000 "A set is divided in a number of blocks and averages are calculated for",
1001 "each block. The error for the total average is calculated from",
1002 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1003 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1004 "These errors are plotted as a function of the block size.",
1005 "Also an analytical block average curve is plotted, assuming",
1006 "that the autocorrelation is a sum of two exponentials.",
1007 "The analytical curve for the block average is::",
1009 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ",
1010 " ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) ",
1011 " [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1012 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] ",
1013 " (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + ",
1014 " 1)))[sqrt][math],",
1016 "where T is the total time.",
1017 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are ",
1018 "obtained by fitting f^2(t) to error^2.",
1019 "When the actual block average is very close to the analytical curve,",
1020 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] ",
1021 "+ (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1022 "The complete derivation is given in",
1023 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1025 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1026 "of each set and over all sets with respect to a filtered average.",
1027 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1028 "to len/2. len is supplied with the option [TT]-filter[tt].",
1029 "This filter reduces oscillations with period len/2 and len by a factor",
1030 "of 0.79 and 0.33 respectively.[PAR]",
1032 "Option [TT]-g[tt] fits the data to the function given with option",
1033 "[TT]-fitfn[tt].[PAR]",
1035 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1036 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1037 "zero or with a negative value are ignored.[PAR]",
1039 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1040 "on output from [gmx-hbond]. The input file can be taken directly",
1041 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1042 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1043 "curves that make sense in the context of molecular dynamics, mainly",
1044 "exponential curves. More information is in the manual. To check the output",
1045 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1046 "original data and the fitted function to a new data file. The fitting",
1047 "parameters are stored as comment in the output file."
1049 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1050 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1051 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1052 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE;
1053 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10;
1054 static real temp = 298.15, fit_start = 1, fit_end = 60;
1056 /* must correspond to enum avbar* declared at beginning of file */
1057 static const char* avbar_opt[avbarNR + 1] = { nullptr, "none", "stddev", "error", "90", nullptr };
1060 { "-time", FALSE, etBOOL, { &bHaveT }, "Expect a time in the input" },
1061 { "-b", FALSE, etREAL, { &tb }, "First time to read from set" },
1062 { "-e", FALSE, etREAL, { &te }, "Last time to read from set" },
1063 { "-n", FALSE, etINT, { &nsets_in }, "Read this number of sets separated by &" },
1064 { "-d", FALSE, etBOOL, { &bDer }, "Use the derivative" },
1069 "HIDDENThe derivative is the difference over this number of points" },
1070 { "-bw", FALSE, etREAL, { &binwidth }, "Binwidth for the distribution" },
1071 { "-errbar", FALSE, etENUM, { avbar_opt }, "Error bars for [TT]-av[tt]" },
1076 "Integrate data function(s) numerically using trapezium rule" },
1077 { "-aver_start", FALSE, etREAL, { &aver_start }, "Start averaging the integral from here" },
1082 "Interpret second data set as error in the y values for integrating" },
1087 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set "
1088 "will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets "
1089 "are present a multilinear regression will be performed yielding the constant A that "
1090 "minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] "
1091 "x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data "
1092 "set in the input file and x[SUB]i[sub] the others. Do read the information at the "
1093 "option [TT]-time[tt]." },
1098 "Do a Luzar and Chandler analysis on a correlation function and "
1099 "related as produced by [gmx-hbond]. When in addition the "
1100 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1101 "interpreted as errors in c(t) and n(t)." },
1106 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1111 "Time (ps) from which to start fitting the correlation functions in order to obtain the "
1112 "forward and backward rate constants for HB breaking and formation" },
1117 "Time (ps) where to stop fitting the correlation functions in order to obtain the "
1118 "forward and backward rate constants for HB breaking and formation. Only with "
1124 "HIDDENMinimum number of blocks for block averaging" },
1129 "HIDDENResolution for the block averaging, block size increases with"
1130 " a factor 2^(1/resol)" },
1135 "HIDDENAlways use a single exponential fit for the error estimate" },
1136 { "-eenlc", FALSE, etBOOL, { &bEENLC }, "HIDDENAllow a negative long-time correlation" },
1141 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1146 "Print the high-frequency fluctuation after filtering with a cosine filter of this "
1148 { "-power", FALSE, etBOOL, { &bPower }, "Fit data to: b t^a" },
1149 { "-subav", FALSE, etBOOL, { &bSubAv }, "Subtract the average before autocorrelating" },
1150 { "-oneacf", FALSE, etBOOL, { &bAverCorr }, "Calculate one ACF over all sets" },
1152 #define NPA asize(pa)
1155 int n, nlast, s, nset, i, j = 0;
1156 real ** val, *t, dt, tot, error;
1157 double * av, *sig, cum1, cum2, cum3, cum4, db;
1158 const char * acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *fitfile;
1159 gmx_output_env_t* oenv;
1162 { efXVG, "-f", "graph", ffREAD }, { efXVG, "-ac", "autocorr", ffOPTWR },
1163 { efXVG, "-msd", "msd", ffOPTWR }, { efXVG, "-cc", "coscont", ffOPTWR },
1164 { efXVG, "-dist", "distr", ffOPTWR }, { efXVG, "-av", "average", ffOPTWR },
1165 { efXVG, "-ee", "errest", ffOPTWR }, { efXVG, "-fitted", "fitted", ffOPTWR },
1166 { efLOG, "-g", "fitlog", ffOPTWR }
1168 #define NFILE asize(fnm)
1174 ppa = add_acf_pargs(&npargs, pa);
1176 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW, NFILE, fnm, npargs, ppa, asize(desc), desc, 0,
1183 acfile = opt2fn_null("-ac", NFILE, fnm);
1184 msdfile = opt2fn_null("-msd", NFILE, fnm);
1185 ccfile = opt2fn_null("-cc", NFILE, fnm);
1186 distfile = opt2fn_null("-dist", NFILE, fnm);
1187 avfile = opt2fn_null("-av", NFILE, fnm);
1188 eefile = opt2fn_null("-ee", NFILE, fnm);
1189 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == nullptr)
1191 fitfile = opt2fn("-g", NFILE, fnm);
1195 fitfile = opt2fn_null("-g", NFILE, fnm);
1198 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT, opt2parg_bSet("-b", npargs, ppa), tb,
1199 opt2parg_bSet("-e", npargs, ppa), te, nsets_in, &nset, &n, &dt, &t);
1200 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1204 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n", d, d);
1206 for (s = 0; s < nset; s++)
1208 for (i = 0; (i < n); i++)
1210 val[s][i] = (val[s][i + d] - val[s][i]) / (d * dt);
1219 printf("Calculating the integral using the trapezium rule\n");
1223 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1224 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1228 for (s = 0; s < nset; s++)
1230 sum = evaluate_integral(n, t, val[s], nullptr, aver_start, &stddev);
1231 printf("Integral %d %10.5f +/- %10.5f\n", s + 1, sum, stddev);
1236 if (fitfile != nullptr)
1238 print_fitted_function(fitfile, opt2fn_null("-fitted", NFILE, fnm), bXYdy, nset, n, t, val,
1242 printf(" std. dev. relative deviation of\n");
1243 printf(" standard --------- cumulants from those of\n");
1244 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1245 printf(" cum. 3 cum. 4\n");
1248 for (s = 0; (s < nset); s++)
1254 for (i = 0; (i < n); i++)
1259 for (i = 0; (i < n); i++)
1261 db = val[s][i] - cum1;
1263 cum3 += db * db * db;
1264 cum4 += db * db * db * db;
1270 sig[s] = std::sqrt(cum2);
1273 error = std::sqrt(cum2 / (n - 1));
1279 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n", s + 1, av[s], sig[s], error,
1280 sig[s] != 0.0 ? cum3 / (sig[s] * sig[s] * sig[s] * std::sqrt(8 / M_PI)) : 0,
1281 sig[s] != 0.0 ? cum4 / (sig[s] * sig[s] * sig[s] * sig[s] * 3) - 1 : 0);
1285 if (filtlen != 0.0F)
1287 filter(filtlen, n, nset, val, dt);
1292 out = xvgropen(msdfile, "Mean square displacement", "time", "MSD (nm\\S2\\N)", oenv);
1293 nlast = static_cast<int>(n * frac);
1294 for (s = 0; s < nset; s++)
1296 for (j = 0; j <= nlast; j++)
1300 fprintf(stderr, "\r%d", j);
1304 for (i = 0; i < n - j; i++)
1306 tot += gmx::square(val[s][i] - val[s][i + j]);
1309 fprintf(out, " %g %8g\n", dt * j, tot);
1313 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1317 fprintf(stderr, "\r%d, time=%g\n", j - 1, (j - 1) * dt);
1322 plot_coscont(ccfile, n, nset, val, oenv);
1327 histogram(distfile, binwidth, n, nset, val, oenv);
1331 average(avfile, nenum(avbar_opt), n, nset, val, t);
1335 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt, bEeFitAc, bEESEF, bEENLC, oenv);
1339 power_fit(n, nset, val, t);
1342 if (acfile != nullptr)
1346 for (s = 0; s < nset; s++)
1348 for (i = 0; i < n; i++)
1354 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt, eacNormal, bAverCorr);
1359 regression_analysis(n, bXYdy, t, nset, val);
1364 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1367 view_all(oenv, NFILE, fnm);