2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "gromacs/commandline/pargs.h"
45 #include "gromacs/commandline/viewit.h"
46 #include "gromacs/correlationfunctions/autocorr.h"
47 #include "gromacs/correlationfunctions/expfit.h"
48 #include "gromacs/correlationfunctions/integrate.h"
49 #include "gromacs/fileio/xvgr.h"
50 #include "gromacs/gmxana/gmx_ana.h"
51 #include "gromacs/gmxana/gstat.h"
52 #include "gromacs/linearalgebra/matrix.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/units.h"
55 #include "gromacs/math/utilities.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/statistics/statistics.h"
58 #include "gromacs/utility/arraysize.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/futil.h"
61 #include "gromacs/utility/pleasecite.h"
62 #include "gromacs/utility/smalloc.h"
63 #include "gromacs/utility/snprintf.h"
65 /* must correspond to char *avbar_opt[] declared in main() */
76 static void power_fit(int n, int nset, real** val, real* t)
78 real *x, *y, quality, a, b, r;
86 for (i = 0; i < n; i++)
90 x[i] = std::log(t[i]);
97 "First time is not larger than 0, using index number as time for power fit\n");
98 for (i = 0; i < n; i++)
100 x[i] = std::log1p(static_cast<real>(i));
104 for (s = 0; s < nset; s++)
106 for (i = 0; i < n && val[s][i] >= 0; i++)
108 y[i] = std::log(val[s][i]);
112 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
114 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
115 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n", s + 1, quality, a, std::exp(b));
122 static real cosine_content(int nhp, int n, const real* y)
123 /* Assumes n equidistant points */
125 double fac, cosyint, yyint;
133 fac = M_PI * nhp / (n - 1);
137 for (i = 0; i < n; i++)
139 cosyint += std::cos(fac * i) * y[i];
140 yyint += y[i] * y[i];
143 return 2 * cosyint * cosyint / (n * yyint);
146 static void plot_coscont(const char* ccfile, int n, int nset, real** val, const gmx_output_env_t* oenv)
152 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content", oenv);
154 for (s = 0; s < nset; s++)
156 cc = cosine_content(s + 1, n, val[s]);
157 fprintf(fp, " %d %g\n", s + 1, cc);
158 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n", s + 1, 0.5 * (s + 1), cc);
160 fprintf(stdout, "\n");
165 static void regression_analysis(int n, gmx_bool bXYdy, real* x, int nset, real** val)
167 real S, chi2, a, b, da, db, r = 0;
169 if (bXYdy || (nset == 1))
171 printf("Fitting data to a function f(x) = ax + b\n");
172 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
173 printf("Error estimates will be given if w_i (sigma) values are given\n");
174 printf("(use option -xydy).\n\n");
177 lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S);
181 lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S);
183 chi2 = gmx::square((n - 2) * S);
184 printf("Chi2 = %g\n", chi2);
185 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
186 printf("Correlation coefficient = %.1f%%\n", 100 * r);
190 printf("a = %g +/- %g\n", a, da);
191 printf("b = %g +/- %g\n", b, db);
195 printf("a = %g\n", a);
196 printf("b = %g\n", b);
201 double chi2, *a, **xx, *y;
206 for (j = 0; (j < nset - 1); j++)
210 for (i = 0; (i < n); i++)
213 for (j = 1; (j < nset); j++)
215 xx[j - 1][i] = val[j][i];
219 chi2 = multi_regression(nullptr, n, y, nset - 1, xx, a);
220 printf("Fitting %d data points in %d sets\n", n, nset - 1);
221 printf("chi2 = %g\n", chi2);
223 for (i = 0; (i < nset - 1); i++)
235 static void histogram(const char* distfile, real binwidth, int n, int nset, real** val, const gmx_output_env_t* oenv)
239 double minval, maxval;
245 for (s = 0; s < nset; s++)
247 for (i = 0; i < n; i++)
249 if (val[s][i] < minval)
253 else if (val[s][i] > maxval)
260 minval = binwidth * std::floor(minval / binwidth);
261 maxval = binwidth * std::ceil(maxval / binwidth);
268 nbin = gmx::roundToInt(((maxval - minval) / binwidth) + 1);
269 fprintf(stderr, "Making distributions with %d bins\n", nbin);
271 fp = xvgropen(distfile, "Distribution", "", "", oenv);
272 for (s = 0; s < nset; s++)
274 for (i = 0; i < nbin; i++)
278 for (i = 0; i < n; i++)
280 histo[gmx::roundToInt((val[s][i] - minval) / binwidth)]++;
282 for (i = 0; i < nbin; i++)
284 fprintf(fp, " %g %g\n", minval + i * binwidth, static_cast<double>(histo[i]) / (n * binwidth));
288 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
294 static int real_comp(const void* a, const void* b)
296 real dif = *reinterpret_cast<const real*>(a) - *reinterpret_cast<const real*>(b);
312 static void average(const char* avfile, int avbar_opt, int n, int nset, real** val, real* t)
319 fp = gmx_ffopen(avfile, "w");
320 if ((avbar_opt == avbarERROR) && (nset == 1))
322 avbar_opt = avbarNONE;
324 if (avbar_opt != avbarNONE)
326 if (avbar_opt == avbar90)
329 fprintf(fp, "@TYPE xydydy\n");
330 edge = gmx::roundToInt(nset * 0.05);
332 "Errorbars: discarding %d points on both sides: %d%%"
335 gmx::roundToInt(100. * (nset - 2 * edge) / nset));
339 fprintf(fp, "@TYPE xydy\n");
343 for (i = 0; i < n; i++)
346 for (s = 0; s < nset; s++)
351 fprintf(fp, " %g %g", t[i], av);
353 if (avbar_opt != avbarNONE)
355 if (avbar_opt == avbar90)
357 for (s = 0; s < nset; s++)
361 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
362 fprintf(fp, " %g %g", tmp[nset - 1 - edge] - av, av - tmp[edge]);
366 for (s = 0; s < nset; s++)
368 var += gmx::square(val[s][i] - av);
370 if (avbar_opt == avbarSTDDEV)
372 err = std::sqrt(var / nset);
376 err = std::sqrt(var / (nset * (nset - 1)));
378 fprintf(fp, " %g", err);
385 if (avbar_opt == avbar90)
391 /*! \brief Compute final error estimate.
393 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
395 static real optimal_error_estimate(double sigma, const double fitparm[], real tTotal)
397 // When sigma is zero, the fitparm data can be uninitialized
402 double ss = fitparm[1] * fitparm[0] + (1 - fitparm[1]) * fitparm[2];
403 if ((tTotal <= 0) || (ss <= 0))
405 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n", tTotal, ss);
409 return sigma * std::sqrt(2 * ss / tTotal);
412 static void estimate_error(const char* eefile,
422 gmx_bool bSingleExpFit,
423 gmx_bool bAllowNegLTCorr,
424 const gmx_output_env_t* oenv)
427 int bs, prev_bs, nbs, nb;
432 real * tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
434 real ee, a, tau1, tau2;
438 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
443 fp = xvgropen(eefile, "Error estimates", "Block size (time)", "Error estimate", oenv);
444 if (output_env_get_print_xvgr_codes(oenv))
446 fprintf(fp, "@ subtitle \"using block averaging, total time %g (%d points)\"\n", (n - 1) * dt, n);
449 xvgr_legend(fp, 2 * nset, leg, oenv);
452 spacing = std::pow(2.0, 1.0 / resol);
456 for (s = 0; s < nset; s++)
463 bs = n / static_cast<int>(nbr);
468 for (i = 0; i < nb; i++)
471 for (j = 0; j < bs; j++)
473 blav += val[s][bs * i + j];
475 var += gmx::square(av[s] - blav / bs);
484 ybs[nbs] = var / (nb * (nb - 1.0)) * (n * dt) / (sig[s] * sig[s]);
503 for (i = 0; i < nbs / 2; i++)
506 tbs[i] = tbs[nbs - 1 - i];
507 tbs[nbs - 1 - i] = rtmp;
509 ybs[i] = ybs[nbs - 1 - i];
510 ybs[nbs - 1 - i] = rtmp;
512 /* The initial slope of the normalized ybs^2 is 1.
513 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
514 * From this we take our initial guess for tau1.
516 twooe = 2.0 / std::exp(1.0);
522 } while (i < nbs - 1 && (ybs[i] > ybs[i + 1] || ybs[i] > twooe * tau1_est));
527 "Data set %d has strange time correlations:\n"
528 "the std. error using single points is larger than that of blocks of 2 "
530 "The error estimate might be inaccurate, check the fit\n",
532 /* Use the total time as tau for the fitting weights */
533 tau_sig = (n - 1) * dt;
542 fprintf(debug, "set %d tau1 estimate %f\n", s + 1, tau1_est);
545 /* Generate more or less appropriate sigma's,
546 * also taking the density of points into account.
548 for (i = 0; i < nbs; i++)
552 dens = tbs[1] / tbs[0] - 1;
554 else if (i == nbs - 1)
556 dens = tbs[nbs - 1] / tbs[nbs - 2] - 1;
560 dens = 0.5 * (tbs[i + 1] / tbs[i - 1] - 1);
562 fitsig[i] = std::sqrt((tau_sig + tbs[i]) / dens);
567 fitparm[0] = tau1_est;
569 /* We set the initial guess for tau2
570 * to halfway between tau1_est and the total time (on log scale).
572 fitparm[2] = std::sqrt(tau1_est * (n - 1) * dt);
573 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST, fitparm, 0, nullptr);
575 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
576 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n - 1) * dt)
580 if (fitparm[2] > (n - 1) * dt)
583 "Warning: tau2 is longer than the length of the data (%g)\n"
584 " the statistics might be bad\n",
589 fprintf(stdout, "a fitted parameter is negative\n");
592 "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
593 optimal_error_estimate(sig[s], fitparm, n * dt),
597 /* Do a fit with tau2 fixed at the total time.
598 * One could also choose any other large value for tau2.
600 fitparm[0] = tau1_est;
602 fitparm[2] = (n - 1) * dt;
603 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
604 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST, fitparm, 4, nullptr);
606 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0 || (fitparm[1] > 1 && !bAllowNegLTCorr))
610 fprintf(stdout, "a fitted parameter is negative\n");
612 "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
613 optimal_error_estimate(sig[s], fitparm, n * dt),
618 /* Do a single exponential fit */
619 fprintf(stderr, "Will use a single exponential fit for set %d\n", s + 1);
620 fitparm[0] = tau1_est;
623 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST, fitparm, 6, nullptr);
626 ee = optimal_error_estimate(sig[s], fitparm, n * dt);
631 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n", s + 1, ee, a, tau1, tau2);
632 if (output_env_get_xvg_format(oenv) == XvgFormat::Xmgr)
634 fprintf(fp, "@ legend string %d \"av %f\"\n", 2 * s, av[s]);
636 "@ legend string %d \"ee %6g\"\n",
638 optimal_error_estimate(sig[s], fitparm, n * dt));
640 else if (output_env_get_xvg_format(oenv) == XvgFormat::Xmgrace)
642 fprintf(fp, "@ s%d legend \"av %f\"\n", 2 * s, av[s]);
643 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2 * s + 1, optimal_error_estimate(sig[s], fitparm, n * dt));
645 for (i = 0; i < nbs; i++)
650 sig[s] * std::sqrt(ybs[i] / (n * dt)),
651 sig[s] * std::sqrt(fit_function(effnERREST, fitparm, tbs[i]) / (n * dt)));
661 for (i = 0; i < n; i++)
663 ac[i] = val[s][i] - av[s];
666 fitsig[i] = std::sqrt(static_cast<real>(i));
674 nullptr, oenv, nullptr, n, 1, -1, &ac, dt, eacNormal, 1, FALSE, TRUE, FALSE, 0, 0, effnNONE);
678 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
680 for (i = 1; i <= fitlen / 2; i++)
686 /* Generate more or less appropriate sigma's */
687 for (i = 0; i <= fitlen; i++)
689 fitsig[i] = std::sqrt(acint + dt * i);
692 ac_fit[0] = 0.5 * acint;
694 ac_fit[2] = 10 * acint;
695 do_lmfit(n / nb_min, ac, fitsig, dt, nullptr, 0, fitlen * dt, oenv, bDebugMode(), effnEXPEXP, ac_fit, 0, nullptr);
696 ac_fit[3] = 1 - ac_fit[1];
699 "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
701 optimal_error_estimate(sig[s], ac_fit, n * dt),
706 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
707 for (i = 0; i < nbs; i++)
712 sig[s] * std::sqrt(fit_function(effnERREST, ac_fit, tbs[i])) / (n * dt));
719 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
728 static void luzar_correl(int nn, real* time, int nset, real** val, real temp, gmx_bool bError, real fit_start)
734 please_cite(stdout, "Spoel2006b");
736 /* Compute negative derivative k(t) = -dc(t)/dt */
740 compute_derivative(nn, time, val[0], kt);
741 for (j = 0; (j < nn); j++)
748 for (j = 0; (j < nn); j++)
750 d2 += gmx::square(kt[j] - val[3][j]);
752 fprintf(debug, "RMS difference in derivatives is %g\n", std::sqrt(d2 / nn));
754 analyse_corr(nn, time, val[0], val[2], kt, nullptr, nullptr, nullptr, fit_start, temp);
759 analyse_corr(nn, time, val[0], val[2], val[4], val[1], val[3], val[5], fit_start, temp);
763 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
764 printf("Not doing anything. Sorry.\n");
768 static void filter(real flen, int n, int nset, real** val, real dt)
771 double *filt, sum, vf, fluc, fluctot;
773 f = static_cast<int>(flen / (2 * dt));
777 for (i = 1; i <= f; i++)
779 filt[i] = std::cos(M_PI * dt * i / flen);
782 for (i = 0; i <= f; i++)
786 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n - 2 * f);
787 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2 * f + 1);
789 for (s = 0; s < nset; s++)
792 for (i = f; i < n - f; i++)
794 vf = filt[0] * val[s][i];
795 for (j = 1; j <= f; j++)
797 vf += filt[j] * (val[s][i - f] + val[s][i + f]);
799 fluc += gmx::square(val[s][i] - vf);
803 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s + 1, std::sqrt(fluc));
805 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", std::sqrt(fluctot / nset));
806 fprintf(stdout, "\n");
811 static void do_fit(FILE* out,
819 const gmx_output_env_t* oenv,
820 const char* fn_fitted)
822 real * c1 = nullptr, *sig = nullptr;
824 real tendfit, tbeginfit;
825 int i, efitfn, nparm;
827 efitfn = get_acffitfn();
828 nparm = effnNparams(efitfn);
829 fprintf(out, "Will fit to the following function:\n");
830 fprintf(out, "%s\n", effnDescription(efitfn));
836 fprintf(out, "Using two columns as y and sigma values\n");
842 if (opt2parg_bSet("-beginfit", npargs, ppa))
844 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
850 if (opt2parg_bSet("-endfit", npargs, ppa))
852 tendfit = opt2parg_real("-endfit", npargs, ppa);
856 tendfit = x0[ny - 1];
859 snew(fitparm, nparm);
862 case effnEXP1: fitparm[0] = 0.5; break;
869 fitparm[1] = 0.5 * c1[0];
873 fitparm[0] = fitparm[2] = 0.5 * c1[0];
879 fitparm[0] = fitparm[2] = fitparm[4] = 0.33 * c1[0];
886 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25 * c1[0];
894 fprintf(out, "Warning: don't know how to initialize the parameters\n");
895 for (i = 0; (i < nparm); i++)
900 fprintf(out, "Starting parameters:\n");
901 for (i = 0; (i < nparm); i++)
903 fprintf(out, "a%-2d = %12.5e\n", i + 1, fitparm[i]);
905 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit, oenv, bDebugMode(), efitfn, fitparm, 0, fn_fitted) > 0)
907 for (i = 0; (i < nparm); i++)
909 fprintf(out, "a%-2d = %12.5e\n", i + 1, fitparm[i]);
914 fprintf(out, "No solution was found\n");
918 static void print_fitted_function(const char* fitfile,
919 const char* fn_fitted,
927 gmx_output_env_t* oenv)
929 FILE* out_fit = gmx_ffopen(fitfile, "w");
930 if (bXYdy && nset >= 2)
932 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv, fn_fitted);
936 char* buf2 = nullptr;
938 if (nullptr != fn_fitted)
940 buflen = std::strlen(fn_fitted) + 32;
942 std::strncpy(buf2, fn_fitted, buflen);
943 buf2[std::strlen(buf2) - 4] = '\0';
945 for (s = 0; s < nset; s++)
948 if (nullptr != fn_fitted)
951 snprintf(buf, n, "%s_%d.xvg", buf2, s);
953 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
958 gmx_ffclose(out_fit);
961 int gmx_analyze(int argc, char* argv[])
963 static const char* desc[] = {
964 "[THISMODULE] reads an ASCII file and analyzes data sets.",
965 "A line in the input file may start with a time",
966 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
967 "Multiple sets can also be",
968 "read when they are separated by & (option [TT]-n[tt]);",
969 "in this case only one [IT]y[it]-value is read from each line.",
970 "All lines starting with # and @ are skipped.",
971 "All analyses can also be done for the derivative of a set",
972 "(option [TT]-d[tt]).[PAR]",
974 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
975 "points are equidistant in time.[PAR]",
977 "[THISMODULE] always shows the average and standard deviation of each",
978 "set, as well as the relative deviation of the third",
979 "and fourth cumulant from those of a Gaussian distribution with the same",
980 "standard deviation.[PAR]",
982 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
983 "Be sure that the time interval between data points is",
984 "much shorter than the time scale of the autocorrelation.[PAR]",
986 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
987 "i/2 periods. The formula is::",
989 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 ",
990 " / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
992 "This is useful for principal components obtained from covariance",
993 "analysis, since the principal components of random diffusion are",
994 "pure cosines.[PAR]",
996 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
998 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1000 "Option [TT]-av[tt] produces the average over the sets.",
1001 "Error bars can be added with the option [TT]-errbar[tt].",
1002 "The errorbars can represent the standard deviation, the error",
1003 "(assuming the points are independent) or the interval containing",
1004 "90% of the points, by discarding 5% of the points at the top and",
1007 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1008 "A set is divided in a number of blocks and averages are calculated for",
1009 "each block. The error for the total average is calculated from",
1010 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1011 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1012 "These errors are plotted as a function of the block size.",
1013 "Also an analytical block average curve is plotted, assuming",
1014 "that the autocorrelation is a sum of two exponentials.",
1015 "The analytical curve for the block average is::",
1017 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ",
1018 " ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) ",
1019 " [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1020 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] ",
1021 " (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + ",
1022 " 1)))[sqrt][math],",
1024 "where T is the total time.",
1025 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are ",
1026 "obtained by fitting f^2(t) to error^2.",
1027 "When the actual block average is very close to the analytical curve,",
1028 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] ",
1029 "+ (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1030 "The complete derivation is given in",
1031 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1033 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1034 "of each set and over all sets with respect to a filtered average.",
1035 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1036 "to len/2. len is supplied with the option [TT]-filter[tt].",
1037 "This filter reduces oscillations with period len/2 and len by a factor",
1038 "of 0.79 and 0.33 respectively.[PAR]",
1040 "Option [TT]-g[tt] fits the data to the function given with option",
1041 "[TT]-fitfn[tt].[PAR]",
1043 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1044 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1045 "zero or with a negative value are ignored.[PAR]",
1047 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1048 "on output from [gmx-hbond]. The input file can be taken directly",
1049 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1050 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1051 "curves that make sense in the context of molecular dynamics, mainly",
1052 "exponential curves. More information is in the manual. To check the output",
1053 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1054 "original data and the fitted function to a new data file. The fitting",
1055 "parameters are stored as comment in the output file."
1057 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1058 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1059 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1060 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE;
1061 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10;
1062 static real temp = 298.15, fit_start = 1, fit_end = 60;
1064 /* must correspond to enum avbar* declared at beginning of file */
1065 static const char* avbar_opt[avbarNR + 1] = { nullptr, "none", "stddev", "error", "90", nullptr };
1068 { "-time", FALSE, etBOOL, { &bHaveT }, "Expect a time in the input" },
1069 { "-b", FALSE, etREAL, { &tb }, "First time to read from set" },
1070 { "-e", FALSE, etREAL, { &te }, "Last time to read from set" },
1071 { "-n", FALSE, etINT, { &nsets_in }, "Read this number of sets separated by &" },
1072 { "-d", FALSE, etBOOL, { &bDer }, "Use the derivative" },
1077 "HIDDENThe derivative is the difference over this number of points" },
1078 { "-bw", FALSE, etREAL, { &binwidth }, "Binwidth for the distribution" },
1079 { "-errbar", FALSE, etENUM, { avbar_opt }, "Error bars for [TT]-av[tt]" },
1084 "Integrate data function(s) numerically using trapezium rule" },
1085 { "-aver_start", FALSE, etREAL, { &aver_start }, "Start averaging the integral from here" },
1090 "Interpret second data set as error in the y values for integrating" },
1095 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set "
1096 "will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets "
1097 "are present a multilinear regression will be performed yielding the constant A that "
1098 "minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] "
1099 "x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data "
1100 "set in the input file and x[SUB]i[sub] the others. Do read the information at the "
1101 "option [TT]-time[tt]." },
1106 "Do a Luzar and Chandler analysis on a correlation function and "
1107 "related as produced by [gmx-hbond]. When in addition the "
1108 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1109 "interpreted as errors in c(t) and n(t)." },
1114 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1119 "Time (ps) from which to start fitting the correlation functions in order to obtain the "
1120 "forward and backward rate constants for HB breaking and formation" },
1125 "Time (ps) where to stop fitting the correlation functions in order to obtain the "
1126 "forward and backward rate constants for HB breaking and formation. Only with "
1132 "HIDDENMinimum number of blocks for block averaging" },
1137 "HIDDENResolution for the block averaging, block size increases with"
1138 " a factor 2^(1/resol)" },
1143 "HIDDENAlways use a single exponential fit for the error estimate" },
1144 { "-eenlc", FALSE, etBOOL, { &bEENLC }, "HIDDENAllow a negative long-time correlation" },
1149 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1154 "Print the high-frequency fluctuation after filtering with a cosine filter of this "
1156 { "-power", FALSE, etBOOL, { &bPower }, "Fit data to: b t^a" },
1157 { "-subav", FALSE, etBOOL, { &bSubAv }, "Subtract the average before autocorrelating" },
1158 { "-oneacf", FALSE, etBOOL, { &bAverCorr }, "Calculate one ACF over all sets" },
1162 int n, nlast, s, nset, i, j = 0;
1163 real ** val, *t, dt, tot, error;
1164 double * av, *sig, cum1, cum2, cum3, cum4, db;
1165 const char * acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *fitfile;
1166 gmx_output_env_t* oenv;
1169 { efXVG, "-f", "graph", ffREAD }, { efXVG, "-ac", "autocorr", ffOPTWR },
1170 { efXVG, "-msd", "msd", ffOPTWR }, { efXVG, "-cc", "coscont", ffOPTWR },
1171 { efXVG, "-dist", "distr", ffOPTWR }, { efXVG, "-av", "average", ffOPTWR },
1172 { efXVG, "-ee", "errest", ffOPTWR }, { efXVG, "-fitted", "fitted", ffOPTWR },
1173 { efLOG, "-g", "fitlog", ffOPTWR }
1175 #define NFILE asize(fnm)
1181 ppa = add_acf_pargs(&npargs, pa);
1183 if (!parse_common_args(
1184 &argc, argv, PCA_CAN_VIEW, NFILE, fnm, npargs, ppa, asize(desc), desc, 0, nullptr, &oenv))
1190 acfile = opt2fn_null("-ac", NFILE, fnm);
1191 msdfile = opt2fn_null("-msd", NFILE, fnm);
1192 ccfile = opt2fn_null("-cc", NFILE, fnm);
1193 distfile = opt2fn_null("-dist", NFILE, fnm);
1194 avfile = opt2fn_null("-av", NFILE, fnm);
1195 eefile = opt2fn_null("-ee", NFILE, fnm);
1196 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == nullptr)
1198 fitfile = opt2fn("-g", NFILE, fnm);
1202 fitfile = opt2fn_null("-g", NFILE, fnm);
1205 val = read_xvg_time(opt2fn("-f", NFILE, fnm),
1207 opt2parg_bSet("-b", npargs, ppa),
1209 opt2parg_bSet("-e", npargs, ppa),
1216 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1220 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n", d, d);
1222 for (s = 0; s < nset; s++)
1224 for (i = 0; (i < n); i++)
1226 val[s][i] = (val[s][i + d] - val[s][i]) / (d * dt);
1235 printf("Calculating the integral using the trapezium rule\n");
1239 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1240 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1244 for (s = 0; s < nset; s++)
1246 sum = evaluate_integral(n, t, val[s], nullptr, aver_start, &stddev);
1247 printf("Integral %d %10.5f +/- %10.5f\n", s + 1, sum, stddev);
1252 if (fitfile != nullptr)
1254 print_fitted_function(
1255 fitfile, opt2fn_null("-fitted", NFILE, fnm), bXYdy, nset, n, t, val, npargs, ppa, oenv);
1258 printf(" std. dev. relative deviation of\n");
1259 printf(" standard --------- cumulants from those of\n");
1260 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1261 printf(" cum. 3 cum. 4\n");
1264 for (s = 0; (s < nset); s++)
1270 for (i = 0; (i < n); i++)
1275 for (i = 0; (i < n); i++)
1277 db = val[s][i] - cum1;
1279 cum3 += db * db * db;
1280 cum4 += db * db * db * db;
1286 sig[s] = std::sqrt(cum2);
1289 error = std::sqrt(cum2 / (n - 1));
1295 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1300 sig[s] != 0.0 ? cum3 / (sig[s] * sig[s] * sig[s] * std::sqrt(8 / M_PI)) : 0,
1301 sig[s] != 0.0 ? cum4 / (sig[s] * sig[s] * sig[s] * sig[s] * 3) - 1 : 0);
1305 if (filtlen != 0.0F)
1307 filter(filtlen, n, nset, val, dt);
1312 out = xvgropen(msdfile, "Mean square displacement", "time", "MSD (nm\\S2\\N)", oenv);
1313 nlast = static_cast<int>(n * frac);
1314 for (s = 0; s < nset; s++)
1316 for (j = 0; j <= nlast; j++)
1320 fprintf(stderr, "\r%d", j);
1324 for (i = 0; i < n - j; i++)
1326 tot += gmx::square(val[s][i] - val[s][i + j]);
1329 fprintf(out, " %g %8g\n", dt * j, tot);
1333 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1337 fprintf(stderr, "\r%d, time=%g\n", j - 1, (j - 1) * dt);
1342 plot_coscont(ccfile, n, nset, val, oenv);
1347 histogram(distfile, binwidth, n, nset, val, oenv);
1351 average(avfile, nenum(avbar_opt), n, nset, val, t);
1355 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt, bEeFitAc, bEESEF, bEENLC, oenv);
1359 power_fit(n, nset, val, t);
1362 if (acfile != nullptr)
1366 for (s = 0; s < nset; s++)
1368 for (i = 0; i < n; i++)
1374 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt, eacNormal, bAverCorr);
1379 regression_analysis(n, bXYdy, t, nset, val);
1384 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1387 view_all(oenv, NFILE, fnm);