2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/commandline/viewit.h"
45 #include "gromacs/correlationfunctions/autocorr.h"
46 #include "gromacs/correlationfunctions/expfit.h"
47 #include "gromacs/correlationfunctions/integrate.h"
48 #include "gromacs/fileio/xvgr.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/gmxana/gstat.h"
51 #include "gromacs/linearalgebra/matrix.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/utilities.h"
54 #include "gromacs/math/vec.h"
55 #include "gromacs/statistics/statistics.h"
56 #include "gromacs/utility/arraysize.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/futil.h"
59 #include "gromacs/utility/pleasecite.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/snprintf.h"
63 /* must correspond to char *avbar_opt[] declared in main() */
74 static void power_fit(int n, int nset, real** val, real* t)
76 real *x, *y, quality, a, b, r;
84 for (i = 0; i < n; i++)
88 x[i] = std::log(t[i]);
95 "First time is not larger than 0, using index number as time for power fit\n");
96 for (i = 0; i < n; i++)
98 x[i] = std::log1p(static_cast<real>(i));
102 for (s = 0; s < nset; s++)
104 for (i = 0; i < n && val[s][i] >= 0; i++)
106 y[i] = std::log(val[s][i]);
110 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
112 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
113 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n", s + 1, quality, a, std::exp(b));
120 static real cosine_content(int nhp, int n, const real* y)
121 /* Assumes n equidistant points */
123 double fac, cosyint, yyint;
131 fac = M_PI * nhp / (n - 1);
135 for (i = 0; i < n; i++)
137 cosyint += std::cos(fac * i) * y[i];
138 yyint += y[i] * y[i];
141 return 2 * cosyint * cosyint / (n * yyint);
144 static void plot_coscont(const char* ccfile, int n, int nset, real** val, const gmx_output_env_t* oenv)
150 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content", oenv);
152 for (s = 0; s < nset; s++)
154 cc = cosine_content(s + 1, n, val[s]);
155 fprintf(fp, " %d %g\n", s + 1, cc);
156 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n", s + 1, 0.5 * (s + 1), cc);
158 fprintf(stdout, "\n");
163 static void regression_analysis(int n, gmx_bool bXYdy, real* x, int nset, real** val)
165 real S, chi2, a, b, da, db, r = 0;
168 if (bXYdy || (nset == 1))
170 printf("Fitting data to a function f(x) = ax + b\n");
171 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
172 printf("Error estimates will be given if w_i (sigma) values are given\n");
173 printf("(use option -xydy).\n\n");
176 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
178 gmx_fatal(FARGS, "Error fitting the data: %s", gmx_stats_message(ok));
183 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
185 gmx_fatal(FARGS, "Error fitting the data: %s", gmx_stats_message(ok));
188 chi2 = gmx::square((n - 2) * S);
189 printf("Chi2 = %g\n", chi2);
190 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
191 printf("Correlation coefficient = %.1f%%\n", 100 * r);
195 printf("a = %g +/- %g\n", a, da);
196 printf("b = %g +/- %g\n", b, db);
200 printf("a = %g\n", a);
201 printf("b = %g\n", b);
206 double chi2, *a, **xx, *y;
211 for (j = 0; (j < nset - 1); j++)
215 for (i = 0; (i < n); i++)
218 for (j = 1; (j < nset); j++)
220 xx[j - 1][i] = val[j][i];
224 chi2 = multi_regression(nullptr, n, y, nset - 1, xx, a);
225 printf("Fitting %d data points in %d sets\n", n, nset - 1);
226 printf("chi2 = %g\n", chi2);
228 for (i = 0; (i < nset - 1); i++)
240 static void histogram(const char* distfile, real binwidth, int n, int nset, real** val, const gmx_output_env_t* oenv)
244 double minval, maxval;
250 for (s = 0; s < nset; s++)
252 for (i = 0; i < n; i++)
254 if (val[s][i] < minval)
258 else if (val[s][i] > maxval)
265 minval = binwidth * std::floor(minval / binwidth);
266 maxval = binwidth * std::ceil(maxval / binwidth);
273 nbin = gmx::roundToInt(((maxval - minval) / binwidth) + 1);
274 fprintf(stderr, "Making distributions with %d bins\n", nbin);
276 fp = xvgropen(distfile, "Distribution", "", "", oenv);
277 for (s = 0; s < nset; s++)
279 for (i = 0; i < nbin; i++)
283 for (i = 0; i < n; i++)
285 histo[gmx::roundToInt((val[s][i] - minval) / binwidth)]++;
287 for (i = 0; i < nbin; i++)
289 fprintf(fp, " %g %g\n", minval + i * binwidth, static_cast<double>(histo[i]) / (n * binwidth));
293 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
299 static int real_comp(const void* a, const void* b)
301 real dif = *reinterpret_cast<const real*>(a) - *reinterpret_cast<const real*>(b);
317 static void average(const char* avfile, int avbar_opt, int n, int nset, real** val, real* t)
324 fp = gmx_ffopen(avfile, "w");
325 if ((avbar_opt == avbarERROR) && (nset == 1))
327 avbar_opt = avbarNONE;
329 if (avbar_opt != avbarNONE)
331 if (avbar_opt == avbar90)
334 fprintf(fp, "@TYPE xydydy\n");
335 edge = gmx::roundToInt(nset * 0.05);
337 "Errorbars: discarding %d points on both sides: %d%%"
339 edge, gmx::roundToInt(100. * (nset - 2 * edge) / nset));
343 fprintf(fp, "@TYPE xydy\n");
347 for (i = 0; i < n; i++)
350 for (s = 0; s < nset; s++)
355 fprintf(fp, " %g %g", t[i], av);
357 if (avbar_opt != avbarNONE)
359 if (avbar_opt == avbar90)
361 for (s = 0; s < nset; s++)
365 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
366 fprintf(fp, " %g %g", tmp[nset - 1 - edge] - av, av - tmp[edge]);
370 for (s = 0; s < nset; s++)
372 var += gmx::square(val[s][i] - av);
374 if (avbar_opt == avbarSTDDEV)
376 err = std::sqrt(var / nset);
380 err = std::sqrt(var / (nset * (nset - 1)));
382 fprintf(fp, " %g", err);
389 if (avbar_opt == avbar90)
395 /*! \brief Compute final error estimate.
397 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
399 static real optimal_error_estimate(double sigma, const double fitparm[], real tTotal)
401 // When sigma is zero, the fitparm data can be uninitialized
406 double ss = fitparm[1] * fitparm[0] + (1 - fitparm[1]) * fitparm[2];
407 if ((tTotal <= 0) || (ss <= 0))
409 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n", tTotal, ss);
413 return sigma * std::sqrt(2 * ss / tTotal);
416 static void estimate_error(const char* eefile,
426 gmx_bool bSingleExpFit,
427 gmx_bool bAllowNegLTCorr,
428 const gmx_output_env_t* oenv)
431 int bs, prev_bs, nbs, nb;
436 real * tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
438 real ee, a, tau1, tau2;
442 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
447 fp = xvgropen(eefile, "Error estimates", "Block size (time)", "Error estimate", oenv);
448 if (output_env_get_print_xvgr_codes(oenv))
450 fprintf(fp, "@ subtitle \"using block averaging, total time %g (%d points)\"\n", (n - 1) * dt, n);
453 xvgr_legend(fp, 2 * nset, leg, oenv);
456 spacing = std::pow(2.0, 1.0 / resol);
460 for (s = 0; s < nset; s++)
467 bs = n / static_cast<int>(nbr);
472 for (i = 0; i < nb; i++)
475 for (j = 0; j < bs; j++)
477 blav += val[s][bs * i + j];
479 var += gmx::square(av[s] - blav / bs);
488 ybs[nbs] = var / (nb * (nb - 1.0)) * (n * dt) / (sig[s] * sig[s]);
507 for (i = 0; i < nbs / 2; i++)
510 tbs[i] = tbs[nbs - 1 - i];
511 tbs[nbs - 1 - i] = rtmp;
513 ybs[i] = ybs[nbs - 1 - i];
514 ybs[nbs - 1 - i] = rtmp;
516 /* The initial slope of the normalized ybs^2 is 1.
517 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
518 * From this we take our initial guess for tau1.
520 twooe = 2.0 / std::exp(1.0);
526 } while (i < nbs - 1 && (ybs[i] > ybs[i + 1] || ybs[i] > twooe * tau1_est));
531 "Data set %d has strange time correlations:\n"
532 "the std. error using single points is larger than that of blocks of 2 "
534 "The error estimate might be inaccurate, check the fit\n",
536 /* Use the total time as tau for the fitting weights */
537 tau_sig = (n - 1) * dt;
546 fprintf(debug, "set %d tau1 estimate %f\n", s + 1, tau1_est);
549 /* Generate more or less appropriate sigma's,
550 * also taking the density of points into account.
552 for (i = 0; i < nbs; i++)
556 dens = tbs[1] / tbs[0] - 1;
558 else if (i == nbs - 1)
560 dens = tbs[nbs - 1] / tbs[nbs - 2] - 1;
564 dens = 0.5 * (tbs[i + 1] / tbs[i - 1] - 1);
566 fitsig[i] = std::sqrt((tau_sig + tbs[i]) / dens);
571 fitparm[0] = tau1_est;
573 /* We set the initial guess for tau2
574 * to halfway between tau1_est and the total time (on log scale).
576 fitparm[2] = std::sqrt(tau1_est * (n - 1) * dt);
577 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST,
578 fitparm, 0, nullptr);
580 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
581 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n - 1) * dt)
585 if (fitparm[2] > (n - 1) * dt)
588 "Warning: tau2 is longer than the length of the data (%g)\n"
589 " the statistics might be bad\n",
594 fprintf(stdout, "a fitted parameter is negative\n");
596 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
597 optimal_error_estimate(sig[s], fitparm, n * dt), fitparm[1], fitparm[0],
599 /* Do a fit with tau2 fixed at the total time.
600 * One could also choose any other large value for tau2.
602 fitparm[0] = tau1_est;
604 fitparm[2] = (n - 1) * dt;
605 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
606 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST,
607 fitparm, 4, nullptr);
609 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0 || (fitparm[1] > 1 && !bAllowNegLTCorr))
613 fprintf(stdout, "a fitted parameter is negative\n");
614 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
615 optimal_error_estimate(sig[s], fitparm, n * dt), fitparm[1],
616 fitparm[0], fitparm[2]);
618 /* Do a single exponential fit */
619 fprintf(stderr, "Will use a single exponential fit for set %d\n", s + 1);
620 fitparm[0] = tau1_est;
623 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST,
624 fitparm, 6, nullptr);
627 ee = optimal_error_estimate(sig[s], fitparm, n * dt);
632 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n", s + 1, ee, a, tau1, tau2);
633 if (output_env_get_xvg_format(oenv) == exvgXMGR)
635 fprintf(fp, "@ legend string %d \"av %f\"\n", 2 * s, av[s]);
636 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2 * s + 1,
637 optimal_error_estimate(sig[s], fitparm, n * dt));
639 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
641 fprintf(fp, "@ s%d legend \"av %f\"\n", 2 * s, av[s]);
642 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2 * s + 1,
643 optimal_error_estimate(sig[s], fitparm, n * dt));
645 for (i = 0; i < nbs; i++)
647 fprintf(fp, "%g %g %g\n", tbs[i], sig[s] * std::sqrt(ybs[i] / (n * dt)),
648 sig[s] * std::sqrt(fit_function(effnERREST, fitparm, tbs[i]) / (n * dt)));
658 for (i = 0; i < n; i++)
660 ac[i] = val[s][i] - av[s];
663 fitsig[i] = std::sqrt(static_cast<real>(i));
670 low_do_autocorr(nullptr, oenv, nullptr, n, 1, -1, &ac, dt, eacNormal, 1, FALSE, TRUE,
671 FALSE, 0, 0, effnNONE);
675 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
677 for (i = 1; i <= fitlen / 2; i++)
683 /* Generate more or less appropriate sigma's */
684 for (i = 0; i <= fitlen; i++)
686 fitsig[i] = std::sqrt(acint + dt * i);
689 ac_fit[0] = 0.5 * acint;
691 ac_fit[2] = 10 * acint;
692 do_lmfit(n / nb_min, ac, fitsig, dt, nullptr, 0, fitlen * dt, oenv, bDebugMode(),
693 effnEXPEXP, ac_fit, 0, nullptr);
694 ac_fit[3] = 1 - ac_fit[1];
696 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n", s + 1,
697 optimal_error_estimate(sig[s], ac_fit, n * dt), ac_fit[1], ac_fit[0], ac_fit[2]);
699 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
700 for (i = 0; i < nbs; i++)
702 fprintf(fp, "%g %g\n", tbs[i],
703 sig[s] * std::sqrt(fit_function(effnERREST, ac_fit, tbs[i])) / (n * dt));
710 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
719 static void luzar_correl(int nn, real* time, int nset, real** val, real temp, gmx_bool bError, real fit_start)
725 please_cite(stdout, "Spoel2006b");
727 /* Compute negative derivative k(t) = -dc(t)/dt */
731 compute_derivative(nn, time, val[0], kt);
732 for (j = 0; (j < nn); j++)
739 for (j = 0; (j < nn); j++)
741 d2 += gmx::square(kt[j] - val[3][j]);
743 fprintf(debug, "RMS difference in derivatives is %g\n", std::sqrt(d2 / nn));
745 analyse_corr(nn, time, val[0], val[2], kt, nullptr, nullptr, nullptr, fit_start, temp);
750 analyse_corr(nn, time, val[0], val[2], val[4], val[1], val[3], val[5], fit_start, temp);
754 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
755 printf("Not doing anything. Sorry.\n");
759 static void filter(real flen, int n, int nset, real** val, real dt)
762 double *filt, sum, vf, fluc, fluctot;
764 f = static_cast<int>(flen / (2 * dt));
768 for (i = 1; i <= f; i++)
770 filt[i] = std::cos(M_PI * dt * i / flen);
773 for (i = 0; i <= f; i++)
777 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n - 2 * f);
778 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2 * f + 1);
780 for (s = 0; s < nset; s++)
783 for (i = f; i < n - f; i++)
785 vf = filt[0] * val[s][i];
786 for (j = 1; j <= f; j++)
788 vf += filt[j] * (val[s][i - f] + val[s][i + f]);
790 fluc += gmx::square(val[s][i] - vf);
794 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s + 1, std::sqrt(fluc));
796 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", std::sqrt(fluctot / nset));
797 fprintf(stdout, "\n");
802 static void do_fit(FILE* out,
810 const gmx_output_env_t* oenv,
811 const char* fn_fitted)
813 real * c1 = nullptr, *sig = nullptr;
815 real tendfit, tbeginfit;
816 int i, efitfn, nparm;
818 efitfn = get_acffitfn();
819 nparm = effnNparams(efitfn);
820 fprintf(out, "Will fit to the following function:\n");
821 fprintf(out, "%s\n", effnDescription(efitfn));
827 fprintf(out, "Using two columns as y and sigma values\n");
833 if (opt2parg_bSet("-beginfit", npargs, ppa))
835 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
841 if (opt2parg_bSet("-endfit", npargs, ppa))
843 tendfit = opt2parg_real("-endfit", npargs, ppa);
847 tendfit = x0[ny - 1];
850 snew(fitparm, nparm);
853 case effnEXP1: fitparm[0] = 0.5; break;
860 fitparm[1] = 0.5 * c1[0];
864 fitparm[0] = fitparm[2] = 0.5 * c1[0];
870 fitparm[0] = fitparm[2] = fitparm[4] = 0.33 * c1[0];
877 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25 * c1[0];
885 fprintf(out, "Warning: don't know how to initialize the parameters\n");
886 for (i = 0; (i < nparm); i++)
891 fprintf(out, "Starting parameters:\n");
892 for (i = 0; (i < nparm); i++)
894 fprintf(out, "a%-2d = %12.5e\n", i + 1, fitparm[i]);
896 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit, oenv, bDebugMode(), efitfn, fitparm, 0, fn_fitted) > 0)
898 for (i = 0; (i < nparm); i++)
900 fprintf(out, "a%-2d = %12.5e\n", i + 1, fitparm[i]);
905 fprintf(out, "No solution was found\n");
909 static void print_fitted_function(const char* fitfile,
910 const char* fn_fitted,
918 gmx_output_env_t* oenv)
920 FILE* out_fit = gmx_ffopen(fitfile, "w");
921 if (bXYdy && nset >= 2)
923 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv, fn_fitted);
927 char* buf2 = nullptr;
929 if (nullptr != fn_fitted)
931 buflen = std::strlen(fn_fitted) + 32;
933 std::strncpy(buf2, fn_fitted, buflen);
934 buf2[std::strlen(buf2) - 4] = '\0';
936 for (s = 0; s < nset; s++)
939 if (nullptr != fn_fitted)
942 snprintf(buf, n, "%s_%d.xvg", buf2, s);
944 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
949 gmx_ffclose(out_fit);
952 int gmx_analyze(int argc, char* argv[])
954 static const char* desc[] = {
955 "[THISMODULE] reads an ASCII file and analyzes data sets.",
956 "A line in the input file may start with a time",
957 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
958 "Multiple sets can also be",
959 "read when they are separated by & (option [TT]-n[tt]);",
960 "in this case only one [IT]y[it]-value is read from each line.",
961 "All lines starting with # and @ are skipped.",
962 "All analyses can also be done for the derivative of a set",
963 "(option [TT]-d[tt]).[PAR]",
965 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
966 "points are equidistant in time.[PAR]",
968 "[THISMODULE] always shows the average and standard deviation of each",
969 "set, as well as the relative deviation of the third",
970 "and fourth cumulant from those of a Gaussian distribution with the same",
971 "standard deviation.[PAR]",
973 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
974 "Be sure that the time interval between data points is",
975 "much shorter than the time scale of the autocorrelation.[PAR]",
977 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
978 "i/2 periods. The formula is::",
980 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 ",
981 " / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
983 "This is useful for principal components obtained from covariance",
984 "analysis, since the principal components of random diffusion are",
985 "pure cosines.[PAR]",
987 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
989 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
991 "Option [TT]-av[tt] produces the average over the sets.",
992 "Error bars can be added with the option [TT]-errbar[tt].",
993 "The errorbars can represent the standard deviation, the error",
994 "(assuming the points are independent) or the interval containing",
995 "90% of the points, by discarding 5% of the points at the top and",
998 "Option [TT]-ee[tt] produces error estimates using block averaging.",
999 "A set is divided in a number of blocks and averages are calculated for",
1000 "each block. The error for the total average is calculated from",
1001 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1002 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1003 "These errors are plotted as a function of the block size.",
1004 "Also an analytical block average curve is plotted, assuming",
1005 "that the autocorrelation is a sum of two exponentials.",
1006 "The analytical curve for the block average is::",
1008 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ",
1009 " ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) ",
1010 " [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1011 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] ",
1012 " (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + ",
1013 " 1)))[sqrt][math],",
1015 "where T is the total time.",
1016 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are ",
1017 "obtained by fitting f^2(t) to error^2.",
1018 "When the actual block average is very close to the analytical curve,",
1019 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] ",
1020 "+ (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1021 "The complete derivation is given in",
1022 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1024 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1025 "of each set and over all sets with respect to a filtered average.",
1026 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1027 "to len/2. len is supplied with the option [TT]-filter[tt].",
1028 "This filter reduces oscillations with period len/2 and len by a factor",
1029 "of 0.79 and 0.33 respectively.[PAR]",
1031 "Option [TT]-g[tt] fits the data to the function given with option",
1032 "[TT]-fitfn[tt].[PAR]",
1034 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1035 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1036 "zero or with a negative value are ignored.[PAR]",
1038 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1039 "on output from [gmx-hbond]. The input file can be taken directly",
1040 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1041 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1042 "curves that make sense in the context of molecular dynamics, mainly",
1043 "exponential curves. More information is in the manual. To check the output",
1044 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1045 "original data and the fitted function to a new data file. The fitting",
1046 "parameters are stored as comment in the output file."
1048 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1049 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1050 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1051 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE;
1052 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10;
1053 static real temp = 298.15, fit_start = 1, fit_end = 60;
1055 /* must correspond to enum avbar* declared at beginning of file */
1056 static const char* avbar_opt[avbarNR + 1] = { nullptr, "none", "stddev", "error", "90", nullptr };
1059 { "-time", FALSE, etBOOL, { &bHaveT }, "Expect a time in the input" },
1060 { "-b", FALSE, etREAL, { &tb }, "First time to read from set" },
1061 { "-e", FALSE, etREAL, { &te }, "Last time to read from set" },
1062 { "-n", FALSE, etINT, { &nsets_in }, "Read this number of sets separated by &" },
1063 { "-d", FALSE, etBOOL, { &bDer }, "Use the derivative" },
1068 "HIDDENThe derivative is the difference over this number of points" },
1069 { "-bw", FALSE, etREAL, { &binwidth }, "Binwidth for the distribution" },
1070 { "-errbar", FALSE, etENUM, { avbar_opt }, "Error bars for [TT]-av[tt]" },
1075 "Integrate data function(s) numerically using trapezium rule" },
1076 { "-aver_start", FALSE, etREAL, { &aver_start }, "Start averaging the integral from here" },
1081 "Interpret second data set as error in the y values for integrating" },
1086 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set "
1087 "will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets "
1088 "are present a multilinear regression will be performed yielding the constant A that "
1089 "minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] "
1090 "x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data "
1091 "set in the input file and x[SUB]i[sub] the others. Do read the information at the "
1092 "option [TT]-time[tt]." },
1097 "Do a Luzar and Chandler analysis on a correlation function and "
1098 "related as produced by [gmx-hbond]. When in addition the "
1099 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1100 "interpreted as errors in c(t) and n(t)." },
1105 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1110 "Time (ps) from which to start fitting the correlation functions in order to obtain the "
1111 "forward and backward rate constants for HB breaking and formation" },
1116 "Time (ps) where to stop fitting the correlation functions in order to obtain the "
1117 "forward and backward rate constants for HB breaking and formation. Only with "
1123 "HIDDENMinimum number of blocks for block averaging" },
1128 "HIDDENResolution for the block averaging, block size increases with"
1129 " a factor 2^(1/resol)" },
1134 "HIDDENAlways use a single exponential fit for the error estimate" },
1135 { "-eenlc", FALSE, etBOOL, { &bEENLC }, "HIDDENAllow a negative long-time correlation" },
1140 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1145 "Print the high-frequency fluctuation after filtering with a cosine filter of this "
1147 { "-power", FALSE, etBOOL, { &bPower }, "Fit data to: b t^a" },
1148 { "-subav", FALSE, etBOOL, { &bSubAv }, "Subtract the average before autocorrelating" },
1149 { "-oneacf", FALSE, etBOOL, { &bAverCorr }, "Calculate one ACF over all sets" },
1151 #define NPA asize(pa)
1154 int n, nlast, s, nset, i, j = 0;
1155 real ** val, *t, dt, tot, error;
1156 double * av, *sig, cum1, cum2, cum3, cum4, db;
1157 const char * acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *fitfile;
1158 gmx_output_env_t* oenv;
1161 { efXVG, "-f", "graph", ffREAD }, { efXVG, "-ac", "autocorr", ffOPTWR },
1162 { efXVG, "-msd", "msd", ffOPTWR }, { efXVG, "-cc", "coscont", ffOPTWR },
1163 { efXVG, "-dist", "distr", ffOPTWR }, { efXVG, "-av", "average", ffOPTWR },
1164 { efXVG, "-ee", "errest", ffOPTWR }, { efXVG, "-fitted", "fitted", ffOPTWR },
1165 { efLOG, "-g", "fitlog", ffOPTWR }
1167 #define NFILE asize(fnm)
1173 ppa = add_acf_pargs(&npargs, pa);
1175 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW, NFILE, fnm, npargs, ppa, asize(desc), desc, 0,
1182 acfile = opt2fn_null("-ac", NFILE, fnm);
1183 msdfile = opt2fn_null("-msd", NFILE, fnm);
1184 ccfile = opt2fn_null("-cc", NFILE, fnm);
1185 distfile = opt2fn_null("-dist", NFILE, fnm);
1186 avfile = opt2fn_null("-av", NFILE, fnm);
1187 eefile = opt2fn_null("-ee", NFILE, fnm);
1188 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == nullptr)
1190 fitfile = opt2fn("-g", NFILE, fnm);
1194 fitfile = opt2fn_null("-g", NFILE, fnm);
1197 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT, opt2parg_bSet("-b", npargs, ppa), tb,
1198 opt2parg_bSet("-e", npargs, ppa), te, nsets_in, &nset, &n, &dt, &t);
1199 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1203 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n", d, d);
1205 for (s = 0; s < nset; s++)
1207 for (i = 0; (i < n); i++)
1209 val[s][i] = (val[s][i + d] - val[s][i]) / (d * dt);
1218 printf("Calculating the integral using the trapezium rule\n");
1222 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1223 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1227 for (s = 0; s < nset; s++)
1229 sum = evaluate_integral(n, t, val[s], nullptr, aver_start, &stddev);
1230 printf("Integral %d %10.5f +/- %10.5f\n", s + 1, sum, stddev);
1235 if (fitfile != nullptr)
1237 print_fitted_function(fitfile, opt2fn_null("-fitted", NFILE, fnm), bXYdy, nset, n, t, val,
1241 printf(" std. dev. relative deviation of\n");
1242 printf(" standard --------- cumulants from those of\n");
1243 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1244 printf(" cum. 3 cum. 4\n");
1247 for (s = 0; (s < nset); s++)
1253 for (i = 0; (i < n); i++)
1258 for (i = 0; (i < n); i++)
1260 db = val[s][i] - cum1;
1262 cum3 += db * db * db;
1263 cum4 += db * db * db * db;
1269 sig[s] = std::sqrt(cum2);
1272 error = std::sqrt(cum2 / (n - 1));
1278 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n", s + 1, av[s], sig[s], error,
1279 sig[s] != 0.0 ? cum3 / (sig[s] * sig[s] * sig[s] * std::sqrt(8 / M_PI)) : 0,
1280 sig[s] != 0.0 ? cum4 / (sig[s] * sig[s] * sig[s] * sig[s] * 3) - 1 : 0);
1284 if (filtlen != 0.0F)
1286 filter(filtlen, n, nset, val, dt);
1291 out = xvgropen(msdfile, "Mean square displacement", "time", "MSD (nm\\S2\\N)", oenv);
1292 nlast = static_cast<int>(n * frac);
1293 for (s = 0; s < nset; s++)
1295 for (j = 0; j <= nlast; j++)
1299 fprintf(stderr, "\r%d", j);
1303 for (i = 0; i < n - j; i++)
1305 tot += gmx::square(val[s][i] - val[s][i + j]);
1308 fprintf(out, " %g %8g\n", dt * j, tot);
1312 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1316 fprintf(stderr, "\r%d, time=%g\n", j - 1, (j - 1) * dt);
1321 plot_coscont(ccfile, n, nset, val, oenv);
1326 histogram(distfile, binwidth, n, nset, val, oenv);
1330 average(avfile, nenum(avbar_opt), n, nset, val, t);
1334 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt, bEeFitAc, bEESEF, bEENLC, oenv);
1338 power_fit(n, nset, val, t);
1341 if (acfile != nullptr)
1345 for (s = 0; s < nset; s++)
1347 for (i = 0; i < n; i++)
1353 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt, eacNormal, bAverCorr);
1358 regression_analysis(n, bXYdy, t, nset, val);
1363 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1366 view_all(oenv, NFILE, fnm);