2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "gromacs/commandline/pargs.h"
45 #include "gromacs/commandline/viewit.h"
46 #include "gromacs/correlationfunctions/autocorr.h"
47 #include "gromacs/correlationfunctions/expfit.h"
48 #include "gromacs/correlationfunctions/integrate.h"
49 #include "gromacs/fileio/xvgr.h"
50 #include "gromacs/gmxana/gmx_ana.h"
51 #include "gromacs/gmxana/gstat.h"
52 #include "gromacs/linearalgebra/matrix.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/units.h"
55 #include "gromacs/math/utilities.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/statistics/statistics.h"
58 #include "gromacs/utility/arraysize.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/futil.h"
61 #include "gromacs/utility/pleasecite.h"
62 #include "gromacs/utility/smalloc.h"
63 #include "gromacs/utility/snprintf.h"
65 /* must correspond to char *avbar_opt[] declared in main() */
76 static void power_fit(int n, int nset, real** val, real* t)
78 real *x, *y, quality, a, b, r;
86 for (i = 0; i < n; i++)
90 x[i] = std::log(t[i]);
97 "First time is not larger than 0, using index number as time for power fit\n");
98 for (i = 0; i < n; i++)
100 x[i] = std::log1p(static_cast<real>(i));
104 for (s = 0; s < nset; s++)
106 for (i = 0; i < n && val[s][i] >= 0; i++)
108 y[i] = std::log(val[s][i]);
112 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
114 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
115 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n", s + 1, quality, a, std::exp(b));
122 static real cosine_content(int nhp, int n, const real* y)
123 /* Assumes n equidistant points */
125 double fac, cosyint, yyint;
133 fac = M_PI * nhp / (n - 1);
137 for (i = 0; i < n; i++)
139 cosyint += std::cos(fac * i) * y[i];
140 yyint += y[i] * y[i];
143 return 2 * cosyint * cosyint / (n * yyint);
146 static void plot_coscont(const char* ccfile, int n, int nset, real** val, const gmx_output_env_t* oenv)
152 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content", oenv);
154 for (s = 0; s < nset; s++)
156 cc = cosine_content(s + 1, n, val[s]);
157 fprintf(fp, " %d %g\n", s + 1, cc);
158 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n", s + 1, 0.5 * (s + 1), cc);
160 fprintf(stdout, "\n");
165 static void regression_analysis(int n, gmx_bool bXYdy, real* x, int nset, real** val)
167 real S, chi2, a, b, da, db, r = 0;
170 if (bXYdy || (nset == 1))
172 printf("Fitting data to a function f(x) = ax + b\n");
173 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
174 printf("Error estimates will be given if w_i (sigma) values are given\n");
175 printf("(use option -xydy).\n\n");
178 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
180 gmx_fatal(FARGS, "Error fitting the data: %s", gmx_stats_message(ok));
185 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
187 gmx_fatal(FARGS, "Error fitting the data: %s", gmx_stats_message(ok));
190 chi2 = gmx::square((n - 2) * S);
191 printf("Chi2 = %g\n", chi2);
192 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
193 printf("Correlation coefficient = %.1f%%\n", 100 * r);
197 printf("a = %g +/- %g\n", a, da);
198 printf("b = %g +/- %g\n", b, db);
202 printf("a = %g\n", a);
203 printf("b = %g\n", b);
208 double chi2, *a, **xx, *y;
213 for (j = 0; (j < nset - 1); j++)
217 for (i = 0; (i < n); i++)
220 for (j = 1; (j < nset); j++)
222 xx[j - 1][i] = val[j][i];
226 chi2 = multi_regression(nullptr, n, y, nset - 1, xx, a);
227 printf("Fitting %d data points in %d sets\n", n, nset - 1);
228 printf("chi2 = %g\n", chi2);
230 for (i = 0; (i < nset - 1); i++)
242 static void histogram(const char* distfile, real binwidth, int n, int nset, real** val, const gmx_output_env_t* oenv)
246 double minval, maxval;
252 for (s = 0; s < nset; s++)
254 for (i = 0; i < n; i++)
256 if (val[s][i] < minval)
260 else if (val[s][i] > maxval)
267 minval = binwidth * std::floor(minval / binwidth);
268 maxval = binwidth * std::ceil(maxval / binwidth);
275 nbin = gmx::roundToInt(((maxval - minval) / binwidth) + 1);
276 fprintf(stderr, "Making distributions with %d bins\n", nbin);
278 fp = xvgropen(distfile, "Distribution", "", "", oenv);
279 for (s = 0; s < nset; s++)
281 for (i = 0; i < nbin; i++)
285 for (i = 0; i < n; i++)
287 histo[gmx::roundToInt((val[s][i] - minval) / binwidth)]++;
289 for (i = 0; i < nbin; i++)
291 fprintf(fp, " %g %g\n", minval + i * binwidth, static_cast<double>(histo[i]) / (n * binwidth));
295 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
301 static int real_comp(const void* a, const void* b)
303 real dif = *reinterpret_cast<const real*>(a) - *reinterpret_cast<const real*>(b);
319 static void average(const char* avfile, int avbar_opt, int n, int nset, real** val, real* t)
326 fp = gmx_ffopen(avfile, "w");
327 if ((avbar_opt == avbarERROR) && (nset == 1))
329 avbar_opt = avbarNONE;
331 if (avbar_opt != avbarNONE)
333 if (avbar_opt == avbar90)
336 fprintf(fp, "@TYPE xydydy\n");
337 edge = gmx::roundToInt(nset * 0.05);
339 "Errorbars: discarding %d points on both sides: %d%%"
342 gmx::roundToInt(100. * (nset - 2 * edge) / nset));
346 fprintf(fp, "@TYPE xydy\n");
350 for (i = 0; i < n; i++)
353 for (s = 0; s < nset; s++)
358 fprintf(fp, " %g %g", t[i], av);
360 if (avbar_opt != avbarNONE)
362 if (avbar_opt == avbar90)
364 for (s = 0; s < nset; s++)
368 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
369 fprintf(fp, " %g %g", tmp[nset - 1 - edge] - av, av - tmp[edge]);
373 for (s = 0; s < nset; s++)
375 var += gmx::square(val[s][i] - av);
377 if (avbar_opt == avbarSTDDEV)
379 err = std::sqrt(var / nset);
383 err = std::sqrt(var / (nset * (nset - 1)));
385 fprintf(fp, " %g", err);
392 if (avbar_opt == avbar90)
398 /*! \brief Compute final error estimate.
400 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
402 static real optimal_error_estimate(double sigma, const double fitparm[], real tTotal)
404 // When sigma is zero, the fitparm data can be uninitialized
409 double ss = fitparm[1] * fitparm[0] + (1 - fitparm[1]) * fitparm[2];
410 if ((tTotal <= 0) || (ss <= 0))
412 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n", tTotal, ss);
416 return sigma * std::sqrt(2 * ss / tTotal);
419 static void estimate_error(const char* eefile,
429 gmx_bool bSingleExpFit,
430 gmx_bool bAllowNegLTCorr,
431 const gmx_output_env_t* oenv)
434 int bs, prev_bs, nbs, nb;
439 real * tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
441 real ee, a, tau1, tau2;
445 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
450 fp = xvgropen(eefile, "Error estimates", "Block size (time)", "Error estimate", oenv);
451 if (output_env_get_print_xvgr_codes(oenv))
453 fprintf(fp, "@ subtitle \"using block averaging, total time %g (%d points)\"\n", (n - 1) * dt, n);
456 xvgr_legend(fp, 2 * nset, leg, oenv);
459 spacing = std::pow(2.0, 1.0 / resol);
463 for (s = 0; s < nset; s++)
470 bs = n / static_cast<int>(nbr);
475 for (i = 0; i < nb; i++)
478 for (j = 0; j < bs; j++)
480 blav += val[s][bs * i + j];
482 var += gmx::square(av[s] - blav / bs);
491 ybs[nbs] = var / (nb * (nb - 1.0)) * (n * dt) / (sig[s] * sig[s]);
510 for (i = 0; i < nbs / 2; i++)
513 tbs[i] = tbs[nbs - 1 - i];
514 tbs[nbs - 1 - i] = rtmp;
516 ybs[i] = ybs[nbs - 1 - i];
517 ybs[nbs - 1 - i] = rtmp;
519 /* The initial slope of the normalized ybs^2 is 1.
520 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
521 * From this we take our initial guess for tau1.
523 twooe = 2.0 / std::exp(1.0);
529 } while (i < nbs - 1 && (ybs[i] > ybs[i + 1] || ybs[i] > twooe * tau1_est));
534 "Data set %d has strange time correlations:\n"
535 "the std. error using single points is larger than that of blocks of 2 "
537 "The error estimate might be inaccurate, check the fit\n",
539 /* Use the total time as tau for the fitting weights */
540 tau_sig = (n - 1) * dt;
549 fprintf(debug, "set %d tau1 estimate %f\n", s + 1, tau1_est);
552 /* Generate more or less appropriate sigma's,
553 * also taking the density of points into account.
555 for (i = 0; i < nbs; i++)
559 dens = tbs[1] / tbs[0] - 1;
561 else if (i == nbs - 1)
563 dens = tbs[nbs - 1] / tbs[nbs - 2] - 1;
567 dens = 0.5 * (tbs[i + 1] / tbs[i - 1] - 1);
569 fitsig[i] = std::sqrt((tau_sig + tbs[i]) / dens);
574 fitparm[0] = tau1_est;
576 /* We set the initial guess for tau2
577 * to halfway between tau1_est and the total time (on log scale).
579 fitparm[2] = std::sqrt(tau1_est * (n - 1) * dt);
580 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST, fitparm, 0, nullptr);
582 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
583 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n - 1) * dt)
587 if (fitparm[2] > (n - 1) * dt)
590 "Warning: tau2 is longer than the length of the data (%g)\n"
591 " the statistics might be bad\n",
596 fprintf(stdout, "a fitted parameter is negative\n");
599 "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
600 optimal_error_estimate(sig[s], fitparm, n * dt),
604 /* Do a fit with tau2 fixed at the total time.
605 * One could also choose any other large value for tau2.
607 fitparm[0] = tau1_est;
609 fitparm[2] = (n - 1) * dt;
610 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
611 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST, fitparm, 4, nullptr);
613 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0 || (fitparm[1] > 1 && !bAllowNegLTCorr))
617 fprintf(stdout, "a fitted parameter is negative\n");
619 "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
620 optimal_error_estimate(sig[s], fitparm, n * dt),
625 /* Do a single exponential fit */
626 fprintf(stderr, "Will use a single exponential fit for set %d\n", s + 1);
627 fitparm[0] = tau1_est;
630 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt * n, oenv, bDebugMode(), effnERREST, fitparm, 6, nullptr);
633 ee = optimal_error_estimate(sig[s], fitparm, n * dt);
638 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n", s + 1, ee, a, tau1, tau2);
639 if (output_env_get_xvg_format(oenv) == XvgFormat::Xmgr)
641 fprintf(fp, "@ legend string %d \"av %f\"\n", 2 * s, av[s]);
643 "@ legend string %d \"ee %6g\"\n",
645 optimal_error_estimate(sig[s], fitparm, n * dt));
647 else if (output_env_get_xvg_format(oenv) == XvgFormat::Xmgrace)
649 fprintf(fp, "@ s%d legend \"av %f\"\n", 2 * s, av[s]);
650 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2 * s + 1, optimal_error_estimate(sig[s], fitparm, n * dt));
652 for (i = 0; i < nbs; i++)
657 sig[s] * std::sqrt(ybs[i] / (n * dt)),
658 sig[s] * std::sqrt(fit_function(effnERREST, fitparm, tbs[i]) / (n * dt)));
668 for (i = 0; i < n; i++)
670 ac[i] = val[s][i] - av[s];
673 fitsig[i] = std::sqrt(static_cast<real>(i));
681 nullptr, oenv, nullptr, n, 1, -1, &ac, dt, eacNormal, 1, FALSE, TRUE, FALSE, 0, 0, effnNONE);
685 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
687 for (i = 1; i <= fitlen / 2; i++)
693 /* Generate more or less appropriate sigma's */
694 for (i = 0; i <= fitlen; i++)
696 fitsig[i] = std::sqrt(acint + dt * i);
699 ac_fit[0] = 0.5 * acint;
701 ac_fit[2] = 10 * acint;
702 do_lmfit(n / nb_min, ac, fitsig, dt, nullptr, 0, fitlen * dt, oenv, bDebugMode(), effnEXPEXP, ac_fit, 0, nullptr);
703 ac_fit[3] = 1 - ac_fit[1];
706 "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
708 optimal_error_estimate(sig[s], ac_fit, n * dt),
713 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
714 for (i = 0; i < nbs; i++)
719 sig[s] * std::sqrt(fit_function(effnERREST, ac_fit, tbs[i])) / (n * dt));
726 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
735 static void luzar_correl(int nn, real* time, int nset, real** val, real temp, gmx_bool bError, real fit_start)
741 please_cite(stdout, "Spoel2006b");
743 /* Compute negative derivative k(t) = -dc(t)/dt */
747 compute_derivative(nn, time, val[0], kt);
748 for (j = 0; (j < nn); j++)
755 for (j = 0; (j < nn); j++)
757 d2 += gmx::square(kt[j] - val[3][j]);
759 fprintf(debug, "RMS difference in derivatives is %g\n", std::sqrt(d2 / nn));
761 analyse_corr(nn, time, val[0], val[2], kt, nullptr, nullptr, nullptr, fit_start, temp);
766 analyse_corr(nn, time, val[0], val[2], val[4], val[1], val[3], val[5], fit_start, temp);
770 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
771 printf("Not doing anything. Sorry.\n");
775 static void filter(real flen, int n, int nset, real** val, real dt)
778 double *filt, sum, vf, fluc, fluctot;
780 f = static_cast<int>(flen / (2 * dt));
784 for (i = 1; i <= f; i++)
786 filt[i] = std::cos(M_PI * dt * i / flen);
789 for (i = 0; i <= f; i++)
793 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n - 2 * f);
794 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2 * f + 1);
796 for (s = 0; s < nset; s++)
799 for (i = f; i < n - f; i++)
801 vf = filt[0] * val[s][i];
802 for (j = 1; j <= f; j++)
804 vf += filt[j] * (val[s][i - f] + val[s][i + f]);
806 fluc += gmx::square(val[s][i] - vf);
810 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s + 1, std::sqrt(fluc));
812 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", std::sqrt(fluctot / nset));
813 fprintf(stdout, "\n");
818 static void do_fit(FILE* out,
826 const gmx_output_env_t* oenv,
827 const char* fn_fitted)
829 real * c1 = nullptr, *sig = nullptr;
831 real tendfit, tbeginfit;
832 int i, efitfn, nparm;
834 efitfn = get_acffitfn();
835 nparm = effnNparams(efitfn);
836 fprintf(out, "Will fit to the following function:\n");
837 fprintf(out, "%s\n", effnDescription(efitfn));
843 fprintf(out, "Using two columns as y and sigma values\n");
849 if (opt2parg_bSet("-beginfit", npargs, ppa))
851 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
857 if (opt2parg_bSet("-endfit", npargs, ppa))
859 tendfit = opt2parg_real("-endfit", npargs, ppa);
863 tendfit = x0[ny - 1];
866 snew(fitparm, nparm);
869 case effnEXP1: fitparm[0] = 0.5; break;
876 fitparm[1] = 0.5 * c1[0];
880 fitparm[0] = fitparm[2] = 0.5 * c1[0];
886 fitparm[0] = fitparm[2] = fitparm[4] = 0.33 * c1[0];
893 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25 * c1[0];
901 fprintf(out, "Warning: don't know how to initialize the parameters\n");
902 for (i = 0; (i < nparm); i++)
907 fprintf(out, "Starting parameters:\n");
908 for (i = 0; (i < nparm); i++)
910 fprintf(out, "a%-2d = %12.5e\n", i + 1, fitparm[i]);
912 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit, oenv, bDebugMode(), efitfn, fitparm, 0, fn_fitted) > 0)
914 for (i = 0; (i < nparm); i++)
916 fprintf(out, "a%-2d = %12.5e\n", i + 1, fitparm[i]);
921 fprintf(out, "No solution was found\n");
925 static void print_fitted_function(const char* fitfile,
926 const char* fn_fitted,
934 gmx_output_env_t* oenv)
936 FILE* out_fit = gmx_ffopen(fitfile, "w");
937 if (bXYdy && nset >= 2)
939 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv, fn_fitted);
943 char* buf2 = nullptr;
945 if (nullptr != fn_fitted)
947 buflen = std::strlen(fn_fitted) + 32;
949 std::strncpy(buf2, fn_fitted, buflen);
950 buf2[std::strlen(buf2) - 4] = '\0';
952 for (s = 0; s < nset; s++)
955 if (nullptr != fn_fitted)
958 snprintf(buf, n, "%s_%d.xvg", buf2, s);
960 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
965 gmx_ffclose(out_fit);
968 int gmx_analyze(int argc, char* argv[])
970 static const char* desc[] = {
971 "[THISMODULE] reads an ASCII file and analyzes data sets.",
972 "A line in the input file may start with a time",
973 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
974 "Multiple sets can also be",
975 "read when they are separated by & (option [TT]-n[tt]);",
976 "in this case only one [IT]y[it]-value is read from each line.",
977 "All lines starting with # and @ are skipped.",
978 "All analyses can also be done for the derivative of a set",
979 "(option [TT]-d[tt]).[PAR]",
981 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
982 "points are equidistant in time.[PAR]",
984 "[THISMODULE] always shows the average and standard deviation of each",
985 "set, as well as the relative deviation of the third",
986 "and fourth cumulant from those of a Gaussian distribution with the same",
987 "standard deviation.[PAR]",
989 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
990 "Be sure that the time interval between data points is",
991 "much shorter than the time scale of the autocorrelation.[PAR]",
993 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
994 "i/2 periods. The formula is::",
996 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 ",
997 " / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
999 "This is useful for principal components obtained from covariance",
1000 "analysis, since the principal components of random diffusion are",
1001 "pure cosines.[PAR]",
1003 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1005 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1007 "Option [TT]-av[tt] produces the average over the sets.",
1008 "Error bars can be added with the option [TT]-errbar[tt].",
1009 "The errorbars can represent the standard deviation, the error",
1010 "(assuming the points are independent) or the interval containing",
1011 "90% of the points, by discarding 5% of the points at the top and",
1014 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1015 "A set is divided in a number of blocks and averages are calculated for",
1016 "each block. The error for the total average is calculated from",
1017 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1018 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1019 "These errors are plotted as a function of the block size.",
1020 "Also an analytical block average curve is plotted, assuming",
1021 "that the autocorrelation is a sum of two exponentials.",
1022 "The analytical curve for the block average is::",
1024 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ",
1025 " ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) ",
1026 " [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1027 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] ",
1028 " (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + ",
1029 " 1)))[sqrt][math],",
1031 "where T is the total time.",
1032 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are ",
1033 "obtained by fitting f^2(t) to error^2.",
1034 "When the actual block average is very close to the analytical curve,",
1035 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] ",
1036 "+ (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1037 "The complete derivation is given in",
1038 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1040 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1041 "of each set and over all sets with respect to a filtered average.",
1042 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1043 "to len/2. len is supplied with the option [TT]-filter[tt].",
1044 "This filter reduces oscillations with period len/2 and len by a factor",
1045 "of 0.79 and 0.33 respectively.[PAR]",
1047 "Option [TT]-g[tt] fits the data to the function given with option",
1048 "[TT]-fitfn[tt].[PAR]",
1050 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1051 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1052 "zero or with a negative value are ignored.[PAR]",
1054 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1055 "on output from [gmx-hbond]. The input file can be taken directly",
1056 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1057 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1058 "curves that make sense in the context of molecular dynamics, mainly",
1059 "exponential curves. More information is in the manual. To check the output",
1060 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1061 "original data and the fitted function to a new data file. The fitting",
1062 "parameters are stored as comment in the output file."
1064 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1065 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1066 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1067 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE;
1068 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10;
1069 static real temp = 298.15, fit_start = 1, fit_end = 60;
1071 /* must correspond to enum avbar* declared at beginning of file */
1072 static const char* avbar_opt[avbarNR + 1] = { nullptr, "none", "stddev", "error", "90", nullptr };
1075 { "-time", FALSE, etBOOL, { &bHaveT }, "Expect a time in the input" },
1076 { "-b", FALSE, etREAL, { &tb }, "First time to read from set" },
1077 { "-e", FALSE, etREAL, { &te }, "Last time to read from set" },
1078 { "-n", FALSE, etINT, { &nsets_in }, "Read this number of sets separated by &" },
1079 { "-d", FALSE, etBOOL, { &bDer }, "Use the derivative" },
1084 "HIDDENThe derivative is the difference over this number of points" },
1085 { "-bw", FALSE, etREAL, { &binwidth }, "Binwidth for the distribution" },
1086 { "-errbar", FALSE, etENUM, { avbar_opt }, "Error bars for [TT]-av[tt]" },
1091 "Integrate data function(s) numerically using trapezium rule" },
1092 { "-aver_start", FALSE, etREAL, { &aver_start }, "Start averaging the integral from here" },
1097 "Interpret second data set as error in the y values for integrating" },
1102 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set "
1103 "will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets "
1104 "are present a multilinear regression will be performed yielding the constant A that "
1105 "minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] "
1106 "x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data "
1107 "set in the input file and x[SUB]i[sub] the others. Do read the information at the "
1108 "option [TT]-time[tt]." },
1113 "Do a Luzar and Chandler analysis on a correlation function and "
1114 "related as produced by [gmx-hbond]. When in addition the "
1115 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1116 "interpreted as errors in c(t) and n(t)." },
1121 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1126 "Time (ps) from which to start fitting the correlation functions in order to obtain the "
1127 "forward and backward rate constants for HB breaking and formation" },
1132 "Time (ps) where to stop fitting the correlation functions in order to obtain the "
1133 "forward and backward rate constants for HB breaking and formation. Only with "
1139 "HIDDENMinimum number of blocks for block averaging" },
1144 "HIDDENResolution for the block averaging, block size increases with"
1145 " a factor 2^(1/resol)" },
1150 "HIDDENAlways use a single exponential fit for the error estimate" },
1151 { "-eenlc", FALSE, etBOOL, { &bEENLC }, "HIDDENAllow a negative long-time correlation" },
1156 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1161 "Print the high-frequency fluctuation after filtering with a cosine filter of this "
1163 { "-power", FALSE, etBOOL, { &bPower }, "Fit data to: b t^a" },
1164 { "-subav", FALSE, etBOOL, { &bSubAv }, "Subtract the average before autocorrelating" },
1165 { "-oneacf", FALSE, etBOOL, { &bAverCorr }, "Calculate one ACF over all sets" },
1167 #define NPA asize(pa)
1170 int n, nlast, s, nset, i, j = 0;
1171 real ** val, *t, dt, tot, error;
1172 double * av, *sig, cum1, cum2, cum3, cum4, db;
1173 const char * acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *fitfile;
1174 gmx_output_env_t* oenv;
1177 { efXVG, "-f", "graph", ffREAD }, { efXVG, "-ac", "autocorr", ffOPTWR },
1178 { efXVG, "-msd", "msd", ffOPTWR }, { efXVG, "-cc", "coscont", ffOPTWR },
1179 { efXVG, "-dist", "distr", ffOPTWR }, { efXVG, "-av", "average", ffOPTWR },
1180 { efXVG, "-ee", "errest", ffOPTWR }, { efXVG, "-fitted", "fitted", ffOPTWR },
1181 { efLOG, "-g", "fitlog", ffOPTWR }
1183 #define NFILE asize(fnm)
1189 ppa = add_acf_pargs(&npargs, pa);
1191 if (!parse_common_args(
1192 &argc, argv, PCA_CAN_VIEW, NFILE, fnm, npargs, ppa, asize(desc), desc, 0, nullptr, &oenv))
1198 acfile = opt2fn_null("-ac", NFILE, fnm);
1199 msdfile = opt2fn_null("-msd", NFILE, fnm);
1200 ccfile = opt2fn_null("-cc", NFILE, fnm);
1201 distfile = opt2fn_null("-dist", NFILE, fnm);
1202 avfile = opt2fn_null("-av", NFILE, fnm);
1203 eefile = opt2fn_null("-ee", NFILE, fnm);
1204 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == nullptr)
1206 fitfile = opt2fn("-g", NFILE, fnm);
1210 fitfile = opt2fn_null("-g", NFILE, fnm);
1213 val = read_xvg_time(opt2fn("-f", NFILE, fnm),
1215 opt2parg_bSet("-b", npargs, ppa),
1217 opt2parg_bSet("-e", npargs, ppa),
1224 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1228 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n", d, d);
1230 for (s = 0; s < nset; s++)
1232 for (i = 0; (i < n); i++)
1234 val[s][i] = (val[s][i + d] - val[s][i]) / (d * dt);
1243 printf("Calculating the integral using the trapezium rule\n");
1247 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1248 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1252 for (s = 0; s < nset; s++)
1254 sum = evaluate_integral(n, t, val[s], nullptr, aver_start, &stddev);
1255 printf("Integral %d %10.5f +/- %10.5f\n", s + 1, sum, stddev);
1260 if (fitfile != nullptr)
1262 print_fitted_function(
1263 fitfile, opt2fn_null("-fitted", NFILE, fnm), bXYdy, nset, n, t, val, npargs, ppa, oenv);
1266 printf(" std. dev. relative deviation of\n");
1267 printf(" standard --------- cumulants from those of\n");
1268 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1269 printf(" cum. 3 cum. 4\n");
1272 for (s = 0; (s < nset); s++)
1278 for (i = 0; (i < n); i++)
1283 for (i = 0; (i < n); i++)
1285 db = val[s][i] - cum1;
1287 cum3 += db * db * db;
1288 cum4 += db * db * db * db;
1294 sig[s] = std::sqrt(cum2);
1297 error = std::sqrt(cum2 / (n - 1));
1303 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1308 sig[s] != 0.0 ? cum3 / (sig[s] * sig[s] * sig[s] * std::sqrt(8 / M_PI)) : 0,
1309 sig[s] != 0.0 ? cum4 / (sig[s] * sig[s] * sig[s] * sig[s] * 3) - 1 : 0);
1313 if (filtlen != 0.0F)
1315 filter(filtlen, n, nset, val, dt);
1320 out = xvgropen(msdfile, "Mean square displacement", "time", "MSD (nm\\S2\\N)", oenv);
1321 nlast = static_cast<int>(n * frac);
1322 for (s = 0; s < nset; s++)
1324 for (j = 0; j <= nlast; j++)
1328 fprintf(stderr, "\r%d", j);
1332 for (i = 0; i < n - j; i++)
1334 tot += gmx::square(val[s][i] - val[s][i + j]);
1337 fprintf(out, " %g %8g\n", dt * j, tot);
1341 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1345 fprintf(stderr, "\r%d, time=%g\n", j - 1, (j - 1) * dt);
1350 plot_coscont(ccfile, n, nset, val, oenv);
1355 histogram(distfile, binwidth, n, nset, val, oenv);
1359 average(avfile, nenum(avbar_opt), n, nset, val, t);
1363 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt, bEeFitAc, bEESEF, bEENLC, oenv);
1367 power_fit(n, nset, val, t);
1370 if (acfile != nullptr)
1374 for (s = 0; s < nset; s++)
1376 for (i = 0; i < n; i++)
1382 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt, eacNormal, bAverCorr);
1387 regression_analysis(n, bXYdy, t, nset, val);
1392 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1395 view_all(oenv, NFILE, fnm);