2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/correlationfunctions/autocorr.h"
45 #include "gromacs/correlationfunctions/expfit.h"
46 #include "gromacs/correlationfunctions/integrate.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/gmx_ana.h"
49 #include "gromacs/gmxana/gstat.h"
50 #include "gromacs/legacyheaders/copyrite.h"
51 #include "gromacs/legacyheaders/macros.h"
52 #include "gromacs/legacyheaders/readinp.h"
53 #include "gromacs/legacyheaders/txtdump.h"
54 #include "gromacs/legacyheaders/typedefs.h"
55 #include "gromacs/legacyheaders/viewit.h"
56 #include "gromacs/linearalgebra/matrix.h"
57 #include "gromacs/math/vec.h"
58 #include "gromacs/statistics/statistics.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/futil.h"
61 #include "gromacs/utility/smalloc.h"
62 #include "gromacs/utility/snprintf.h"
64 /* must correspond to char *avbar_opt[] declared in main() */
66 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
69 static void power_fit(int n, int nset, real **val, real *t)
71 real *x, *y, quality, a, b, r;
79 for (i = 0; i < n; i++)
83 x[i] = std::log(t[i]);
89 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
90 for (i = 0; i < n; i++)
92 x[i] = gmx_log1p(static_cast<real>(i));
96 for (s = 0; s < nset; s++)
98 for (i = 0; i < n && val[s][i] >= 0; i++)
100 y[i] = std::log(val[s][i]);
104 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
106 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
107 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
108 s+1, quality, a, std::exp(b));
115 static real cosine_content(int nhp, int n, real *y)
116 /* Assumes n equidistant points */
118 double fac, cosyint, yyint;
126 fac = M_PI*nhp/(n-1);
130 for (i = 0; i < n; i++)
132 cosyint += std::cos(fac*i)*y[i];
136 return 2*cosyint*cosyint/(n*yyint);
139 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
140 const output_env_t oenv)
146 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
149 for (s = 0; s < nset; s++)
151 cc = cosine_content(s+1, n, val[s]);
152 fprintf(fp, " %d %g\n", s+1, cc);
153 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
156 fprintf(stdout, "\n");
161 static void regression_analysis(int n, gmx_bool bXYdy,
162 real *x, int nset, real **val)
164 real S, chi2, a, b, da, db, r = 0;
167 if (bXYdy || (nset == 1))
169 printf("Fitting data to a function f(x) = ax + b\n");
170 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
171 printf("Error estimates will be given if w_i (sigma) values are given\n");
172 printf("(use option -xydy).\n\n");
175 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
177 gmx_fatal(FARGS, "Error fitting the data: %s",
178 gmx_stats_message(ok));
183 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
185 gmx_fatal(FARGS, "Error fitting the data: %s",
186 gmx_stats_message(ok));
190 printf("Chi2 = %g\n", chi2);
191 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
192 printf("Correlation coefficient = %.1f%%\n", 100*r);
196 printf("a = %g +/- %g\n", a, da);
197 printf("b = %g +/- %g\n", b, db);
201 printf("a = %g\n", a);
202 printf("b = %g\n", b);
207 double chi2, *a, **xx, *y;
212 for (j = 0; (j < nset-1); j++)
216 for (i = 0; (i < n); i++)
219 for (j = 1; (j < nset); j++)
221 xx[j-1][i] = val[j][i];
225 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
226 printf("Fitting %d data points in %d sets\n", n, nset-1);
227 printf("chi2 = %g\n", chi2);
229 for (i = 0; (i < nset-1); i++)
241 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
242 const output_env_t oenv)
246 double minval, maxval;
252 for (s = 0; s < nset; s++)
254 for (i = 0; i < n; i++)
256 if (val[s][i] < minval)
260 else if (val[s][i] > maxval)
267 minval = binwidth*std::floor(minval/binwidth);
268 maxval = binwidth*std::ceil(maxval/binwidth);
275 nbin = static_cast<int>(((maxval - minval)/binwidth + 0.5) + 1);
276 fprintf(stderr, "Making distributions with %d bins\n", nbin);
278 fp = xvgropen(distfile, "Distribution", "", "", oenv);
279 for (s = 0; s < nset; s++)
281 for (i = 0; i < nbin; i++)
285 for (i = 0; i < n; i++)
287 histo[static_cast<int>((val[s][i] - minval)/binwidth + 0.5)]++;
289 for (i = 0; i < nbin; i++)
291 fprintf(fp, " %g %g\n", minval+i*binwidth, static_cast<double>(histo[i])/(n*binwidth));
295 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
301 static int real_comp(const void *a, const void *b)
303 real dif = *(real *)a - *(real *)b;
319 static void average(const char *avfile, int avbar_opt,
320 int n, int nset, real **val, real *t)
327 fp = gmx_ffopen(avfile, "w");
328 if ((avbar_opt == avbarERROR) && (nset == 1))
330 avbar_opt = avbarNONE;
332 if (avbar_opt != avbarNONE)
334 if (avbar_opt == avbar90)
337 fprintf(fp, "@TYPE xydydy\n");
338 edge = static_cast<int>(nset*0.05+0.5);
339 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
340 " interval\n", edge, static_cast<int>(100*(nset-2*edge)/nset+0.5));
344 fprintf(fp, "@TYPE xydy\n");
348 for (i = 0; i < n; i++)
351 for (s = 0; s < nset; s++)
356 fprintf(fp, " %g %g", t[i], av);
358 if (avbar_opt != avbarNONE)
360 if (avbar_opt == avbar90)
362 for (s = 0; s < nset; s++)
366 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
367 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
371 for (s = 0; s < nset; s++)
373 var += sqr(val[s][i]-av);
375 if (avbar_opt == avbarSTDDEV)
377 err = std::sqrt(var/nset);
381 err = std::sqrt(var/(nset*(nset-1)));
383 fprintf(fp, " %g", err);
390 if (avbar_opt == avbar90)
396 /*! \brief Compute final error estimate.
398 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
400 static real optimal_error_estimate(double sigma, double fitparm[], real tTotal)
402 double ss = fitparm[1]*fitparm[0]+(1-fitparm[1])*fitparm[2];
403 if ((tTotal <= 0) || (ss <= 0))
405 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n",
410 return sigma*std::sqrt(2*ss/tTotal);
413 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
414 int nset, double *av, double *sig, real **val, real dt,
415 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
416 const output_env_t oenv)
419 int bs, prev_bs, nbs, nb;
424 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
426 real ee, a, tau1, tau2;
430 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
435 fp = xvgropen(eefile, "Error estimates",
436 "Block size (time)", "Error estimate", oenv);
437 if (output_env_get_print_xvgr_codes(oenv))
440 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
444 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
447 spacing = std::pow(2.0, 1.0/resol);
451 for (s = 0; s < nset; s++)
458 bs = n/static_cast<int>(nbr);
463 for (i = 0; i < nb; i++)
466 for (j = 0; j < bs; j++)
468 blav += val[s][bs*i+j];
470 var += sqr(av[s] - blav/bs);
479 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
495 for (i = 0; i < nbs/2; i++)
498 tbs[i] = tbs[nbs-1-i];
501 ybs[i] = ybs[nbs-1-i];
504 /* The initial slope of the normalized ybs^2 is 1.
505 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
506 * From this we take our initial guess for tau1.
508 twooe = 2.0/std::exp(1.0);
515 while (i < nbs - 1 &&
516 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
520 fprintf(stdout, "Data set %d has strange time correlations:\n"
521 "the std. error using single points is larger than that of blocks of 2 points\n"
522 "The error estimate might be inaccurate, check the fit\n",
524 /* Use the total time as tau for the fitting weights */
525 tau_sig = (n - 1)*dt;
534 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
537 /* Generate more or less appropriate sigma's,
538 * also taking the density of points into account.
540 for (i = 0; i < nbs; i++)
544 dens = tbs[1]/tbs[0] - 1;
548 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
552 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
554 fitsig[i] = std::sqrt((tau_sig + tbs[i])/dens);
559 fitparm[0] = tau1_est;
561 /* We set the initial guess for tau2
562 * to halfway between tau1_est and the total time (on log scale).
564 fitparm[2] = std::sqrt(tau1_est*(n-1)*dt);
565 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
566 bDebugMode(), effnERREST, fitparm, 0,
569 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
570 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
574 if (fitparm[2] > (n-1)*dt)
577 "Warning: tau2 is longer than the length of the data (%g)\n"
578 " the statistics might be bad\n",
583 fprintf(stdout, "a fitted parameter is negative\n");
585 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
586 optimal_error_estimate(sig[s], fitparm, n*dt),
587 fitparm[1], fitparm[0], fitparm[2]);
588 /* Do a fit with tau2 fixed at the total time.
589 * One could also choose any other large value for tau2.
591 fitparm[0] = tau1_est;
593 fitparm[2] = (n-1)*dt;
594 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
595 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
596 effnERREST, fitparm, 4, NULL);
598 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
599 || (fitparm[1] > 1 && !bAllowNegLTCorr))
603 fprintf(stdout, "a fitted parameter is negative\n");
604 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
605 optimal_error_estimate(sig[s], fitparm, n*dt),
606 fitparm[1], fitparm[0], fitparm[2]);
608 /* Do a single exponential fit */
609 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
610 fitparm[0] = tau1_est;
613 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
614 effnERREST, fitparm, 6, NULL);
617 ee = optimal_error_estimate(sig[s], fitparm, n*dt);
622 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
623 s+1, ee, a, tau1, tau2);
624 if (output_env_get_xvg_format(oenv) == exvgXMGR)
626 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
627 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1,
628 optimal_error_estimate(sig[s], fitparm, n*dt));
630 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
632 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
633 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1,
634 optimal_error_estimate(sig[s], fitparm, n*dt));
636 for (i = 0; i < nbs; i++)
638 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*std::sqrt(ybs[i]/(n*dt)),
639 sig[s]*std::sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
649 for (i = 0; i < n; i++)
651 ac[i] = val[s][i] - av[s];
654 fitsig[i] = std::sqrt(static_cast<real>(i));
661 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
662 dt, eacNormal, 1, FALSE, TRUE,
663 FALSE, 0, 0, effnNONE);
667 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
669 for (i = 1; i <= fitlen/2; i++)
675 /* Generate more or less appropriate sigma's */
676 for (i = 0; i <= fitlen; i++)
678 fitsig[i] = std::sqrt(acint + dt*i);
681 ac_fit[0] = 0.5*acint;
683 ac_fit[2] = 10*acint;
684 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
685 bDebugMode(), effnEXPEXP, ac_fit, 0, NULL);
686 ac_fit[3] = 1 - ac_fit[1];
688 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
689 s+1, optimal_error_estimate(sig[s], ac_fit, n*dt),
690 ac_fit[1], ac_fit[0], ac_fit[2]);
692 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
693 for (i = 0; i < nbs; i++)
695 fprintf(fp, "%g %g\n", tbs[i],
696 sig[s]*std::sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
703 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
712 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
713 gmx_bool bError, real fit_start)
719 please_cite(stdout, "Spoel2006b");
721 /* Compute negative derivative k(t) = -dc(t)/dt */
725 compute_derivative(nn, time, val[0], kt);
726 for (j = 0; (j < nn); j++)
733 for (j = 0; (j < nn); j++)
735 d2 += sqr(kt[j] - val[3][j]);
737 fprintf(debug, "RMS difference in derivatives is %g\n", std::sqrt(d2/nn));
739 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
745 analyse_corr(nn, time, val[0], val[2], val[4],
746 val[1], val[3], val[5], fit_start, temp);
750 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
751 printf("Not doing anything. Sorry.\n");
755 static void filter(real flen, int n, int nset, real **val, real dt)
758 double *filt, sum, vf, fluc, fluctot;
760 f = static_cast<int>(flen/(2*dt));
764 for (i = 1; i <= f; i++)
766 filt[i] = std::cos(M_PI*dt*i/flen);
769 for (i = 0; i <= f; i++)
773 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
774 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
776 for (s = 0; s < nset; s++)
779 for (i = f; i < n-f; i++)
781 vf = filt[0]*val[s][i];
782 for (j = 1; j <= f; j++)
784 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
786 fluc += sqr(val[s][i] - vf);
790 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, std::sqrt(fluc));
792 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", std::sqrt(fluctot/nset));
793 fprintf(stdout, "\n");
798 static void do_fit(FILE *out, int n, gmx_bool bYdy,
799 int ny, real *x0, real **val,
800 int npargs, t_pargs *ppa, const output_env_t oenv,
801 const char *fn_fitted)
803 real *c1 = NULL, *sig = NULL;
805 real tendfit, tbeginfit;
806 int i, efitfn, nparm;
808 efitfn = get_acffitfn();
809 nparm = effnNparams(efitfn);
810 fprintf(out, "Will fit to the following function:\n");
811 fprintf(out, "%s\n", effnDescription(efitfn));
817 fprintf(out, "Using two columns as y and sigma values\n");
823 if (opt2parg_bSet("-beginfit", npargs, ppa))
825 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
831 if (opt2parg_bSet("-endfit", npargs, ppa))
833 tendfit = opt2parg_real("-endfit", npargs, ppa);
840 snew(fitparm, nparm);
852 fitparm[1] = 0.5*c1[0];
856 fitparm[0] = fitparm[2] = 0.5*c1[0];
862 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
869 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
877 fprintf(out, "Warning: don't know how to initialize the parameters\n");
878 for (i = 0; (i < nparm); i++)
883 fprintf(out, "Starting parameters:\n");
884 for (i = 0; (i < nparm); i++)
886 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
888 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
889 oenv, bDebugMode(), efitfn, fitparm, 0,
892 for (i = 0; (i < nparm); i++)
894 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
899 fprintf(out, "No solution was found\n");
903 static void print_fitted_function(const char *fitfile,
904 const char *fn_fitted,
914 FILE *out_fit = gmx_ffopen(fitfile, "w");
915 if (bXYdy && nset >= 2)
917 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv,
924 if (NULL != fn_fitted)
926 buflen = std::strlen(fn_fitted)+32;
928 std::strncpy(buf2, fn_fitted, buflen);
929 buf2[std::strlen(buf2)-4] = '\0';
931 for (s = 0; s < nset; s++)
934 if (NULL != fn_fitted)
937 snprintf(buf, n, "%s_%d.xvg", buf2, s);
939 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
944 gmx_ffclose(out_fit);
947 int gmx_analyze(int argc, char *argv[])
949 static const char *desc[] = {
950 "[THISMODULE] reads an ASCII file and analyzes data sets.",
951 "A line in the input file may start with a time",
952 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
953 "Multiple sets can also be",
954 "read when they are separated by & (option [TT]-n[tt]);",
955 "in this case only one [IT]y[it]-value is read from each line.",
956 "All lines starting with # and @ are skipped.",
957 "All analyses can also be done for the derivative of a set",
958 "(option [TT]-d[tt]).[PAR]",
960 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
961 "points are equidistant in time.[PAR]",
963 "[THISMODULE] always shows the average and standard deviation of each",
964 "set, as well as the relative deviation of the third",
965 "and fourth cumulant from those of a Gaussian distribution with the same",
966 "standard deviation.[PAR]",
968 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
969 "Be sure that the time interval between data points is",
970 "much shorter than the time scale of the autocorrelation.[PAR]",
972 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
973 "i/2 periods. The formula is::",
975 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
977 "This is useful for principal components obtained from covariance",
978 "analysis, since the principal components of random diffusion are",
979 "pure cosines.[PAR]",
981 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
983 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
985 "Option [TT]-av[tt] produces the average over the sets.",
986 "Error bars can be added with the option [TT]-errbar[tt].",
987 "The errorbars can represent the standard deviation, the error",
988 "(assuming the points are independent) or the interval containing",
989 "90% of the points, by discarding 5% of the points at the top and",
992 "Option [TT]-ee[tt] produces error estimates using block averaging.",
993 "A set is divided in a number of blocks and averages are calculated for",
994 "each block. The error for the total average is calculated from",
995 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
996 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
997 "These errors are plotted as a function of the block size.",
998 "Also an analytical block average curve is plotted, assuming",
999 "that the autocorrelation is a sum of two exponentials.",
1000 "The analytical curve for the block average is::",
1002 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1003 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],",
1005 "where T is the total time.",
1006 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1007 "When the actual block average is very close to the analytical curve,",
1008 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1009 "The complete derivation is given in",
1010 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1012 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1013 "of each set and over all sets with respect to a filtered average.",
1014 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1015 "to len/2. len is supplied with the option [TT]-filter[tt].",
1016 "This filter reduces oscillations with period len/2 and len by a factor",
1017 "of 0.79 and 0.33 respectively.[PAR]",
1019 "Option [TT]-g[tt] fits the data to the function given with option",
1020 "[TT]-fitfn[tt].[PAR]",
1022 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1023 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1024 "zero or with a negative value are ignored.[PAR]"
1026 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1027 "on output from [gmx-hbond]. The input file can be taken directly",
1028 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1029 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1030 "curves that make sense in the context of molecular dynamics, mainly",
1031 "exponential curves. More information is in the manual. To check the output",
1032 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1033 "original data and the fitted function to a new data file. The fitting",
1034 "parameters are stored as comment in the output file."
1036 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1037 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1038 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1039 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE;
1040 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10;
1041 static real temp = 298.15, fit_start = 1, fit_end = 60;
1043 /* must correspond to enum avbar* declared at beginning of file */
1044 static const char *avbar_opt[avbarNR+1] = {
1045 NULL, "none", "stddev", "error", "90", NULL
1049 { "-time", FALSE, etBOOL, {&bHaveT},
1050 "Expect a time in the input" },
1051 { "-b", FALSE, etREAL, {&tb},
1052 "First time to read from set" },
1053 { "-e", FALSE, etREAL, {&te},
1054 "Last time to read from set" },
1055 { "-n", FALSE, etINT, {&nsets_in},
1056 "Read this number of sets separated by &" },
1057 { "-d", FALSE, etBOOL, {&bDer},
1058 "Use the derivative" },
1059 { "-dp", FALSE, etINT, {&d},
1060 "HIDDENThe derivative is the difference over this number of points" },
1061 { "-bw", FALSE, etREAL, {&binwidth},
1062 "Binwidth for the distribution" },
1063 { "-errbar", FALSE, etENUM, {avbar_opt},
1064 "Error bars for [TT]-av[tt]" },
1065 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1066 "Integrate data function(s) numerically using trapezium rule" },
1067 { "-aver_start", FALSE, etREAL, {&aver_start},
1068 "Start averaging the integral from here" },
1069 { "-xydy", FALSE, etBOOL, {&bXYdy},
1070 "Interpret second data set as error in the y values for integrating" },
1071 { "-regression", FALSE, etBOOL, {&bRegression},
1072 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1073 { "-luzar", FALSE, etBOOL, {&bLuzar},
1074 "Do a Luzar and Chandler analysis on a correlation function and "
1075 "related as produced by [gmx-hbond]. When in addition the "
1076 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1077 "interpreted as errors in c(t) and n(t)." },
1078 { "-temp", FALSE, etREAL, {&temp},
1079 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1080 { "-fitstart", FALSE, etREAL, {&fit_start},
1081 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1082 { "-fitend", FALSE, etREAL, {&fit_end},
1083 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1084 { "-nbmin", FALSE, etINT, {&nb_min},
1085 "HIDDENMinimum number of blocks for block averaging" },
1086 { "-resol", FALSE, etINT, {&resol},
1087 "HIDDENResolution for the block averaging, block size increases with"
1088 " a factor 2^(1/resol)" },
1089 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1090 "HIDDENAlways use a single exponential fit for the error estimate" },
1091 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1092 "HIDDENAllow a negative long-time correlation" },
1093 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1094 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1095 { "-filter", FALSE, etREAL, {&filtlen},
1096 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1097 { "-power", FALSE, etBOOL, {&bPower},
1098 "Fit data to: b t^a" },
1099 { "-subav", FALSE, etBOOL, {&bSubAv},
1100 "Subtract the average before autocorrelating" },
1101 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1102 "Calculate one ACF over all sets" },
1104 #define NPA asize(pa)
1107 int n, nlast, s, nset, i, j = 0;
1108 real **val, *t, dt, tot, error;
1109 double *av, *sig, cum1, cum2, cum3, cum4, db;
1110 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *fitfile;
1114 { efXVG, "-f", "graph", ffREAD },
1115 { efXVG, "-ac", "autocorr", ffOPTWR },
1116 { efXVG, "-msd", "msd", ffOPTWR },
1117 { efXVG, "-cc", "coscont", ffOPTWR },
1118 { efXVG, "-dist", "distr", ffOPTWR },
1119 { efXVG, "-av", "average", ffOPTWR },
1120 { efXVG, "-ee", "errest", ffOPTWR },
1121 { efXVG, "-fitted", "fitted", ffOPTWR },
1122 { efLOG, "-g", "fitlog", ffOPTWR }
1124 #define NFILE asize(fnm)
1130 ppa = add_acf_pargs(&npargs, pa);
1132 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1133 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1138 acfile = opt2fn_null("-ac", NFILE, fnm);
1139 msdfile = opt2fn_null("-msd", NFILE, fnm);
1140 ccfile = opt2fn_null("-cc", NFILE, fnm);
1141 distfile = opt2fn_null("-dist", NFILE, fnm);
1142 avfile = opt2fn_null("-av", NFILE, fnm);
1143 eefile = opt2fn_null("-ee", NFILE, fnm);
1144 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1146 fitfile = opt2fn("-g", NFILE, fnm);
1150 fitfile = opt2fn_null("-g", NFILE, fnm);
1153 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1154 opt2parg_bSet("-b", npargs, ppa), tb,
1155 opt2parg_bSet("-e", npargs, ppa), te,
1156 nsets_in, &nset, &n, &dt, &t);
1157 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1161 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1164 for (s = 0; s < nset; s++)
1166 for (i = 0; (i < n); i++)
1168 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1177 printf("Calculating the integral using the trapezium rule\n");
1181 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1182 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1186 for (s = 0; s < nset; s++)
1188 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1189 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1194 if (fitfile != NULL)
1196 print_fitted_function(fitfile,
1197 opt2fn_null("-fitted", NFILE, fnm),
1204 printf(" std. dev. relative deviation of\n");
1205 printf(" standard --------- cumulants from those of\n");
1206 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1207 printf(" cum. 3 cum. 4\n");
1210 for (s = 0; (s < nset); s++)
1216 for (i = 0; (i < n); i++)
1221 for (i = 0; (i < n); i++)
1223 db = val[s][i]-cum1;
1226 cum4 += db*db*db*db;
1232 sig[s] = std::sqrt(cum2);
1235 error = std::sqrt(cum2/(n-1));
1241 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1242 s+1, av[s], sig[s], error,
1243 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*std::sqrt(8/M_PI)) : 0,
1244 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1250 filter(filtlen, n, nset, val, dt);
1255 out = xvgropen(msdfile, "Mean square displacement",
1256 "time", "MSD (nm\\S2\\N)", oenv);
1257 nlast = static_cast<int>(n*frac);
1258 for (s = 0; s < nset; s++)
1260 for (j = 0; j <= nlast; j++)
1264 fprintf(stderr, "\r%d", j);
1267 for (i = 0; i < n-j; i++)
1269 tot += sqr(val[s][i]-val[s][i+j]);
1272 fprintf(out, " %g %8g\n", dt*j, tot);
1276 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1280 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1284 plot_coscont(ccfile, n, nset, val, oenv);
1289 histogram(distfile, binwidth, n, nset, val, oenv);
1293 average(avfile, nenum(avbar_opt), n, nset, val, t);
1297 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1298 bEeFitAc, bEESEF, bEENLC, oenv);
1302 power_fit(n, nset, val, t);
1309 for (s = 0; s < nset; s++)
1311 for (i = 0; i < n; i++)
1317 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1318 eacNormal, bAverCorr);
1323 regression_analysis(n, bXYdy, t, nset, val);
1328 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1331 view_all(oenv, NFILE, fnm);