2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/correlationfunctions/autocorr.h"
45 #include "gromacs/correlationfunctions/expfit.h"
46 #include "gromacs/correlationfunctions/integrate.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/gmx_ana.h"
49 #include "gromacs/gmxana/gstat.h"
50 #include "gromacs/legacyheaders/copyrite.h"
51 #include "gromacs/legacyheaders/macros.h"
52 #include "gromacs/legacyheaders/readinp.h"
53 #include "gromacs/legacyheaders/txtdump.h"
54 #include "gromacs/legacyheaders/typedefs.h"
55 #include "gromacs/legacyheaders/viewit.h"
56 #include "gromacs/linearalgebra/matrix.h"
57 #include "gromacs/math/vec.h"
58 #include "gromacs/statistics/statistics.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/futil.h"
61 #include "gromacs/utility/smalloc.h"
62 #include "gromacs/utility/snprintf.h"
64 /* must correspond to char *avbar_opt[] declared in main() */
66 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
69 static void power_fit(int n, int nset, real **val, real *t)
71 real *x, *y, quality, a, b, r;
79 for (i = 0; i < n; i++)
89 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
90 for (i = 0; i < n; i++)
96 for (s = 0; s < nset; s++)
99 for (i = 0; i < n && val[s][i] >= 0; i++)
101 y[i] = log(val[s][i]);
105 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
107 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
108 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
109 s+1, quality, a, exp(b));
116 static real cosine_content(int nhp, int n, real *y)
117 /* Assumes n equidistant points */
119 double fac, cosyint, yyint;
127 fac = M_PI*nhp/(n-1);
131 for (i = 0; i < n; i++)
133 cosyint += cos(fac*i)*y[i];
137 return 2*cosyint*cosyint/(n*yyint);
140 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
141 const output_env_t oenv)
147 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
150 for (s = 0; s < nset; s++)
152 cc = cosine_content(s+1, n, val[s]);
153 fprintf(fp, " %d %g\n", s+1, cc);
154 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
157 fprintf(stdout, "\n");
162 static void regression_analysis(int n, gmx_bool bXYdy,
163 real *x, int nset, real **val)
165 real S, chi2, a, b, da, db, r = 0;
168 if (bXYdy || (nset == 1))
170 printf("Fitting data to a function f(x) = ax + b\n");
171 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
172 printf("Error estimates will be given if w_i (sigma) values are given\n");
173 printf("(use option -xydy).\n\n");
176 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
178 gmx_fatal(FARGS, "Error fitting the data: %s",
179 gmx_stats_message(ok));
184 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
186 gmx_fatal(FARGS, "Error fitting the data: %s",
187 gmx_stats_message(ok));
191 printf("Chi2 = %g\n", chi2);
192 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
193 printf("Correlation coefficient = %.1f%%\n", 100*r);
197 printf("a = %g +/- %g\n", a, da);
198 printf("b = %g +/- %g\n", b, db);
202 printf("a = %g\n", a);
203 printf("b = %g\n", b);
208 double chi2, *a, **xx, *y;
213 for (j = 0; (j < nset-1); j++)
217 for (i = 0; (i < n); i++)
220 for (j = 1; (j < nset); j++)
222 xx[j-1][i] = val[j][i];
226 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
227 printf("Fitting %d data points in %d sets\n", n, nset-1);
228 printf("chi2 = %g\n", chi2);
230 for (i = 0; (i < nset-1); i++)
242 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
243 const output_env_t oenv)
253 for (s = 0; s < nset; s++)
255 for (i = 0; i < n; i++)
261 else if (val[s][i] > max)
268 min = binwidth*floor(min/binwidth);
269 max = binwidth*ceil(max/binwidth);
276 nbin = (int)((max - min)/binwidth + 0.5) + 1;
277 fprintf(stderr, "Making distributions with %d bins\n", nbin);
279 fp = xvgropen(distfile, "Distribution", "", "", oenv);
280 for (s = 0; s < nset; s++)
282 for (i = 0; i < nbin; i++)
286 for (i = 0; i < n; i++)
288 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
290 for (i = 0; i < nbin; i++)
292 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
296 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
302 static int real_comp(const void *a, const void *b)
304 real dif = *(real *)a - *(real *)b;
320 static void average(const char *avfile, int avbar_opt,
321 int n, int nset, real **val, real *t)
328 fp = gmx_ffopen(avfile, "w");
329 if ((avbar_opt == avbarERROR) && (nset == 1))
331 avbar_opt = avbarNONE;
333 if (avbar_opt != avbarNONE)
335 if (avbar_opt == avbar90)
338 fprintf(fp, "@TYPE xydydy\n");
339 edge = (int)(nset*0.05+0.5);
340 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
341 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
345 fprintf(fp, "@TYPE xydy\n");
349 for (i = 0; i < n; i++)
352 for (s = 0; s < nset; s++)
357 fprintf(fp, " %g %g", t[i], av);
359 if (avbar_opt != avbarNONE)
361 if (avbar_opt == avbar90)
363 for (s = 0; s < nset; s++)
367 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
368 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
372 for (s = 0; s < nset; s++)
374 var += sqr(val[s][i]-av);
376 if (avbar_opt == avbarSTDDEV)
378 err = sqrt(var/nset);
382 err = sqrt(var/(nset*(nset-1)));
384 fprintf(fp, " %g", err);
391 if (avbar_opt == avbar90)
397 /*! \brief Compute final error estimate.
399 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
401 static real optimal_error_estimate(double sigma, double fitparm[], real tTotal)
403 double ss = fitparm[1]*fitparm[0]+(1-fitparm[1])*fitparm[2];
404 if ((tTotal <= 0) || (ss <= 0))
406 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n",
411 return sigma*sqrt(2*ss/tTotal);
414 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
415 int nset, double *av, double *sig, real **val, real dt,
416 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
417 const output_env_t oenv)
420 int bs, prev_bs, nbs, nb;
425 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
427 real ee, a, tau1, tau2;
431 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
436 fp = xvgropen(eefile, "Error estimates",
437 "Block size (time)", "Error estimate", oenv);
438 if (output_env_get_print_xvgr_codes(oenv))
441 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
445 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
448 spacing = pow(2, 1.0/resol);
452 for (s = 0; s < nset; s++)
464 for (i = 0; i < nb; i++)
467 for (j = 0; j < bs; j++)
469 blav += val[s][bs*i+j];
471 var += sqr(av[s] - blav/bs);
480 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
497 for (i = 0; i < nbs/2; i++)
500 tbs[i] = tbs[nbs-1-i];
503 ybs[i] = ybs[nbs-1-i];
506 /* The initial slope of the normalized ybs^2 is 1.
507 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
508 * From this we take our initial guess for tau1.
517 while (i < nbs - 1 &&
518 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
522 fprintf(stdout, "Data set %d has strange time correlations:\n"
523 "the std. error using single points is larger than that of blocks of 2 points\n"
524 "The error estimate might be inaccurate, check the fit\n",
526 /* Use the total time as tau for the fitting weights */
527 tau_sig = (n - 1)*dt;
536 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
539 /* Generate more or less appropriate sigma's,
540 * also taking the density of points into account.
542 for (i = 0; i < nbs; i++)
546 dens = tbs[1]/tbs[0] - 1;
550 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
554 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
556 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
561 fitparm[0] = tau1_est;
563 /* We set the initial guess for tau2
564 * to halfway between tau1_est and the total time (on log scale).
566 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
567 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
568 bDebugMode(), effnERREST, fitparm, 0,
571 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
572 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
576 if (fitparm[2] > (n-1)*dt)
579 "Warning: tau2 is longer than the length of the data (%g)\n"
580 " the statistics might be bad\n",
585 fprintf(stdout, "a fitted parameter is negative\n");
587 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
588 optimal_error_estimate(sig[s], fitparm, n*dt),
589 fitparm[1], fitparm[0], fitparm[2]);
590 /* Do a fit with tau2 fixed at the total time.
591 * One could also choose any other large value for tau2.
593 fitparm[0] = tau1_est;
595 fitparm[2] = (n-1)*dt;
596 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
597 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
598 effnERREST, fitparm, 4, NULL);
600 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
601 || (fitparm[1] > 1 && !bAllowNegLTCorr))
605 fprintf(stdout, "a fitted parameter is negative\n");
606 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
607 optimal_error_estimate(sig[s], fitparm, n*dt),
608 fitparm[1], fitparm[0], fitparm[2]);
610 /* Do a single exponential fit */
611 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
612 fitparm[0] = tau1_est;
615 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
616 effnERREST, fitparm, 6, NULL);
619 ee = optimal_error_estimate(sig[s], fitparm, n*dt);
624 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
625 s+1, ee, a, tau1, tau2);
626 if (output_env_get_xvg_format(oenv) == exvgXMGR)
628 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
629 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1,
630 optimal_error_estimate(sig[s], fitparm, n*dt));
632 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
634 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
635 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1,
636 optimal_error_estimate(sig[s], fitparm, n*dt));
638 for (i = 0; i < nbs; i++)
640 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
641 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
651 for (i = 0; i < n; i++)
653 ac[i] = val[s][i] - av[s];
663 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
664 dt, eacNormal, 1, FALSE, TRUE,
665 FALSE, 0, 0, effnNONE);
669 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
671 for (i = 1; i <= fitlen/2; i++)
677 /* Generate more or less appropriate sigma's */
678 for (i = 0; i <= fitlen; i++)
680 fitsig[i] = sqrt(acint + dt*i);
683 ac_fit[0] = 0.5*acint;
685 ac_fit[2] = 10*acint;
686 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
687 bDebugMode(), effnEXPEXP, ac_fit, 0, NULL);
688 ac_fit[3] = 1 - ac_fit[1];
690 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
691 s+1, optimal_error_estimate(sig[s], ac_fit, n*dt),
692 ac_fit[1], ac_fit[0], ac_fit[2]);
694 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
695 for (i = 0; i < nbs; i++)
697 fprintf(fp, "%g %g\n", tbs[i],
698 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
705 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
714 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
715 gmx_bool bError, real fit_start)
717 const real tol = 1e-8;
722 please_cite(stdout, "Spoel2006b");
724 /* Compute negative derivative k(t) = -dc(t)/dt */
728 compute_derivative(nn, time, val[0], kt);
729 for (j = 0; (j < nn); j++)
736 for (j = 0; (j < nn); j++)
738 d2 += sqr(kt[j] - val[3][j]);
740 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
742 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
748 analyse_corr(nn, time, val[0], val[2], val[4],
749 val[1], val[3], val[5], fit_start, temp);
753 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
754 printf("Not doing anything. Sorry.\n");
758 static void filter(real flen, int n, int nset, real **val, real dt)
761 double *filt, sum, vf, fluc, fluctot;
763 f = (int)(flen/(2*dt));
767 for (i = 1; i <= f; i++)
769 filt[i] = cos(M_PI*dt*i/flen);
772 for (i = 0; i <= f; i++)
776 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
777 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
779 for (s = 0; s < nset; s++)
782 for (i = f; i < n-f; i++)
784 vf = filt[0]*val[s][i];
785 for (j = 1; j <= f; j++)
787 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
789 fluc += sqr(val[s][i] - vf);
793 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
795 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
796 fprintf(stdout, "\n");
801 static void do_fit(FILE *out, int n, gmx_bool bYdy,
802 int ny, real *x0, real **val,
803 int npargs, t_pargs *ppa, const output_env_t oenv,
804 const char *fn_fitted)
806 real *c1 = NULL, *sig = NULL;
808 real tendfit, tbeginfit;
809 int i, efitfn, nparm;
811 efitfn = get_acffitfn();
812 nparm = effnNparams(efitfn);
813 fprintf(out, "Will fit to the following function:\n");
814 fprintf(out, "%s\n", effnDescription(efitfn));
820 fprintf(out, "Using two columns as y and sigma values\n");
826 if (opt2parg_bSet("-beginfit", npargs, ppa))
828 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
834 if (opt2parg_bSet("-endfit", npargs, ppa))
836 tendfit = opt2parg_real("-endfit", npargs, ppa);
843 snew(fitparm, nparm);
855 fitparm[1] = 0.5*c1[0];
859 fitparm[0] = fitparm[2] = 0.5*c1[0];
865 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
872 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
880 fprintf(out, "Warning: don't know how to initialize the parameters\n");
881 for (i = 0; (i < nparm); i++)
886 fprintf(out, "Starting parameters:\n");
887 for (i = 0; (i < nparm); i++)
889 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
891 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
892 oenv, bDebugMode(), efitfn, fitparm, 0,
895 for (i = 0; (i < nparm); i++)
897 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
902 fprintf(out, "No solution was found\n");
906 static void print_fitted_function(const char *fitfile,
907 const char *fn_fitted,
917 FILE *out_fit = gmx_ffopen(fitfile, "w");
918 if (bXYdy && nset >= 2)
920 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv,
927 if (NULL != fn_fitted)
929 buflen = strlen(fn_fitted)+32;
931 strncpy(buf2, fn_fitted, buflen);
932 buf2[strlen(buf2)-4] = '\0';
934 for (s = 0; s < nset; s++)
937 if (NULL != fn_fitted)
940 snprintf(buf, n, "%s_%d.xvg", buf2, s);
942 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
947 gmx_ffclose(out_fit);
950 int gmx_analyze(int argc, char *argv[])
952 static const char *desc[] = {
953 "[THISMODULE] reads an ASCII file and analyzes data sets.",
954 "A line in the input file may start with a time",
955 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
956 "Multiple sets can also be",
957 "read when they are separated by & (option [TT]-n[tt]);",
958 "in this case only one [IT]y[it]-value is read from each line.",
959 "All lines starting with # and @ are skipped.",
960 "All analyses can also be done for the derivative of a set",
961 "(option [TT]-d[tt]).[PAR]",
963 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
964 "points are equidistant in time.[PAR]",
966 "[THISMODULE] always shows the average and standard deviation of each",
967 "set, as well as the relative deviation of the third",
968 "and fourth cumulant from those of a Gaussian distribution with the same",
969 "standard deviation.[PAR]",
971 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
972 "Be sure that the time interval between data points is",
973 "much shorter than the time scale of the autocorrelation.[PAR]",
975 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
976 "i/2 periods. The formula is::",
978 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
980 "This is useful for principal components obtained from covariance",
981 "analysis, since the principal components of random diffusion are",
982 "pure cosines.[PAR]",
984 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
986 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
988 "Option [TT]-av[tt] produces the average over the sets.",
989 "Error bars can be added with the option [TT]-errbar[tt].",
990 "The errorbars can represent the standard deviation, the error",
991 "(assuming the points are independent) or the interval containing",
992 "90% of the points, by discarding 5% of the points at the top and",
995 "Option [TT]-ee[tt] produces error estimates using block averaging.",
996 "A set is divided in a number of blocks and averages are calculated for",
997 "each block. The error for the total average is calculated from",
998 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
999 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1000 "These errors are plotted as a function of the block size.",
1001 "Also an analytical block average curve is plotted, assuming",
1002 "that the autocorrelation is a sum of two exponentials.",
1003 "The analytical curve for the block average is::",
1005 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1006 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],",
1008 "where T is the total time.",
1009 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1010 "When the actual block average is very close to the analytical curve,",
1011 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1012 "The complete derivation is given in",
1013 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1015 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1016 "of each set and over all sets with respect to a filtered average.",
1017 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1018 "to len/2. len is supplied with the option [TT]-filter[tt].",
1019 "This filter reduces oscillations with period len/2 and len by a factor",
1020 "of 0.79 and 0.33 respectively.[PAR]",
1022 "Option [TT]-g[tt] fits the data to the function given with option",
1023 "[TT]-fitfn[tt].[PAR]",
1025 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1026 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1027 "zero or with a negative value are ignored.[PAR]"
1029 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1030 "on output from [gmx-hbond]. The input file can be taken directly",
1031 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1032 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1033 "curves that make sense in the context of molecular dynamics, mainly",
1034 "exponential curves. More information is in the manual. To check the output",
1035 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1036 "original data and the fitted function to a new data file. The fitting",
1037 "parameters are stored as comment in the output file."
1039 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1040 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1041 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1042 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1043 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nFitPoints = 100;
1044 static real temp = 298.15, fit_start = 1, fit_end = 60, diffusion = 5e-5, rcut = 0.35;
1046 /* must correspond to enum avbar* declared at beginning of file */
1047 static const char *avbar_opt[avbarNR+1] = {
1048 NULL, "none", "stddev", "error", "90", NULL
1052 { "-time", FALSE, etBOOL, {&bHaveT},
1053 "Expect a time in the input" },
1054 { "-b", FALSE, etREAL, {&tb},
1055 "First time to read from set" },
1056 { "-e", FALSE, etREAL, {&te},
1057 "Last time to read from set" },
1058 { "-n", FALSE, etINT, {&nsets_in},
1059 "Read this number of sets separated by &" },
1060 { "-d", FALSE, etBOOL, {&bDer},
1061 "Use the derivative" },
1062 { "-dp", FALSE, etINT, {&d},
1063 "HIDDENThe derivative is the difference over this number of points" },
1064 { "-bw", FALSE, etREAL, {&binwidth},
1065 "Binwidth for the distribution" },
1066 { "-errbar", FALSE, etENUM, {avbar_opt},
1067 "Error bars for [TT]-av[tt]" },
1068 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1069 "Integrate data function(s) numerically using trapezium rule" },
1070 { "-aver_start", FALSE, etREAL, {&aver_start},
1071 "Start averaging the integral from here" },
1072 { "-xydy", FALSE, etBOOL, {&bXYdy},
1073 "Interpret second data set as error in the y values for integrating" },
1074 { "-regression", FALSE, etBOOL, {&bRegression},
1075 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1076 { "-luzar", FALSE, etBOOL, {&bLuzar},
1077 "Do a Luzar and Chandler analysis on a correlation function and "
1078 "related as produced by [gmx-hbond]. When in addition the "
1079 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1080 "interpreted as errors in c(t) and n(t)." },
1081 { "-temp", FALSE, etREAL, {&temp},
1082 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1083 { "-fitstart", FALSE, etREAL, {&fit_start},
1084 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1085 { "-fitend", FALSE, etREAL, {&fit_end},
1086 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1087 { "-nbmin", FALSE, etINT, {&nb_min},
1088 "HIDDENMinimum number of blocks for block averaging" },
1089 { "-resol", FALSE, etINT, {&resol},
1090 "HIDDENResolution for the block averaging, block size increases with"
1091 " a factor 2^(1/resol)" },
1092 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1093 "HIDDENAlways use a single exponential fit for the error estimate" },
1094 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1095 "HIDDENAllow a negative long-time correlation" },
1096 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1097 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1098 { "-filter", FALSE, etREAL, {&filtlen},
1099 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1100 { "-power", FALSE, etBOOL, {&bPower},
1101 "Fit data to: b t^a" },
1102 { "-subav", FALSE, etBOOL, {&bSubAv},
1103 "Subtract the average before autocorrelating" },
1104 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1105 "Calculate one ACF over all sets" },
1107 #define NPA asize(pa)
1109 FILE *out, *out_fit;
1110 int n, nlast, s, nset, i, j = 0;
1111 real **val, *t, dt, tot, error;
1112 double *av, *sig, cum1, cum2, cum3, cum4, db;
1113 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *fitfile;
1117 { efXVG, "-f", "graph", ffREAD },
1118 { efXVG, "-ac", "autocorr", ffOPTWR },
1119 { efXVG, "-msd", "msd", ffOPTWR },
1120 { efXVG, "-cc", "coscont", ffOPTWR },
1121 { efXVG, "-dist", "distr", ffOPTWR },
1122 { efXVG, "-av", "average", ffOPTWR },
1123 { efXVG, "-ee", "errest", ffOPTWR },
1124 { efXVG, "-fitted", "fitted", ffOPTWR },
1125 { efLOG, "-g", "fitlog", ffOPTWR }
1127 #define NFILE asize(fnm)
1133 ppa = add_acf_pargs(&npargs, pa);
1135 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1136 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1141 acfile = opt2fn_null("-ac", NFILE, fnm);
1142 msdfile = opt2fn_null("-msd", NFILE, fnm);
1143 ccfile = opt2fn_null("-cc", NFILE, fnm);
1144 distfile = opt2fn_null("-dist", NFILE, fnm);
1145 avfile = opt2fn_null("-av", NFILE, fnm);
1146 eefile = opt2fn_null("-ee", NFILE, fnm);
1147 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1149 fitfile = opt2fn("-g", NFILE, fnm);
1153 fitfile = opt2fn_null("-g", NFILE, fnm);
1156 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1157 opt2parg_bSet("-b", npargs, ppa), tb,
1158 opt2parg_bSet("-e", npargs, ppa), te,
1159 nsets_in, &nset, &n, &dt, &t);
1160 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1164 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1167 for (s = 0; s < nset; s++)
1169 for (i = 0; (i < n); i++)
1171 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1180 printf("Calculating the integral using the trapezium rule\n");
1184 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1185 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1189 for (s = 0; s < nset; s++)
1191 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1192 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1197 if (fitfile != NULL)
1199 print_fitted_function(fitfile,
1200 opt2fn_null("-fitted", NFILE, fnm),
1207 printf(" std. dev. relative deviation of\n");
1208 printf(" standard --------- cumulants from those of\n");
1209 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1210 printf(" cum. 3 cum. 4\n");
1213 for (s = 0; (s < nset); s++)
1219 for (i = 0; (i < n); i++)
1224 for (i = 0; (i < n); i++)
1226 db = val[s][i]-cum1;
1229 cum4 += db*db*db*db;
1235 sig[s] = sqrt(cum2);
1238 error = sqrt(cum2/(n-1));
1244 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1245 s+1, av[s], sig[s], error,
1246 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1247 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1253 filter(filtlen, n, nset, val, dt);
1258 out = xvgropen(msdfile, "Mean square displacement",
1259 "time", "MSD (nm\\S2\\N)", oenv);
1260 nlast = (int)(n*frac);
1261 for (s = 0; s < nset; s++)
1263 for (j = 0; j <= nlast; j++)
1267 fprintf(stderr, "\r%d", j);
1270 for (i = 0; i < n-j; i++)
1272 tot += sqr(val[s][i]-val[s][i+j]);
1275 fprintf(out, " %g %8g\n", dt*j, tot);
1279 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1283 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1287 plot_coscont(ccfile, n, nset, val, oenv);
1292 histogram(distfile, binwidth, n, nset, val, oenv);
1296 average(avfile, nenum(avbar_opt), n, nset, val, t);
1300 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1301 bEeFitAc, bEESEF, bEENLC, oenv);
1305 power_fit(n, nset, val, t);
1312 for (s = 0; s < nset; s++)
1314 for (i = 0; i < n; i++)
1320 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1321 eacNormal, bAverCorr);
1326 regression_analysis(n, bXYdy, t, nset, val);
1331 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1334 view_all(oenv, NFILE, fnm);