2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gmx_fatal.h"
56 #include "gmx_matrix.h"
57 #include "gmx_statistics.h"
62 /* must correspond to char *avbar_opt[] declared in main() */
64 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
67 static void power_fit(int n, int nset, real **val, real *t)
69 real *x, *y, quality, a, b, r;
77 for (i = 0; i < n; i++)
87 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
88 for (i = 0; i < n; i++)
94 for (s = 0; s < nset; s++)
97 for (i = 0; i < n && val[s][i] >= 0; i++)
99 y[i] = log(val[s][i]);
103 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
105 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
106 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
107 s+1, quality, a, exp(b));
114 static real cosine_content(int nhp, int n, real *y)
115 /* Assumes n equidistant points */
117 double fac, cosyint, yyint;
125 fac = M_PI*nhp/(n-1);
129 for (i = 0; i < n; i++)
131 cosyint += cos(fac*i)*y[i];
135 return 2*cosyint*cosyint/(n*yyint);
138 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
139 const output_env_t oenv)
145 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
148 for (s = 0; s < nset; s++)
150 cc = cosine_content(s+1, n, val[s]);
151 fprintf(fp, " %d %g\n", s+1, cc);
152 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
155 fprintf(stdout, "\n");
160 static void regression_analysis(int n, gmx_bool bXYdy,
161 real *x, int nset, real **val)
163 real S, chi2, a, b, da, db, r = 0;
166 if (bXYdy || (nset == 1))
168 printf("Fitting data to a function f(x) = ax + b\n");
169 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
170 printf("Error estimates will be given if w_i (sigma) values are given\n");
171 printf("(use option -xydy).\n\n");
174 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
176 gmx_fatal(FARGS, "Error fitting the data: %s",
177 gmx_stats_message(ok));
182 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
184 gmx_fatal(FARGS, "Error fitting the data: %s",
185 gmx_stats_message(ok));
189 printf("Chi2 = %g\n", chi2);
190 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
191 printf("Correlation coefficient = %.1f%%\n", 100*r);
195 printf("a = %g +/- %g\n", a, da);
196 printf("b = %g +/- %g\n", b, db);
200 printf("a = %g\n", a);
201 printf("b = %g\n", b);
206 double chi2, *a, **xx, *y;
211 for (j = 0; (j < nset-1); j++)
215 for (i = 0; (i < n); i++)
218 for (j = 1; (j < nset); j++)
220 xx[j-1][i] = val[j][i];
224 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
225 printf("Fitting %d data points in %d sets\n", n, nset-1);
226 printf("chi2 = %g\n", chi2);
228 for (i = 0; (i < nset-1); i++)
240 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
241 const output_env_t oenv)
247 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
248 long long int *histo;
255 for (s = 0; s < nset; s++)
257 for (i = 0; i < n; i++)
263 else if (val[s][i] > max)
270 min = binwidth*floor(min/binwidth);
271 max = binwidth*ceil(max/binwidth);
278 nbin = (int)((max - min)/binwidth + 0.5) + 1;
279 fprintf(stderr, "Making distributions with %d bins\n", nbin);
281 fp = xvgropen(distfile, "Distribution", "", "", oenv);
282 for (s = 0; s < nset; s++)
284 for (i = 0; i < nbin; i++)
288 for (i = 0; i < n; i++)
290 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
292 for (i = 0; i < nbin; i++)
294 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
298 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
304 static int real_comp(const void *a, const void *b)
306 real dif = *(real *)a - *(real *)b;
322 static void average(const char *avfile, int avbar_opt,
323 int n, int nset, real **val, real *t)
330 fp = ffopen(avfile, "w");
331 if ((avbar_opt == avbarERROR) && (nset == 1))
333 avbar_opt = avbarNONE;
335 if (avbar_opt != avbarNONE)
337 if (avbar_opt == avbar90)
340 fprintf(fp, "@TYPE xydydy\n");
341 edge = (int)(nset*0.05+0.5);
342 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
343 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
347 fprintf(fp, "@TYPE xydy\n");
351 for (i = 0; i < n; i++)
354 for (s = 0; s < nset; s++)
359 fprintf(fp, " %g %g", t[i], av);
361 if (avbar_opt != avbarNONE)
363 if (avbar_opt == avbar90)
365 for (s = 0; s < nset; s++)
369 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
370 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
374 for (s = 0; s < nset; s++)
376 var += sqr(val[s][i]-av);
378 if (avbar_opt == avbarSTDDEV)
380 err = sqrt(var/nset);
384 err = sqrt(var/(nset*(nset-1)));
386 fprintf(fp, " %g", err);
393 if (avbar_opt == avbar90)
399 static real anal_ee_inf(real *parm, real T)
401 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
404 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
405 int nset, double *av, double *sig, real **val, real dt,
406 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
407 const output_env_t oenv)
410 int bs, prev_bs, nbs, nb;
415 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
417 real ee, a, tau1, tau2;
421 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
426 fp = xvgropen(eefile, "Error estimates",
427 "Block size (time)", "Error estimate", oenv);
428 if (output_env_get_print_xvgr_codes(oenv))
431 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
435 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
438 spacing = pow(2, 1.0/resol);
442 for (s = 0; s < nset; s++)
454 for (i = 0; i < nb; i++)
457 for (j = 0; j < bs; j++)
459 blav += val[s][bs*i+j];
461 var += sqr(av[s] - blav/bs);
470 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
487 for (i = 0; i < nbs/2; i++)
490 tbs[i] = tbs[nbs-1-i];
493 ybs[i] = ybs[nbs-1-i];
496 /* The initial slope of the normalized ybs^2 is 1.
497 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
498 * From this we take our initial guess for tau1.
507 while (i < nbs - 1 &&
508 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
512 fprintf(stdout, "Data set %d has strange time correlations:\n"
513 "the std. error using single points is larger than that of blocks of 2 points\n"
514 "The error estimate might be inaccurate, check the fit\n",
516 /* Use the total time as tau for the fitting weights */
517 tau_sig = (n - 1)*dt;
526 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
529 /* Generate more or less appropriate sigma's,
530 * also taking the density of points into account.
532 for (i = 0; i < nbs; i++)
536 dens = tbs[1]/tbs[0] - 1;
540 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
544 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
546 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
551 fitparm[0] = tau1_est;
553 /* We set the initial guess for tau2
554 * to halfway between tau1_est and the total time (on log scale).
556 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
557 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
558 bDebugMode(), effnERREST, fitparm, 0);
559 fitparm[3] = 1-fitparm[1];
561 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
562 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
566 if (fitparm[2] > (n-1)*dt)
569 "Warning: tau2 is longer than the length of the data (%g)\n"
570 " the statistics might be bad\n",
575 fprintf(stdout, "a fitted parameter is negative\n");
577 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
578 sig[s]*anal_ee_inf(fitparm, n*dt),
579 fitparm[1], fitparm[0], fitparm[2]);
580 /* Do a fit with tau2 fixed at the total time.
581 * One could also choose any other large value for tau2.
583 fitparm[0] = tau1_est;
585 fitparm[2] = (n-1)*dt;
586 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
587 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
588 effnERREST, fitparm, 4);
589 fitparm[3] = 1-fitparm[1];
591 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
592 || (fitparm[1] > 1 && !bAllowNegLTCorr))
596 fprintf(stdout, "a fitted parameter is negative\n");
597 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
598 sig[s]*anal_ee_inf(fitparm, n*dt),
599 fitparm[1], fitparm[0], fitparm[2]);
601 /* Do a single exponential fit */
602 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
603 fitparm[0] = tau1_est;
606 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
607 effnERREST, fitparm, 6);
608 fitparm[3] = 1-fitparm[1];
611 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
616 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
617 s+1, ee, a, tau1, tau2);
618 if (output_env_get_xvg_format(oenv) == exvgXMGR)
620 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
621 fprintf(fp, "@ legend string %d \"ee %6g\"\n",2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
623 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
625 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
626 fprintf(fp, "@ s%d legend \"ee %6g\"\n",2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
628 for (i = 0; i < nbs; i++)
630 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
631 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
637 real *ac, acint, ac_fit[4];
640 for (i = 0; i < n; i++)
642 ac[i] = val[s][i] - av[s];
652 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
653 dt, eacNormal, 1, FALSE, TRUE,
654 FALSE, 0, 0, effnNONE, 0);
658 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
660 for (i = 1; i <= fitlen/2; i++)
666 /* Generate more or less appropriate sigma's */
667 for (i = 0; i <= fitlen; i++)
669 fitsig[i] = sqrt(acint + dt*i);
672 ac_fit[0] = 0.5*acint;
674 ac_fit[2] = 10*acint;
675 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
676 bDebugMode(), effnEXP3, ac_fit, 0);
677 ac_fit[3] = 1 - ac_fit[1];
679 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
680 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
681 ac_fit[1], ac_fit[0], ac_fit[2]);
683 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
684 for (i = 0; i < nbs; i++)
686 fprintf(fp, "%g %g\n", tbs[i],
687 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
694 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
703 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
704 gmx_bool bError, real fit_start, real smooth_tail_start,
705 const output_env_t oenv)
707 const real tol = 1e-8;
712 please_cite(stdout, "Spoel2006b");
714 /* Compute negative derivative k(t) = -dc(t)/dt */
718 compute_derivative(nn, time, val[0], kt);
719 for (j = 0; (j < nn); j++)
726 for (j = 0; (j < nn); j++)
728 d2 += sqr(kt[j] - val[3][j]);
730 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
732 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
733 temp, smooth_tail_start, oenv);
738 analyse_corr(nn, time, val[0], val[2], val[4],
739 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
743 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
744 printf("Not doing anything. Sorry.\n");
748 static void filter(real flen, int n, int nset, real **val, real dt,
749 const output_env_t oenv)
752 double *filt, sum, vf, fluc, fluctot;
754 f = (int)(flen/(2*dt));
758 for (i = 1; i <= f; i++)
760 filt[i] = cos(M_PI*dt*i/flen);
763 for (i = 0; i <= f; i++)
767 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
768 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
770 for (s = 0; s < nset; s++)
773 for (i = f; i < n-f; i++)
775 vf = filt[0]*val[s][i];
776 for (j = 1; j <= f; j++)
778 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
780 fluc += sqr(val[s][i] - vf);
784 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
786 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
787 fprintf(stdout, "\n");
792 static void do_fit(FILE *out, int n, gmx_bool bYdy,
793 int ny, real *x0, real **val,
794 int npargs, t_pargs *ppa, const output_env_t oenv)
796 real *c1 = NULL, *sig = NULL, *fitparm;
797 real tendfit, tbeginfit;
798 int i, efitfn, nparm;
800 efitfn = get_acffitfn();
801 nparm = nfp_ffn[efitfn];
802 fprintf(out, "Will fit to the following function:\n");
803 fprintf(out, "%s\n", longs_ffn[efitfn]);
809 fprintf(out, "Using two columns as y and sigma values\n");
815 if (opt2parg_bSet("-beginfit", npargs, ppa))
817 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
823 if (opt2parg_bSet("-endfit", npargs, ppa))
825 tendfit = opt2parg_real("-endfit", npargs, ppa);
832 snew(fitparm, nparm);
844 fitparm[1] = 0.5*c1[0];
848 fitparm[0] = fitparm[2] = 0.5*c1[0];
854 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
861 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
869 fprintf(out, "Warning: don't know how to initialize the parameters\n");
870 for (i = 0; (i < nparm); i++)
875 fprintf(out, "Starting parameters:\n");
876 for (i = 0; (i < nparm); i++)
878 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
880 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
881 oenv, bDebugMode(), efitfn, fitparm, 0))
883 for (i = 0; (i < nparm); i++)
885 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
890 fprintf(out, "No solution was found\n");
894 static void do_ballistic(const char *balFile, int nData,
895 real *t, real **val, int nSet,
896 real balTime, int nBalExp,
897 gmx_bool bDerivative,
898 const output_env_t oenv)
900 double **ctd = NULL, *td = NULL;
901 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp, bDerivative);
902 static char *leg[] = {"Ac'(t)"};
906 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
911 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
912 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
914 for (set = 0; set < nSet; set++)
916 snew(ctd[set], nData);
917 for (i = 0; i < nData; i++)
919 ctd[set][i] = (double)val[set][i];
922 td[i] = (double)t[i];
926 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
929 for (i = 0; i < nData; i++)
931 fprintf(fp, " %g", t[i]);
932 for (set = 0; set < nSet; set++)
934 fprintf(fp, " %g", ctd[set][i]);
940 for (set = 0; set < nSet; set++)
949 printf("Number of data points is less than the number of parameters to fit\n."
950 "The system is underdetermined, hence no ballistic term can be found.\n\n");
954 static void do_geminate(const char *gemFile, int nData,
955 real *t, real **val, int nSet,
956 const real D, const real rcut, const real balTime,
957 const int nFitPoints, const real begFit, const real endFit,
958 const output_env_t oenv)
960 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
961 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
962 begFit, endFit, balTime, 1, FALSE);
963 const char *leg[] = {"Ac\\sgem\\N(t)"};
971 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
972 xvgr_legend(fp, asize(leg), leg, oenv);
974 for (set = 0; set < nSet; set++)
976 snew(ctd[set], nData);
977 snew(ctdGem[set], nData);
978 for (i = 0; i < nData; i++)
980 ctd[set][i] = (double)val[set][i];
983 td[i] = (double)t[i];
986 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
989 for (i = 0; i < nData; i++)
991 fprintf(fp, " %g", t[i]);
992 for (set = 0; set < nSet; set++)
994 fprintf(fp, " %g", ctdGem[set][i]);
999 for (set = 0; set < nSet; set++)
1009 int gmx_analyze(int argc, char *argv[])
1011 static const char *desc[] = {
1012 "[TT]g_analyze[tt] reads an ASCII file and analyzes data sets.",
1013 "A line in the input file may start with a time",
1014 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1015 "Multiple sets can also be",
1016 "read when they are separated by & (option [TT]-n[tt]);",
1017 "in this case only one [IT]y[it]-value is read from each line.",
1018 "All lines starting with # and @ are skipped.",
1019 "All analyses can also be done for the derivative of a set",
1020 "(option [TT]-d[tt]).[PAR]",
1022 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1023 "points are equidistant in time.[PAR]",
1025 "[TT]g_analyze[tt] always shows the average and standard deviation of each",
1026 "set, as well as the relative deviation of the third",
1027 "and fourth cumulant from those of a Gaussian distribution with the same",
1028 "standard deviation.[PAR]",
1030 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1031 "Be sure that the time interval between data points is",
1032 "much shorter than the time scale of the autocorrelation.[PAR]",
1034 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1035 "i/2 periods. The formula is:[BR]"
1036 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1037 "This is useful for principal components obtained from covariance",
1038 "analysis, since the principal components of random diffusion are",
1039 "pure cosines.[PAR]",
1041 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1043 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1045 "Option [TT]-av[tt] produces the average over the sets.",
1046 "Error bars can be added with the option [TT]-errbar[tt].",
1047 "The errorbars can represent the standard deviation, the error",
1048 "(assuming the points are independent) or the interval containing",
1049 "90% of the points, by discarding 5% of the points at the top and",
1052 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1053 "A set is divided in a number of blocks and averages are calculated for",
1054 "each block. The error for the total average is calculated from",
1055 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1056 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1057 "These errors are plotted as a function of the block size.",
1058 "Also an analytical block average curve is plotted, assuming",
1059 "that the autocorrelation is a sum of two exponentials.",
1060 "The analytical curve for the block average is:[BR]",
1061 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1062 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1063 "where T is the total time.",
1064 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1065 "When the actual block average is very close to the analytical curve,",
1066 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1067 "The complete derivation is given in",
1068 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1070 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1071 "component from a hydrogen bond autocorrelation function by the fitting",
1072 "of a sum of exponentials, as described in e.g.",
1073 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1074 "is the one with the most negative coefficient in the exponential,",
1075 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1076 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1078 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1079 "(and optionally kD) to the hydrogen bond autocorrelation function",
1080 "according to the reversible geminate recombination model. Removal of",
1081 "the ballistic component first is strongly advised. The model is presented in",
1082 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1084 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1085 "of each set and over all sets with respect to a filtered average.",
1086 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1087 "to len/2. len is supplied with the option [TT]-filter[tt].",
1088 "This filter reduces oscillations with period len/2 and len by a factor",
1089 "of 0.79 and 0.33 respectively.[PAR]",
1091 "Option [TT]-g[tt] fits the data to the function given with option",
1092 "[TT]-fitfn[tt].[PAR]",
1094 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1095 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1096 "zero or with a negative value are ignored.[PAR]"
1098 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1099 "on output from [TT]g_hbond[tt]. The input file can be taken directly",
1100 "from [TT]g_hbond -ac[tt], and then the same result should be produced."
1102 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1103 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1104 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1105 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1106 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1107 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1109 /* must correspond to enum avbar* declared at beginning of file */
1110 static const char *avbar_opt[avbarNR+1] = {
1111 NULL, "none", "stddev", "error", "90", NULL
1115 { "-time", FALSE, etBOOL, {&bHaveT},
1116 "Expect a time in the input" },
1117 { "-b", FALSE, etREAL, {&tb},
1118 "First time to read from set" },
1119 { "-e", FALSE, etREAL, {&te},
1120 "Last time to read from set" },
1121 { "-n", FALSE, etINT, {&nsets_in},
1122 "Read this number of sets separated by &" },
1123 { "-d", FALSE, etBOOL, {&bDer},
1124 "Use the derivative" },
1125 { "-dp", FALSE, etINT, {&d},
1126 "HIDDENThe derivative is the difference over this number of points" },
1127 { "-bw", FALSE, etREAL, {&binwidth},
1128 "Binwidth for the distribution" },
1129 { "-errbar", FALSE, etENUM, {avbar_opt},
1130 "Error bars for [TT]-av[tt]" },
1131 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1132 "Integrate data function(s) numerically using trapezium rule" },
1133 { "-aver_start", FALSE, etREAL, {&aver_start},
1134 "Start averaging the integral from here" },
1135 { "-xydy", FALSE, etBOOL, {&bXYdy},
1136 "Interpret second data set as error in the y values for integrating" },
1137 { "-regression", FALSE, etBOOL, {&bRegression},
1138 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1139 { "-luzar", FALSE, etBOOL, {&bLuzar},
1140 "Do a Luzar and Chandler analysis on a correlation function and related as produced by [TT]g_hbond[tt]. When in addition the [TT]-xydy[tt] flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1141 { "-temp", FALSE, etREAL, {&temp},
1142 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1143 { "-fitstart", FALSE, etREAL, {&fit_start},
1144 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1145 { "-fitend", FALSE, etREAL, {&fit_end},
1146 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1147 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1148 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1149 { "-nbmin", FALSE, etINT, {&nb_min},
1150 "HIDDENMinimum number of blocks for block averaging" },
1151 { "-resol", FALSE, etINT, {&resol},
1152 "HIDDENResolution for the block averaging, block size increases with"
1153 " a factor 2^(1/resol)" },
1154 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1155 "HIDDENAlways use a single exponential fit for the error estimate" },
1156 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1157 "HIDDENAllow a negative long-time correlation" },
1158 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1159 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1160 { "-filter", FALSE, etREAL, {&filtlen},
1161 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1162 { "-power", FALSE, etBOOL, {&bPower},
1163 "Fit data to: b t^a" },
1164 { "-subav", FALSE, etBOOL, {&bSubAv},
1165 "Subtract the average before autocorrelating" },
1166 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1167 "Calculate one ACF over all sets" },
1168 { "-nbalexp", FALSE, etINT, {&nBalExp},
1169 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1170 { "-baltime", FALSE, etREAL, {&balTime},
1171 "HIDDENTime up to which the ballistic component will be fitted" },
1172 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1173 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1174 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1175 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1176 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1177 /* "What type of gminate recombination to use"}, */
1178 /* { "-D", FALSE, etREAL, {&diffusion}, */
1179 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1181 #define NPA asize(pa)
1183 FILE *out, *out_fit;
1184 int n, nlast, s, nset, i, j = 0;
1185 real **val, *t, dt, tot, error;
1186 double *av, *sig, cum1, cum2, cum3, cum4, db;
1187 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1191 { efXVG, "-f", "graph", ffREAD },
1192 { efXVG, "-ac", "autocorr", ffOPTWR },
1193 { efXVG, "-msd", "msd", ffOPTWR },
1194 { efXVG, "-cc", "coscont", ffOPTWR },
1195 { efXVG, "-dist", "distr", ffOPTWR },
1196 { efXVG, "-av", "average", ffOPTWR },
1197 { efXVG, "-ee", "errest", ffOPTWR },
1198 { efXVG, "-bal", "ballisitc", ffOPTWR },
1199 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1200 { efLOG, "-g", "fitlog", ffOPTWR }
1202 #define NFILE asize(fnm)
1208 ppa = add_acf_pargs(&npargs, pa);
1210 CopyRight(stderr, argv[0]);
1211 parse_common_args(&argc, argv, PCA_CAN_VIEW,
1212 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv);
1214 acfile = opt2fn_null("-ac", NFILE, fnm);
1215 msdfile = opt2fn_null("-msd", NFILE, fnm);
1216 ccfile = opt2fn_null("-cc", NFILE, fnm);
1217 distfile = opt2fn_null("-dist", NFILE, fnm);
1218 avfile = opt2fn_null("-av", NFILE, fnm);
1219 eefile = opt2fn_null("-ee", NFILE, fnm);
1220 balfile = opt2fn_null("-bal", NFILE, fnm);
1221 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1222 /* When doing autocorrelation we don't want a fitlog for fitting
1223 * the function itself (not the acf) when the user did not ask for it.
1225 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1227 fitfile = opt2fn("-g", NFILE, fnm);
1231 fitfile = opt2fn_null("-g", NFILE, fnm);
1234 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1235 opt2parg_bSet("-b", npargs, ppa), tb,
1236 opt2parg_bSet("-e", npargs, ppa), te,
1237 nsets_in, &nset, &n, &dt, &t);
1238 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1242 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1245 for (s = 0; s < nset; s++)
1247 for (i = 0; (i < n); i++)
1249 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1258 printf("Calculating the integral using the trapezium rule\n");
1262 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1263 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1267 for (s = 0; s < nset; s++)
1269 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1270 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1275 if (fitfile != NULL)
1277 out_fit = ffopen(fitfile, "w");
1278 if (bXYdy && nset >= 2)
1280 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1284 for (s = 0; s < nset; s++)
1286 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1292 printf(" std. dev. relative deviation of\n");
1293 printf(" standard --------- cumulants from those of\n");
1294 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1295 printf(" cum. 3 cum. 4\n");
1298 for (s = 0; (s < nset); s++)
1304 for (i = 0; (i < n); i++)
1309 for (i = 0; (i < n); i++)
1311 db = val[s][i]-cum1;
1314 cum4 += db*db*db*db;
1320 sig[s] = sqrt(cum2);
1323 error = sqrt(cum2/(n-1));
1329 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1330 s+1, av[s], sig[s], error,
1331 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1332 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1338 filter(filtlen, n, nset, val, dt, oenv);
1343 out = xvgropen(msdfile, "Mean square displacement",
1344 "time", "MSD (nm\\S2\\N)", oenv);
1345 nlast = (int)(n*frac);
1346 for (s = 0; s < nset; s++)
1348 for (j = 0; j <= nlast; j++)
1352 fprintf(stderr, "\r%d", j);
1355 for (i = 0; i < n-j; i++)
1357 tot += sqr(val[s][i]-val[s][i+j]);
1360 fprintf(out, " %g %8g\n", dt*j, tot);
1364 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1368 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1372 plot_coscont(ccfile, n, nset, val, oenv);
1377 histogram(distfile, binwidth, n, nset, val, oenv);
1381 average(avfile, nenum(avbar_opt), n, nset, val, t);
1385 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1386 bEeFitAc, bEESEF, bEENLC, oenv);
1390 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, bDer, oenv);
1393 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1394 /* nFitPoints, fit_start, fit_end, oenv); */
1397 power_fit(n, nset, val, t);
1404 for (s = 0; s < nset; s++)
1406 for (i = 0; i < n; i++)
1412 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1413 eacNormal, bAverCorr);
1418 regression_analysis(n, bXYdy, t, nset, val);
1423 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1426 view_all(oenv, NFILE, fnm);