2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
45 #include "gromacs/utility/smalloc.h"
47 #include "gromacs/utility/fatalerror.h"
48 #include "gromacs/math/vec.h"
50 #include "gromacs/utility/futil.h"
54 #include "gromacs/statistics/statistics.h"
55 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/linearalgebra/matrix.h"
62 /* must correspond to char *avbar_opt[] declared in main() */
64 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
67 static void power_fit(int n, int nset, real **val, real *t)
69 real *x, *y, quality, a, b, r;
77 for (i = 0; i < n; i++)
87 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
88 for (i = 0; i < n; i++)
94 for (s = 0; s < nset; s++)
97 for (i = 0; i < n && val[s][i] >= 0; i++)
99 y[i] = log(val[s][i]);
103 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
105 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
106 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
107 s+1, quality, a, exp(b));
114 static real cosine_content(int nhp, int n, real *y)
115 /* Assumes n equidistant points */
117 double fac, cosyint, yyint;
125 fac = M_PI*nhp/(n-1);
129 for (i = 0; i < n; i++)
131 cosyint += cos(fac*i)*y[i];
135 return 2*cosyint*cosyint/(n*yyint);
138 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
139 const output_env_t oenv)
145 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
148 for (s = 0; s < nset; s++)
150 cc = cosine_content(s+1, n, val[s]);
151 fprintf(fp, " %d %g\n", s+1, cc);
152 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
155 fprintf(stdout, "\n");
160 static void regression_analysis(int n, gmx_bool bXYdy,
161 real *x, int nset, real **val)
163 real S, chi2, a, b, da, db, r = 0;
166 if (bXYdy || (nset == 1))
168 printf("Fitting data to a function f(x) = ax + b\n");
169 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
170 printf("Error estimates will be given if w_i (sigma) values are given\n");
171 printf("(use option -xydy).\n\n");
174 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
176 gmx_fatal(FARGS, "Error fitting the data: %s",
177 gmx_stats_message(ok));
182 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
184 gmx_fatal(FARGS, "Error fitting the data: %s",
185 gmx_stats_message(ok));
189 printf("Chi2 = %g\n", chi2);
190 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
191 printf("Correlation coefficient = %.1f%%\n", 100*r);
195 printf("a = %g +/- %g\n", a, da);
196 printf("b = %g +/- %g\n", b, db);
200 printf("a = %g\n", a);
201 printf("b = %g\n", b);
206 double chi2, *a, **xx, *y;
211 for (j = 0; (j < nset-1); j++)
215 for (i = 0; (i < n); i++)
218 for (j = 1; (j < nset); j++)
220 xx[j-1][i] = val[j][i];
224 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
225 printf("Fitting %d data points in %d sets\n", n, nset-1);
226 printf("chi2 = %g\n", chi2);
228 for (i = 0; (i < nset-1); i++)
240 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
241 const output_env_t oenv)
251 for (s = 0; s < nset; s++)
253 for (i = 0; i < n; i++)
259 else if (val[s][i] > max)
266 min = binwidth*floor(min/binwidth);
267 max = binwidth*ceil(max/binwidth);
274 nbin = (int)((max - min)/binwidth + 0.5) + 1;
275 fprintf(stderr, "Making distributions with %d bins\n", nbin);
277 fp = xvgropen(distfile, "Distribution", "", "", oenv);
278 for (s = 0; s < nset; s++)
280 for (i = 0; i < nbin; i++)
284 for (i = 0; i < n; i++)
286 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
288 for (i = 0; i < nbin; i++)
290 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
294 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
300 static int real_comp(const void *a, const void *b)
302 real dif = *(real *)a - *(real *)b;
318 static void average(const char *avfile, int avbar_opt,
319 int n, int nset, real **val, real *t)
326 fp = gmx_ffopen(avfile, "w");
327 if ((avbar_opt == avbarERROR) && (nset == 1))
329 avbar_opt = avbarNONE;
331 if (avbar_opt != avbarNONE)
333 if (avbar_opt == avbar90)
336 fprintf(fp, "@TYPE xydydy\n");
337 edge = (int)(nset*0.05+0.5);
338 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
339 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
343 fprintf(fp, "@TYPE xydy\n");
347 for (i = 0; i < n; i++)
350 for (s = 0; s < nset; s++)
355 fprintf(fp, " %g %g", t[i], av);
357 if (avbar_opt != avbarNONE)
359 if (avbar_opt == avbar90)
361 for (s = 0; s < nset; s++)
365 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
366 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
370 for (s = 0; s < nset; s++)
372 var += sqr(val[s][i]-av);
374 if (avbar_opt == avbarSTDDEV)
376 err = sqrt(var/nset);
380 err = sqrt(var/(nset*(nset-1)));
382 fprintf(fp, " %g", err);
389 if (avbar_opt == avbar90)
395 static real anal_ee_inf(real *parm, real T)
397 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
400 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
401 int nset, double *av, double *sig, real **val, real dt,
402 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
403 const output_env_t oenv)
406 int bs, prev_bs, nbs, nb;
411 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
413 real ee, a, tau1, tau2;
417 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
422 fp = xvgropen(eefile, "Error estimates",
423 "Block size (time)", "Error estimate", oenv);
424 if (output_env_get_print_xvgr_codes(oenv))
427 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
431 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
434 spacing = pow(2, 1.0/resol);
438 for (s = 0; s < nset; s++)
450 for (i = 0; i < nb; i++)
453 for (j = 0; j < bs; j++)
455 blav += val[s][bs*i+j];
457 var += sqr(av[s] - blav/bs);
466 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
483 for (i = 0; i < nbs/2; i++)
486 tbs[i] = tbs[nbs-1-i];
489 ybs[i] = ybs[nbs-1-i];
492 /* The initial slope of the normalized ybs^2 is 1.
493 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
494 * From this we take our initial guess for tau1.
503 while (i < nbs - 1 &&
504 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
508 fprintf(stdout, "Data set %d has strange time correlations:\n"
509 "the std. error using single points is larger than that of blocks of 2 points\n"
510 "The error estimate might be inaccurate, check the fit\n",
512 /* Use the total time as tau for the fitting weights */
513 tau_sig = (n - 1)*dt;
522 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
525 /* Generate more or less appropriate sigma's,
526 * also taking the density of points into account.
528 for (i = 0; i < nbs; i++)
532 dens = tbs[1]/tbs[0] - 1;
536 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
540 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
542 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
547 fitparm[0] = tau1_est;
549 /* We set the initial guess for tau2
550 * to halfway between tau1_est and the total time (on log scale).
552 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
553 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
554 bDebugMode(), effnERREST, fitparm, 0);
555 fitparm[3] = 1-fitparm[1];
557 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
558 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
562 if (fitparm[2] > (n-1)*dt)
565 "Warning: tau2 is longer than the length of the data (%g)\n"
566 " the statistics might be bad\n",
571 fprintf(stdout, "a fitted parameter is negative\n");
573 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
574 sig[s]*anal_ee_inf(fitparm, n*dt),
575 fitparm[1], fitparm[0], fitparm[2]);
576 /* Do a fit with tau2 fixed at the total time.
577 * One could also choose any other large value for tau2.
579 fitparm[0] = tau1_est;
581 fitparm[2] = (n-1)*dt;
582 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
583 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
584 effnERREST, fitparm, 4);
585 fitparm[3] = 1-fitparm[1];
587 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
588 || (fitparm[1] > 1 && !bAllowNegLTCorr))
592 fprintf(stdout, "a fitted parameter is negative\n");
593 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
594 sig[s]*anal_ee_inf(fitparm, n*dt),
595 fitparm[1], fitparm[0], fitparm[2]);
597 /* Do a single exponential fit */
598 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
599 fitparm[0] = tau1_est;
602 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
603 effnERREST, fitparm, 6);
604 fitparm[3] = 1-fitparm[1];
607 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
612 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
613 s+1, ee, a, tau1, tau2);
614 if (output_env_get_xvg_format(oenv) == exvgXMGR)
616 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
617 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
619 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
621 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
622 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
624 for (i = 0; i < nbs; i++)
626 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
627 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
633 real *ac, acint, ac_fit[4];
636 for (i = 0; i < n; i++)
638 ac[i] = val[s][i] - av[s];
648 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
649 dt, eacNormal, 1, FALSE, TRUE,
650 FALSE, 0, 0, effnNONE);
654 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
656 for (i = 1; i <= fitlen/2; i++)
662 /* Generate more or less appropriate sigma's */
663 for (i = 0; i <= fitlen; i++)
665 fitsig[i] = sqrt(acint + dt*i);
668 ac_fit[0] = 0.5*acint;
670 ac_fit[2] = 10*acint;
671 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
672 bDebugMode(), effnEXP3, ac_fit, 0);
673 ac_fit[3] = 1 - ac_fit[1];
675 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
676 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
677 ac_fit[1], ac_fit[0], ac_fit[2]);
679 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
680 for (i = 0; i < nbs; i++)
682 fprintf(fp, "%g %g\n", tbs[i],
683 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
690 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
699 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
700 gmx_bool bError, real fit_start, real smooth_tail_start,
701 const output_env_t oenv)
703 const real tol = 1e-8;
708 please_cite(stdout, "Spoel2006b");
710 /* Compute negative derivative k(t) = -dc(t)/dt */
714 compute_derivative(nn, time, val[0], kt);
715 for (j = 0; (j < nn); j++)
722 for (j = 0; (j < nn); j++)
724 d2 += sqr(kt[j] - val[3][j]);
726 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
728 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
729 temp, smooth_tail_start, oenv);
734 analyse_corr(nn, time, val[0], val[2], val[4],
735 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
739 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
740 printf("Not doing anything. Sorry.\n");
744 static void filter(real flen, int n, int nset, real **val, real dt)
747 double *filt, sum, vf, fluc, fluctot;
749 f = (int)(flen/(2*dt));
753 for (i = 1; i <= f; i++)
755 filt[i] = cos(M_PI*dt*i/flen);
758 for (i = 0; i <= f; i++)
762 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
763 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
765 for (s = 0; s < nset; s++)
768 for (i = f; i < n-f; i++)
770 vf = filt[0]*val[s][i];
771 for (j = 1; j <= f; j++)
773 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
775 fluc += sqr(val[s][i] - vf);
779 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
781 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
782 fprintf(stdout, "\n");
787 static void do_fit(FILE *out, int n, gmx_bool bYdy,
788 int ny, real *x0, real **val,
789 int npargs, t_pargs *ppa, const output_env_t oenv)
791 real *c1 = NULL, *sig = NULL, *fitparm;
792 real tendfit, tbeginfit;
793 int i, efitfn, nparm;
795 efitfn = get_acffitfn();
796 nparm = nfp_ffn[efitfn];
797 fprintf(out, "Will fit to the following function:\n");
798 fprintf(out, "%s\n", longs_ffn[efitfn]);
804 fprintf(out, "Using two columns as y and sigma values\n");
810 if (opt2parg_bSet("-beginfit", npargs, ppa))
812 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
818 if (opt2parg_bSet("-endfit", npargs, ppa))
820 tendfit = opt2parg_real("-endfit", npargs, ppa);
827 snew(fitparm, nparm);
839 fitparm[1] = 0.5*c1[0];
843 fitparm[0] = fitparm[2] = 0.5*c1[0];
849 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
856 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
864 fprintf(out, "Warning: don't know how to initialize the parameters\n");
865 for (i = 0; (i < nparm); i++)
870 fprintf(out, "Starting parameters:\n");
871 for (i = 0; (i < nparm); i++)
873 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
875 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
876 oenv, bDebugMode(), efitfn, fitparm, 0))
878 for (i = 0; (i < nparm); i++)
880 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
885 fprintf(out, "No solution was found\n");
889 static void do_ballistic(const char *balFile, int nData,
890 real *t, real **val, int nSet,
891 real balTime, int nBalExp,
892 const output_env_t oenv)
894 double **ctd = NULL, *td = NULL;
895 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
896 static char *leg[] = {"Ac'(t)"};
900 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
905 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
906 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
908 for (set = 0; set < nSet; set++)
910 snew(ctd[set], nData);
911 for (i = 0; i < nData; i++)
913 ctd[set][i] = (double)val[set][i];
916 td[i] = (double)t[i];
920 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
923 for (i = 0; i < nData; i++)
925 fprintf(fp, " %g", t[i]);
926 for (set = 0; set < nSet; set++)
928 fprintf(fp, " %g", ctd[set][i]);
934 for (set = 0; set < nSet; set++)
943 printf("Number of data points is less than the number of parameters to fit\n."
944 "The system is underdetermined, hence no ballistic term can be found.\n\n");
948 static void do_geminate(const char *gemFile, int nData,
949 real *t, real **val, int nSet,
950 const real D, const real rcut, const real balTime,
951 const int nFitPoints, const real begFit, const real endFit,
952 const output_env_t oenv)
954 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
955 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
956 begFit, endFit, balTime, 1);
957 const char *leg[] = {"Ac\\sgem\\N(t)"};
965 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
966 xvgr_legend(fp, asize(leg), leg, oenv);
968 for (set = 0; set < nSet; set++)
970 snew(ctd[set], nData);
971 snew(ctdGem[set], nData);
972 for (i = 0; i < nData; i++)
974 ctd[set][i] = (double)val[set][i];
977 td[i] = (double)t[i];
980 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
983 for (i = 0; i < nData; i++)
985 fprintf(fp, " %g", t[i]);
986 for (set = 0; set < nSet; set++)
988 fprintf(fp, " %g", ctdGem[set][i]);
993 for (set = 0; set < nSet; set++)
1003 int gmx_analyze(int argc, char *argv[])
1005 static const char *desc[] = {
1006 "[THISMODULE] reads an ASCII file and analyzes data sets.",
1007 "A line in the input file may start with a time",
1008 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1009 "Multiple sets can also be",
1010 "read when they are separated by & (option [TT]-n[tt]);",
1011 "in this case only one [IT]y[it]-value is read from each line.",
1012 "All lines starting with # and @ are skipped.",
1013 "All analyses can also be done for the derivative of a set",
1014 "(option [TT]-d[tt]).[PAR]",
1016 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1017 "points are equidistant in time.[PAR]",
1019 "[THISMODULE] always shows the average and standard deviation of each",
1020 "set, as well as the relative deviation of the third",
1021 "and fourth cumulant from those of a Gaussian distribution with the same",
1022 "standard deviation.[PAR]",
1024 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1025 "Be sure that the time interval between data points is",
1026 "much shorter than the time scale of the autocorrelation.[PAR]",
1028 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1029 "i/2 periods. The formula is:[BR]"
1030 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1031 "This is useful for principal components obtained from covariance",
1032 "analysis, since the principal components of random diffusion are",
1033 "pure cosines.[PAR]",
1035 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1037 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1039 "Option [TT]-av[tt] produces the average over the sets.",
1040 "Error bars can be added with the option [TT]-errbar[tt].",
1041 "The errorbars can represent the standard deviation, the error",
1042 "(assuming the points are independent) or the interval containing",
1043 "90% of the points, by discarding 5% of the points at the top and",
1046 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1047 "A set is divided in a number of blocks and averages are calculated for",
1048 "each block. The error for the total average is calculated from",
1049 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1050 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1051 "These errors are plotted as a function of the block size.",
1052 "Also an analytical block average curve is plotted, assuming",
1053 "that the autocorrelation is a sum of two exponentials.",
1054 "The analytical curve for the block average is:[BR]",
1055 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1056 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1057 "where T is the total time.",
1058 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1059 "When the actual block average is very close to the analytical curve,",
1060 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1061 "The complete derivation is given in",
1062 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1064 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1065 "component from a hydrogen bond autocorrelation function by the fitting",
1066 "of a sum of exponentials, as described in e.g.",
1067 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1068 "is the one with the most negative coefficient in the exponential,",
1069 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1070 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1072 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1073 "(and optionally kD) to the hydrogen bond autocorrelation function",
1074 "according to the reversible geminate recombination model. Removal of",
1075 "the ballistic component first is strongly advised. The model is presented in",
1076 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1078 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1079 "of each set and over all sets with respect to a filtered average.",
1080 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1081 "to len/2. len is supplied with the option [TT]-filter[tt].",
1082 "This filter reduces oscillations with period len/2 and len by a factor",
1083 "of 0.79 and 0.33 respectively.[PAR]",
1085 "Option [TT]-g[tt] fits the data to the function given with option",
1086 "[TT]-fitfn[tt].[PAR]",
1088 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1089 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1090 "zero or with a negative value are ignored.[PAR]"
1092 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1093 "on output from [gmx-hbond]. The input file can be taken directly",
1094 "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1096 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1097 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1098 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1099 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1100 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1101 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1103 /* must correspond to enum avbar* declared at beginning of file */
1104 static const char *avbar_opt[avbarNR+1] = {
1105 NULL, "none", "stddev", "error", "90", NULL
1109 { "-time", FALSE, etBOOL, {&bHaveT},
1110 "Expect a time in the input" },
1111 { "-b", FALSE, etREAL, {&tb},
1112 "First time to read from set" },
1113 { "-e", FALSE, etREAL, {&te},
1114 "Last time to read from set" },
1115 { "-n", FALSE, etINT, {&nsets_in},
1116 "Read this number of sets separated by &" },
1117 { "-d", FALSE, etBOOL, {&bDer},
1118 "Use the derivative" },
1119 { "-dp", FALSE, etINT, {&d},
1120 "HIDDENThe derivative is the difference over this number of points" },
1121 { "-bw", FALSE, etREAL, {&binwidth},
1122 "Binwidth for the distribution" },
1123 { "-errbar", FALSE, etENUM, {avbar_opt},
1124 "Error bars for [TT]-av[tt]" },
1125 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1126 "Integrate data function(s) numerically using trapezium rule" },
1127 { "-aver_start", FALSE, etREAL, {&aver_start},
1128 "Start averaging the integral from here" },
1129 { "-xydy", FALSE, etBOOL, {&bXYdy},
1130 "Interpret second data set as error in the y values for integrating" },
1131 { "-regression", FALSE, etBOOL, {&bRegression},
1132 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1133 { "-luzar", FALSE, etBOOL, {&bLuzar},
1134 "Do a Luzar and Chandler analysis on a correlation function and "
1135 "related as produced by [gmx-hbond]. When in addition the "
1136 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1137 "interpreted as errors in c(t) and n(t)." },
1138 { "-temp", FALSE, etREAL, {&temp},
1139 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1140 { "-fitstart", FALSE, etREAL, {&fit_start},
1141 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1142 { "-fitend", FALSE, etREAL, {&fit_end},
1143 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1144 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1145 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1146 { "-nbmin", FALSE, etINT, {&nb_min},
1147 "HIDDENMinimum number of blocks for block averaging" },
1148 { "-resol", FALSE, etINT, {&resol},
1149 "HIDDENResolution for the block averaging, block size increases with"
1150 " a factor 2^(1/resol)" },
1151 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1152 "HIDDENAlways use a single exponential fit for the error estimate" },
1153 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1154 "HIDDENAllow a negative long-time correlation" },
1155 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1156 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1157 { "-filter", FALSE, etREAL, {&filtlen},
1158 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1159 { "-power", FALSE, etBOOL, {&bPower},
1160 "Fit data to: b t^a" },
1161 { "-subav", FALSE, etBOOL, {&bSubAv},
1162 "Subtract the average before autocorrelating" },
1163 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1164 "Calculate one ACF over all sets" },
1165 { "-nbalexp", FALSE, etINT, {&nBalExp},
1166 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1167 { "-baltime", FALSE, etREAL, {&balTime},
1168 "HIDDENTime up to which the ballistic component will be fitted" },
1169 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1170 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1171 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1172 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1173 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1174 /* "What type of gminate recombination to use"}, */
1175 /* { "-D", FALSE, etREAL, {&diffusion}, */
1176 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1178 #define NPA asize(pa)
1180 FILE *out, *out_fit;
1181 int n, nlast, s, nset, i, j = 0;
1182 real **val, *t, dt, tot, error;
1183 double *av, *sig, cum1, cum2, cum3, cum4, db;
1184 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1188 { efXVG, "-f", "graph", ffREAD },
1189 { efXVG, "-ac", "autocorr", ffOPTWR },
1190 { efXVG, "-msd", "msd", ffOPTWR },
1191 { efXVG, "-cc", "coscont", ffOPTWR },
1192 { efXVG, "-dist", "distr", ffOPTWR },
1193 { efXVG, "-av", "average", ffOPTWR },
1194 { efXVG, "-ee", "errest", ffOPTWR },
1195 { efXVG, "-bal", "ballisitc", ffOPTWR },
1196 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1197 { efLOG, "-g", "fitlog", ffOPTWR }
1199 #define NFILE asize(fnm)
1205 ppa = add_acf_pargs(&npargs, pa);
1207 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1208 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1213 acfile = opt2fn_null("-ac", NFILE, fnm);
1214 msdfile = opt2fn_null("-msd", NFILE, fnm);
1215 ccfile = opt2fn_null("-cc", NFILE, fnm);
1216 distfile = opt2fn_null("-dist", NFILE, fnm);
1217 avfile = opt2fn_null("-av", NFILE, fnm);
1218 eefile = opt2fn_null("-ee", NFILE, fnm);
1219 balfile = opt2fn_null("-bal", NFILE, fnm);
1220 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1221 /* When doing autocorrelation we don't want a fitlog for fitting
1222 * the function itself (not the acf) when the user did not ask for it.
1224 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1226 fitfile = opt2fn("-g", NFILE, fnm);
1230 fitfile = opt2fn_null("-g", NFILE, fnm);
1233 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1234 opt2parg_bSet("-b", npargs, ppa), tb,
1235 opt2parg_bSet("-e", npargs, ppa), te,
1236 nsets_in, &nset, &n, &dt, &t);
1237 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1241 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1244 for (s = 0; s < nset; s++)
1246 for (i = 0; (i < n); i++)
1248 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1257 printf("Calculating the integral using the trapezium rule\n");
1261 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1262 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1266 for (s = 0; s < nset; s++)
1268 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1269 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1274 if (fitfile != NULL)
1276 out_fit = gmx_ffopen(fitfile, "w");
1277 if (bXYdy && nset >= 2)
1279 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1283 for (s = 0; s < nset; s++)
1285 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1288 gmx_ffclose(out_fit);
1291 printf(" std. dev. relative deviation of\n");
1292 printf(" standard --------- cumulants from those of\n");
1293 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1294 printf(" cum. 3 cum. 4\n");
1297 for (s = 0; (s < nset); s++)
1303 for (i = 0; (i < n); i++)
1308 for (i = 0; (i < n); i++)
1310 db = val[s][i]-cum1;
1313 cum4 += db*db*db*db;
1319 sig[s] = sqrt(cum2);
1322 error = sqrt(cum2/(n-1));
1328 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1329 s+1, av[s], sig[s], error,
1330 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1331 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1337 filter(filtlen, n, nset, val, dt);
1342 out = xvgropen(msdfile, "Mean square displacement",
1343 "time", "MSD (nm\\S2\\N)", oenv);
1344 nlast = (int)(n*frac);
1345 for (s = 0; s < nset; s++)
1347 for (j = 0; j <= nlast; j++)
1351 fprintf(stderr, "\r%d", j);
1354 for (i = 0; i < n-j; i++)
1356 tot += sqr(val[s][i]-val[s][i+j]);
1359 fprintf(out, " %g %8g\n", dt*j, tot);
1363 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1367 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1371 plot_coscont(ccfile, n, nset, val, oenv);
1376 histogram(distfile, binwidth, n, nset, val, oenv);
1380 average(avfile, nenum(avbar_opt), n, nset, val, t);
1384 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1385 bEeFitAc, bEESEF, bEENLC, oenv);
1389 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1392 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1393 /* nFitPoints, fit_start, fit_end, oenv); */
1396 power_fit(n, nset, val, t);
1403 for (s = 0; s < nset; s++)
1405 for (i = 0; i < n; i++)
1411 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1412 eacNormal, bAverCorr);
1417 regression_analysis(n, bXYdy, t, nset, val);
1422 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1425 view_all(oenv, NFILE, fnm);