2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/utility/smalloc.h"
49 #include "gromacs/utility/fatalerror.h"
52 #include "gromacs/utility/futil.h"
56 #include "gromacs/statistics/statistics.h"
61 #include "gromacs/linearalgebra/matrix.h"
63 /* must correspond to char *avbar_opt[] declared in main() */
65 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
68 static void power_fit(int n, int nset, real **val, real *t)
70 real *x, *y, quality, a, b, r;
78 for (i = 0; i < n; i++)
88 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
89 for (i = 0; i < n; i++)
95 for (s = 0; s < nset; s++)
98 for (i = 0; i < n && val[s][i] >= 0; i++)
100 y[i] = log(val[s][i]);
104 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
106 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
107 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
108 s+1, quality, a, exp(b));
115 static real cosine_content(int nhp, int n, real *y)
116 /* Assumes n equidistant points */
118 double fac, cosyint, yyint;
126 fac = M_PI*nhp/(n-1);
130 for (i = 0; i < n; i++)
132 cosyint += cos(fac*i)*y[i];
136 return 2*cosyint*cosyint/(n*yyint);
139 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
140 const output_env_t oenv)
146 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
149 for (s = 0; s < nset; s++)
151 cc = cosine_content(s+1, n, val[s]);
152 fprintf(fp, " %d %g\n", s+1, cc);
153 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
156 fprintf(stdout, "\n");
161 static void regression_analysis(int n, gmx_bool bXYdy,
162 real *x, int nset, real **val)
164 real S, chi2, a, b, da, db, r = 0;
167 if (bXYdy || (nset == 1))
169 printf("Fitting data to a function f(x) = ax + b\n");
170 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
171 printf("Error estimates will be given if w_i (sigma) values are given\n");
172 printf("(use option -xydy).\n\n");
175 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
177 gmx_fatal(FARGS, "Error fitting the data: %s",
178 gmx_stats_message(ok));
183 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
185 gmx_fatal(FARGS, "Error fitting the data: %s",
186 gmx_stats_message(ok));
190 printf("Chi2 = %g\n", chi2);
191 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
192 printf("Correlation coefficient = %.1f%%\n", 100*r);
196 printf("a = %g +/- %g\n", a, da);
197 printf("b = %g +/- %g\n", b, db);
201 printf("a = %g\n", a);
202 printf("b = %g\n", b);
207 double chi2, *a, **xx, *y;
212 for (j = 0; (j < nset-1); j++)
216 for (i = 0; (i < n); i++)
219 for (j = 1; (j < nset); j++)
221 xx[j-1][i] = val[j][i];
225 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
226 printf("Fitting %d data points in %d sets\n", n, nset-1);
227 printf("chi2 = %g\n", chi2);
229 for (i = 0; (i < nset-1); i++)
241 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
242 const output_env_t oenv)
252 for (s = 0; s < nset; s++)
254 for (i = 0; i < n; i++)
260 else if (val[s][i] > max)
267 min = binwidth*floor(min/binwidth);
268 max = binwidth*ceil(max/binwidth);
275 nbin = (int)((max - min)/binwidth + 0.5) + 1;
276 fprintf(stderr, "Making distributions with %d bins\n", nbin);
278 fp = xvgropen(distfile, "Distribution", "", "", oenv);
279 for (s = 0; s < nset; s++)
281 for (i = 0; i < nbin; i++)
285 for (i = 0; i < n; i++)
287 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
289 for (i = 0; i < nbin; i++)
291 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
301 static int real_comp(const void *a, const void *b)
303 real dif = *(real *)a - *(real *)b;
319 static void average(const char *avfile, int avbar_opt,
320 int n, int nset, real **val, real *t)
327 fp = gmx_ffopen(avfile, "w");
328 if ((avbar_opt == avbarERROR) && (nset == 1))
330 avbar_opt = avbarNONE;
332 if (avbar_opt != avbarNONE)
334 if (avbar_opt == avbar90)
337 fprintf(fp, "@TYPE xydydy\n");
338 edge = (int)(nset*0.05+0.5);
339 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
340 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
344 fprintf(fp, "@TYPE xydy\n");
348 for (i = 0; i < n; i++)
351 for (s = 0; s < nset; s++)
356 fprintf(fp, " %g %g", t[i], av);
358 if (avbar_opt != avbarNONE)
360 if (avbar_opt == avbar90)
362 for (s = 0; s < nset; s++)
366 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
367 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
371 for (s = 0; s < nset; s++)
373 var += sqr(val[s][i]-av);
375 if (avbar_opt == avbarSTDDEV)
377 err = sqrt(var/nset);
381 err = sqrt(var/(nset*(nset-1)));
383 fprintf(fp, " %g", err);
390 if (avbar_opt == avbar90)
396 static real anal_ee_inf(real *parm, real T)
398 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
401 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
402 int nset, double *av, double *sig, real **val, real dt,
403 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
404 const output_env_t oenv)
407 int bs, prev_bs, nbs, nb;
412 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
414 real ee, a, tau1, tau2;
418 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
423 fp = xvgropen(eefile, "Error estimates",
424 "Block size (time)", "Error estimate", oenv);
425 if (output_env_get_print_xvgr_codes(oenv))
428 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
432 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
435 spacing = pow(2, 1.0/resol);
439 for (s = 0; s < nset; s++)
451 for (i = 0; i < nb; i++)
454 for (j = 0; j < bs; j++)
456 blav += val[s][bs*i+j];
458 var += sqr(av[s] - blav/bs);
467 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
484 for (i = 0; i < nbs/2; i++)
487 tbs[i] = tbs[nbs-1-i];
490 ybs[i] = ybs[nbs-1-i];
493 /* The initial slope of the normalized ybs^2 is 1.
494 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
495 * From this we take our initial guess for tau1.
504 while (i < nbs - 1 &&
505 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
509 fprintf(stdout, "Data set %d has strange time correlations:\n"
510 "the std. error using single points is larger than that of blocks of 2 points\n"
511 "The error estimate might be inaccurate, check the fit\n",
513 /* Use the total time as tau for the fitting weights */
514 tau_sig = (n - 1)*dt;
523 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
526 /* Generate more or less appropriate sigma's,
527 * also taking the density of points into account.
529 for (i = 0; i < nbs; i++)
533 dens = tbs[1]/tbs[0] - 1;
537 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
541 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
543 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
548 fitparm[0] = tau1_est;
550 /* We set the initial guess for tau2
551 * to halfway between tau1_est and the total time (on log scale).
553 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
554 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
555 bDebugMode(), effnERREST, fitparm, 0);
556 fitparm[3] = 1-fitparm[1];
558 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
559 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
563 if (fitparm[2] > (n-1)*dt)
566 "Warning: tau2 is longer than the length of the data (%g)\n"
567 " the statistics might be bad\n",
572 fprintf(stdout, "a fitted parameter is negative\n");
574 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
575 sig[s]*anal_ee_inf(fitparm, n*dt),
576 fitparm[1], fitparm[0], fitparm[2]);
577 /* Do a fit with tau2 fixed at the total time.
578 * One could also choose any other large value for tau2.
580 fitparm[0] = tau1_est;
582 fitparm[2] = (n-1)*dt;
583 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
584 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
585 effnERREST, fitparm, 4);
586 fitparm[3] = 1-fitparm[1];
588 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
589 || (fitparm[1] > 1 && !bAllowNegLTCorr))
593 fprintf(stdout, "a fitted parameter is negative\n");
594 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
595 sig[s]*anal_ee_inf(fitparm, n*dt),
596 fitparm[1], fitparm[0], fitparm[2]);
598 /* Do a single exponential fit */
599 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
600 fitparm[0] = tau1_est;
603 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
604 effnERREST, fitparm, 6);
605 fitparm[3] = 1-fitparm[1];
608 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
613 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
614 s+1, ee, a, tau1, tau2);
615 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
616 fprintf(fp, "@ legend string %d \"ee %6g\"\n",
617 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
618 for (i = 0; i < nbs; i++)
620 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
621 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
627 real *ac, acint, ac_fit[4];
630 for (i = 0; i < n; i++)
632 ac[i] = val[s][i] - av[s];
642 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
643 dt, eacNormal, 1, FALSE, TRUE,
644 FALSE, 0, 0, effnNONE);
648 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
650 for (i = 1; i <= fitlen/2; i++)
656 /* Generate more or less appropriate sigma's */
657 for (i = 0; i <= fitlen; i++)
659 fitsig[i] = sqrt(acint + dt*i);
662 ac_fit[0] = 0.5*acint;
664 ac_fit[2] = 10*acint;
665 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
666 bDebugMode(), effnEXP3, ac_fit, 0);
667 ac_fit[3] = 1 - ac_fit[1];
669 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
670 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
671 ac_fit[1], ac_fit[0], ac_fit[2]);
674 for (i = 0; i < nbs; i++)
676 fprintf(fp, "%g %g\n", tbs[i],
677 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
693 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
694 gmx_bool bError, real fit_start, real smooth_tail_start,
695 const output_env_t oenv)
697 const real tol = 1e-8;
702 please_cite(stdout, "Spoel2006b");
704 /* Compute negative derivative k(t) = -dc(t)/dt */
708 compute_derivative(nn, time, val[0], kt);
709 for (j = 0; (j < nn); j++)
716 for (j = 0; (j < nn); j++)
718 d2 += sqr(kt[j] - val[3][j]);
720 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
722 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
723 temp, smooth_tail_start, oenv);
728 analyse_corr(nn, time, val[0], val[2], val[4],
729 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
733 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
734 printf("Not doing anything. Sorry.\n");
738 static void filter(real flen, int n, int nset, real **val, real dt)
741 double *filt, sum, vf, fluc, fluctot;
743 f = (int)(flen/(2*dt));
747 for (i = 1; i <= f; i++)
749 filt[i] = cos(M_PI*dt*i/flen);
752 for (i = 0; i <= f; i++)
756 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
757 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
759 for (s = 0; s < nset; s++)
762 for (i = f; i < n-f; i++)
764 vf = filt[0]*val[s][i];
765 for (j = 1; j <= f; j++)
767 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
769 fluc += sqr(val[s][i] - vf);
773 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
775 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
776 fprintf(stdout, "\n");
781 static void do_fit(FILE *out, int n, gmx_bool bYdy,
782 int ny, real *x0, real **val,
783 int npargs, t_pargs *ppa, const output_env_t oenv)
785 real *c1 = NULL, *sig = NULL, *fitparm;
786 real tendfit, tbeginfit;
787 int i, efitfn, nparm;
789 efitfn = get_acffitfn();
790 nparm = nfp_ffn[efitfn];
791 fprintf(out, "Will fit to the following function:\n");
792 fprintf(out, "%s\n", longs_ffn[efitfn]);
798 fprintf(out, "Using two columns as y and sigma values\n");
804 if (opt2parg_bSet("-beginfit", npargs, ppa))
806 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
812 if (opt2parg_bSet("-endfit", npargs, ppa))
814 tendfit = opt2parg_real("-endfit", npargs, ppa);
821 snew(fitparm, nparm);
833 fitparm[1] = 0.5*c1[0];
837 fitparm[0] = fitparm[2] = 0.5*c1[0];
843 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
850 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
858 fprintf(out, "Warning: don't know how to initialize the parameters\n");
859 for (i = 0; (i < nparm); i++)
864 fprintf(out, "Starting parameters:\n");
865 for (i = 0; (i < nparm); i++)
867 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
869 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
870 oenv, bDebugMode(), efitfn, fitparm, 0))
872 for (i = 0; (i < nparm); i++)
874 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
879 fprintf(out, "No solution was found\n");
883 static void do_ballistic(const char *balFile, int nData,
884 real *t, real **val, int nSet,
885 real balTime, int nBalExp,
886 const output_env_t oenv)
888 double **ctd = NULL, *td = NULL;
889 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
890 static char *leg[] = {"Ac'(t)"};
894 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
899 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
900 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
902 for (set = 0; set < nSet; set++)
904 snew(ctd[set], nData);
905 for (i = 0; i < nData; i++)
907 ctd[set][i] = (double)val[set][i];
910 td[i] = (double)t[i];
914 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
917 for (i = 0; i < nData; i++)
919 fprintf(fp, " %g", t[i]);
920 for (set = 0; set < nSet; set++)
922 fprintf(fp, " %g", ctd[set][i]);
928 for (set = 0; set < nSet; set++)
937 printf("Number of data points is less than the number of parameters to fit\n."
938 "The system is underdetermined, hence no ballistic term can be found.\n\n");
942 static void do_geminate(const char *gemFile, int nData,
943 real *t, real **val, int nSet,
944 const real D, const real rcut, const real balTime,
945 const int nFitPoints, const real begFit, const real endFit,
946 const output_env_t oenv)
948 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
949 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
950 begFit, endFit, balTime, 1);
951 const char *leg[] = {"Ac\\sgem\\N(t)"};
959 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
960 xvgr_legend(fp, asize(leg), leg, oenv);
962 for (set = 0; set < nSet; set++)
964 snew(ctd[set], nData);
965 snew(ctdGem[set], nData);
966 for (i = 0; i < nData; i++)
968 ctd[set][i] = (double)val[set][i];
971 td[i] = (double)t[i];
974 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
977 for (i = 0; i < nData; i++)
979 fprintf(fp, " %g", t[i]);
980 for (set = 0; set < nSet; set++)
982 fprintf(fp, " %g", ctdGem[set][i]);
987 for (set = 0; set < nSet; set++)
997 int gmx_analyze(int argc, char *argv[])
999 static const char *desc[] = {
1000 "[THISMODULE] reads an ASCII file and analyzes data sets.",
1001 "A line in the input file may start with a time",
1002 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1003 "Multiple sets can also be",
1004 "read when they are separated by & (option [TT]-n[tt]);",
1005 "in this case only one [IT]y[it]-value is read from each line.",
1006 "All lines starting with # and @ are skipped.",
1007 "All analyses can also be done for the derivative of a set",
1008 "(option [TT]-d[tt]).[PAR]",
1010 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1011 "points are equidistant in time.[PAR]",
1013 "[THISMODULE] always shows the average and standard deviation of each",
1014 "set, as well as the relative deviation of the third",
1015 "and fourth cumulant from those of a Gaussian distribution with the same",
1016 "standard deviation.[PAR]",
1018 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1019 "Be sure that the time interval between data points is",
1020 "much shorter than the time scale of the autocorrelation.[PAR]",
1022 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1023 "i/2 periods. The formula is:[BR]"
1024 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1025 "This is useful for principal components obtained from covariance",
1026 "analysis, since the principal components of random diffusion are",
1027 "pure cosines.[PAR]",
1029 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1031 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1033 "Option [TT]-av[tt] produces the average over the sets.",
1034 "Error bars can be added with the option [TT]-errbar[tt].",
1035 "The errorbars can represent the standard deviation, the error",
1036 "(assuming the points are independent) or the interval containing",
1037 "90% of the points, by discarding 5% of the points at the top and",
1040 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1041 "A set is divided in a number of blocks and averages are calculated for",
1042 "each block. The error for the total average is calculated from",
1043 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1044 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1045 "These errors are plotted as a function of the block size.",
1046 "Also an analytical block average curve is plotted, assuming",
1047 "that the autocorrelation is a sum of two exponentials.",
1048 "The analytical curve for the block average is:[BR]",
1049 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1050 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1051 "where T is the total time.",
1052 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1053 "When the actual block average is very close to the analytical curve,",
1054 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1055 "The complete derivation is given in",
1056 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1058 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1059 "component from a hydrogen bond autocorrelation function by the fitting",
1060 "of a sum of exponentials, as described in e.g.",
1061 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1062 "is the one with the most negative coefficient in the exponential,",
1063 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1064 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1066 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1067 "(and optionally kD) to the hydrogen bond autocorrelation function",
1068 "according to the reversible geminate recombination model. Removal of",
1069 "the ballistic component first is strongly advised. The model is presented in",
1070 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1072 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1073 "of each set and over all sets with respect to a filtered average.",
1074 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1075 "to len/2. len is supplied with the option [TT]-filter[tt].",
1076 "This filter reduces oscillations with period len/2 and len by a factor",
1077 "of 0.79 and 0.33 respectively.[PAR]",
1079 "Option [TT]-g[tt] fits the data to the function given with option",
1080 "[TT]-fitfn[tt].[PAR]",
1082 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1083 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1084 "zero or with a negative value are ignored.[PAR]"
1086 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1087 "on output from [gmx-hbond]. The input file can be taken directly",
1088 "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1090 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1091 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1092 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1093 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1094 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1095 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1097 /* must correspond to enum avbar* declared at beginning of file */
1098 static const char *avbar_opt[avbarNR+1] = {
1099 NULL, "none", "stddev", "error", "90", NULL
1103 { "-time", FALSE, etBOOL, {&bHaveT},
1104 "Expect a time in the input" },
1105 { "-b", FALSE, etREAL, {&tb},
1106 "First time to read from set" },
1107 { "-e", FALSE, etREAL, {&te},
1108 "Last time to read from set" },
1109 { "-n", FALSE, etINT, {&nsets_in},
1110 "Read this number of sets separated by &" },
1111 { "-d", FALSE, etBOOL, {&bDer},
1112 "Use the derivative" },
1113 { "-dp", FALSE, etINT, {&d},
1114 "HIDDENThe derivative is the difference over this number of points" },
1115 { "-bw", FALSE, etREAL, {&binwidth},
1116 "Binwidth for the distribution" },
1117 { "-errbar", FALSE, etENUM, {avbar_opt},
1118 "Error bars for [TT]-av[tt]" },
1119 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1120 "Integrate data function(s) numerically using trapezium rule" },
1121 { "-aver_start", FALSE, etREAL, {&aver_start},
1122 "Start averaging the integral from here" },
1123 { "-xydy", FALSE, etBOOL, {&bXYdy},
1124 "Interpret second data set as error in the y values for integrating" },
1125 { "-regression", FALSE, etBOOL, {&bRegression},
1126 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1127 { "-luzar", FALSE, etBOOL, {&bLuzar},
1128 "Do a Luzar and Chandler analysis on a correlation function and "
1129 "related as produced by [gmx-hbond]. When in addition the "
1130 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1131 "interpreted as errors in c(t) and n(t)." },
1132 { "-temp", FALSE, etREAL, {&temp},
1133 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1134 { "-fitstart", FALSE, etREAL, {&fit_start},
1135 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1136 { "-fitend", FALSE, etREAL, {&fit_end},
1137 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1138 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1139 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1140 { "-nbmin", FALSE, etINT, {&nb_min},
1141 "HIDDENMinimum number of blocks for block averaging" },
1142 { "-resol", FALSE, etINT, {&resol},
1143 "HIDDENResolution for the block averaging, block size increases with"
1144 " a factor 2^(1/resol)" },
1145 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1146 "HIDDENAlways use a single exponential fit for the error estimate" },
1147 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1148 "HIDDENAllow a negative long-time correlation" },
1149 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1150 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1151 { "-filter", FALSE, etREAL, {&filtlen},
1152 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1153 { "-power", FALSE, etBOOL, {&bPower},
1154 "Fit data to: b t^a" },
1155 { "-subav", FALSE, etBOOL, {&bSubAv},
1156 "Subtract the average before autocorrelating" },
1157 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1158 "Calculate one ACF over all sets" },
1159 { "-nbalexp", FALSE, etINT, {&nBalExp},
1160 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1161 { "-baltime", FALSE, etREAL, {&balTime},
1162 "HIDDENTime up to which the ballistic component will be fitted" },
1163 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1164 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1165 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1166 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1167 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1168 /* "What type of gminate recombination to use"}, */
1169 /* { "-D", FALSE, etREAL, {&diffusion}, */
1170 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1172 #define NPA asize(pa)
1174 FILE *out, *out_fit;
1175 int n, nlast, s, nset, i, j = 0;
1176 real **val, *t, dt, tot, error;
1177 double *av, *sig, cum1, cum2, cum3, cum4, db;
1178 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1182 { efXVG, "-f", "graph", ffREAD },
1183 { efXVG, "-ac", "autocorr", ffOPTWR },
1184 { efXVG, "-msd", "msd", ffOPTWR },
1185 { efXVG, "-cc", "coscont", ffOPTWR },
1186 { efXVG, "-dist", "distr", ffOPTWR },
1187 { efXVG, "-av", "average", ffOPTWR },
1188 { efXVG, "-ee", "errest", ffOPTWR },
1189 { efXVG, "-bal", "ballisitc", ffOPTWR },
1190 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1191 { efLOG, "-g", "fitlog", ffOPTWR }
1193 #define NFILE asize(fnm)
1199 ppa = add_acf_pargs(&npargs, pa);
1201 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1202 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1207 acfile = opt2fn_null("-ac", NFILE, fnm);
1208 msdfile = opt2fn_null("-msd", NFILE, fnm);
1209 ccfile = opt2fn_null("-cc", NFILE, fnm);
1210 distfile = opt2fn_null("-dist", NFILE, fnm);
1211 avfile = opt2fn_null("-av", NFILE, fnm);
1212 eefile = opt2fn_null("-ee", NFILE, fnm);
1213 balfile = opt2fn_null("-bal", NFILE, fnm);
1214 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1215 /* When doing autocorrelation we don't want a fitlog for fitting
1216 * the function itself (not the acf) when the user did not ask for it.
1218 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1220 fitfile = opt2fn("-g", NFILE, fnm);
1224 fitfile = opt2fn_null("-g", NFILE, fnm);
1227 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1228 opt2parg_bSet("-b", npargs, ppa), tb,
1229 opt2parg_bSet("-e", npargs, ppa), te,
1230 nsets_in, &nset, &n, &dt, &t);
1231 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1235 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1238 for (s = 0; s < nset; s++)
1240 for (i = 0; (i < n); i++)
1242 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1251 printf("Calculating the integral using the trapezium rule\n");
1255 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1256 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1260 for (s = 0; s < nset; s++)
1262 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1263 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1268 if (fitfile != NULL)
1270 out_fit = gmx_ffopen(fitfile, "w");
1271 if (bXYdy && nset >= 2)
1273 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1277 for (s = 0; s < nset; s++)
1279 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1282 gmx_ffclose(out_fit);
1285 printf(" std. dev. relative deviation of\n");
1286 printf(" standard --------- cumulants from those of\n");
1287 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1288 printf(" cum. 3 cum. 4\n");
1291 for (s = 0; (s < nset); s++)
1297 for (i = 0; (i < n); i++)
1302 for (i = 0; (i < n); i++)
1304 db = val[s][i]-cum1;
1307 cum4 += db*db*db*db;
1313 sig[s] = sqrt(cum2);
1316 error = sqrt(cum2/(n-1));
1322 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1323 s+1, av[s], sig[s], error,
1324 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1325 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1331 filter(filtlen, n, nset, val, dt);
1336 out = xvgropen(msdfile, "Mean square displacement",
1337 "time", "MSD (nm\\S2\\N)", oenv);
1338 nlast = (int)(n*frac);
1339 for (s = 0; s < nset; s++)
1341 for (j = 0; j <= nlast; j++)
1345 fprintf(stderr, "\r%d", j);
1348 for (i = 0; i < n-j; i++)
1350 tot += sqr(val[s][i]-val[s][i+j]);
1353 fprintf(out, " %g %8g\n", dt*j, tot);
1357 fprintf(out, "&\n");
1361 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1365 plot_coscont(ccfile, n, nset, val, oenv);
1370 histogram(distfile, binwidth, n, nset, val, oenv);
1374 average(avfile, nenum(avbar_opt), n, nset, val, t);
1378 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1379 bEeFitAc, bEESEF, bEENLC, oenv);
1383 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1386 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1387 /* nFitPoints, fit_start, fit_end, oenv); */
1390 power_fit(n, nset, val, t);
1397 for (s = 0; s < nset; s++)
1399 for (i = 0; i < n; i++)
1405 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1406 eacNormal, bAverCorr);
1411 regression_analysis(n, bXYdy, t, nset, val);
1416 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1419 view_all(oenv, NFILE, fnm);