2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "gromacs/commandline/pargs.h"
45 #include "gromacs/utility/smalloc.h"
47 #include "gromacs/utility/fatalerror.h"
50 #include "gromacs/utility/futil.h"
54 #include "gromacs/statistics/statistics.h"
59 #include "gromacs/linearalgebra/matrix.h"
61 /* must correspond to char *avbar_opt[] declared in main() */
63 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
66 static void power_fit(int n, int nset, real **val, real *t)
68 real *x, *y, quality, a, b, r;
76 for (i = 0; i < n; i++)
86 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
87 for (i = 0; i < n; i++)
93 for (s = 0; s < nset; s++)
96 for (i = 0; i < n && val[s][i] >= 0; i++)
98 y[i] = log(val[s][i]);
102 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
104 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
105 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
106 s+1, quality, a, exp(b));
113 static real cosine_content(int nhp, int n, real *y)
114 /* Assumes n equidistant points */
116 double fac, cosyint, yyint;
124 fac = M_PI*nhp/(n-1);
128 for (i = 0; i < n; i++)
130 cosyint += cos(fac*i)*y[i];
134 return 2*cosyint*cosyint/(n*yyint);
137 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
138 const output_env_t oenv)
144 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
147 for (s = 0; s < nset; s++)
149 cc = cosine_content(s+1, n, val[s]);
150 fprintf(fp, " %d %g\n", s+1, cc);
151 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
154 fprintf(stdout, "\n");
159 static void regression_analysis(int n, gmx_bool bXYdy,
160 real *x, int nset, real **val)
162 real S, chi2, a, b, da, db, r = 0;
165 if (bXYdy || (nset == 1))
167 printf("Fitting data to a function f(x) = ax + b\n");
168 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
169 printf("Error estimates will be given if w_i (sigma) values are given\n");
170 printf("(use option -xydy).\n\n");
173 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
175 gmx_fatal(FARGS, "Error fitting the data: %s",
176 gmx_stats_message(ok));
181 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
183 gmx_fatal(FARGS, "Error fitting the data: %s",
184 gmx_stats_message(ok));
188 printf("Chi2 = %g\n", chi2);
189 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
190 printf("Correlation coefficient = %.1f%%\n", 100*r);
194 printf("a = %g +/- %g\n", a, da);
195 printf("b = %g +/- %g\n", b, db);
199 printf("a = %g\n", a);
200 printf("b = %g\n", b);
205 double chi2, *a, **xx, *y;
210 for (j = 0; (j < nset-1); j++)
214 for (i = 0; (i < n); i++)
217 for (j = 1; (j < nset); j++)
219 xx[j-1][i] = val[j][i];
223 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
224 printf("Fitting %d data points in %d sets\n", n, nset-1);
225 printf("chi2 = %g\n", chi2);
227 for (i = 0; (i < nset-1); i++)
239 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
240 const output_env_t oenv)
250 for (s = 0; s < nset; s++)
252 for (i = 0; i < n; i++)
258 else if (val[s][i] > max)
265 min = binwidth*floor(min/binwidth);
266 max = binwidth*ceil(max/binwidth);
273 nbin = (int)((max - min)/binwidth + 0.5) + 1;
274 fprintf(stderr, "Making distributions with %d bins\n", nbin);
276 fp = xvgropen(distfile, "Distribution", "", "", oenv);
277 for (s = 0; s < nset; s++)
279 for (i = 0; i < nbin; i++)
283 for (i = 0; i < n; i++)
285 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
287 for (i = 0; i < nbin; i++)
289 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
299 static int real_comp(const void *a, const void *b)
301 real dif = *(real *)a - *(real *)b;
317 static void average(const char *avfile, int avbar_opt,
318 int n, int nset, real **val, real *t)
325 fp = gmx_ffopen(avfile, "w");
326 if ((avbar_opt == avbarERROR) && (nset == 1))
328 avbar_opt = avbarNONE;
330 if (avbar_opt != avbarNONE)
332 if (avbar_opt == avbar90)
335 fprintf(fp, "@TYPE xydydy\n");
336 edge = (int)(nset*0.05+0.5);
337 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
338 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
342 fprintf(fp, "@TYPE xydy\n");
346 for (i = 0; i < n; i++)
349 for (s = 0; s < nset; s++)
354 fprintf(fp, " %g %g", t[i], av);
356 if (avbar_opt != avbarNONE)
358 if (avbar_opt == avbar90)
360 for (s = 0; s < nset; s++)
364 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
365 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
369 for (s = 0; s < nset; s++)
371 var += sqr(val[s][i]-av);
373 if (avbar_opt == avbarSTDDEV)
375 err = sqrt(var/nset);
379 err = sqrt(var/(nset*(nset-1)));
381 fprintf(fp, " %g", err);
388 if (avbar_opt == avbar90)
394 static real anal_ee_inf(real *parm, real T)
396 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
399 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
400 int nset, double *av, double *sig, real **val, real dt,
401 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
402 const output_env_t oenv)
405 int bs, prev_bs, nbs, nb;
410 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
412 real ee, a, tau1, tau2;
416 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
421 fp = xvgropen(eefile, "Error estimates",
422 "Block size (time)", "Error estimate", oenv);
423 if (output_env_get_print_xvgr_codes(oenv))
426 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
430 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
433 spacing = pow(2, 1.0/resol);
437 for (s = 0; s < nset; s++)
449 for (i = 0; i < nb; i++)
452 for (j = 0; j < bs; j++)
454 blav += val[s][bs*i+j];
456 var += sqr(av[s] - blav/bs);
465 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
482 for (i = 0; i < nbs/2; i++)
485 tbs[i] = tbs[nbs-1-i];
488 ybs[i] = ybs[nbs-1-i];
491 /* The initial slope of the normalized ybs^2 is 1.
492 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
493 * From this we take our initial guess for tau1.
502 while (i < nbs - 1 &&
503 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
507 fprintf(stdout, "Data set %d has strange time correlations:\n"
508 "the std. error using single points is larger than that of blocks of 2 points\n"
509 "The error estimate might be inaccurate, check the fit\n",
511 /* Use the total time as tau for the fitting weights */
512 tau_sig = (n - 1)*dt;
521 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
524 /* Generate more or less appropriate sigma's,
525 * also taking the density of points into account.
527 for (i = 0; i < nbs; i++)
531 dens = tbs[1]/tbs[0] - 1;
535 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
539 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
541 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
546 fitparm[0] = tau1_est;
548 /* We set the initial guess for tau2
549 * to halfway between tau1_est and the total time (on log scale).
551 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
552 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
553 bDebugMode(), effnERREST, fitparm, 0);
554 fitparm[3] = 1-fitparm[1];
556 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
557 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
561 if (fitparm[2] > (n-1)*dt)
564 "Warning: tau2 is longer than the length of the data (%g)\n"
565 " the statistics might be bad\n",
570 fprintf(stdout, "a fitted parameter is negative\n");
572 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
573 sig[s]*anal_ee_inf(fitparm, n*dt),
574 fitparm[1], fitparm[0], fitparm[2]);
575 /* Do a fit with tau2 fixed at the total time.
576 * One could also choose any other large value for tau2.
578 fitparm[0] = tau1_est;
580 fitparm[2] = (n-1)*dt;
581 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
582 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
583 effnERREST, fitparm, 4);
584 fitparm[3] = 1-fitparm[1];
586 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
587 || (fitparm[1] > 1 && !bAllowNegLTCorr))
591 fprintf(stdout, "a fitted parameter is negative\n");
592 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
593 sig[s]*anal_ee_inf(fitparm, n*dt),
594 fitparm[1], fitparm[0], fitparm[2]);
596 /* Do a single exponential fit */
597 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
598 fitparm[0] = tau1_est;
601 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
602 effnERREST, fitparm, 6);
603 fitparm[3] = 1-fitparm[1];
606 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
611 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
612 s+1, ee, a, tau1, tau2);
613 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
614 fprintf(fp, "@ legend string %d \"ee %6g\"\n",
615 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
616 for (i = 0; i < nbs; i++)
618 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
619 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
625 real *ac, acint, ac_fit[4];
628 for (i = 0; i < n; i++)
630 ac[i] = val[s][i] - av[s];
640 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
641 dt, eacNormal, 1, FALSE, TRUE,
642 FALSE, 0, 0, effnNONE);
646 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
648 for (i = 1; i <= fitlen/2; i++)
654 /* Generate more or less appropriate sigma's */
655 for (i = 0; i <= fitlen; i++)
657 fitsig[i] = sqrt(acint + dt*i);
660 ac_fit[0] = 0.5*acint;
662 ac_fit[2] = 10*acint;
663 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
664 bDebugMode(), effnEXP3, ac_fit, 0);
665 ac_fit[3] = 1 - ac_fit[1];
667 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
668 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
669 ac_fit[1], ac_fit[0], ac_fit[2]);
672 for (i = 0; i < nbs; i++)
674 fprintf(fp, "%g %g\n", tbs[i],
675 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
691 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
692 gmx_bool bError, real fit_start, real smooth_tail_start,
693 const output_env_t oenv)
695 const real tol = 1e-8;
700 please_cite(stdout, "Spoel2006b");
702 /* Compute negative derivative k(t) = -dc(t)/dt */
706 compute_derivative(nn, time, val[0], kt);
707 for (j = 0; (j < nn); j++)
714 for (j = 0; (j < nn); j++)
716 d2 += sqr(kt[j] - val[3][j]);
718 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
720 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
721 temp, smooth_tail_start, oenv);
726 analyse_corr(nn, time, val[0], val[2], val[4],
727 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
731 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
732 printf("Not doing anything. Sorry.\n");
736 static void filter(real flen, int n, int nset, real **val, real dt)
739 double *filt, sum, vf, fluc, fluctot;
741 f = (int)(flen/(2*dt));
745 for (i = 1; i <= f; i++)
747 filt[i] = cos(M_PI*dt*i/flen);
750 for (i = 0; i <= f; i++)
754 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
755 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
757 for (s = 0; s < nset; s++)
760 for (i = f; i < n-f; i++)
762 vf = filt[0]*val[s][i];
763 for (j = 1; j <= f; j++)
765 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
767 fluc += sqr(val[s][i] - vf);
771 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
773 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
774 fprintf(stdout, "\n");
779 static void do_fit(FILE *out, int n, gmx_bool bYdy,
780 int ny, real *x0, real **val,
781 int npargs, t_pargs *ppa, const output_env_t oenv)
783 real *c1 = NULL, *sig = NULL, *fitparm;
784 real tendfit, tbeginfit;
785 int i, efitfn, nparm;
787 efitfn = get_acffitfn();
788 nparm = nfp_ffn[efitfn];
789 fprintf(out, "Will fit to the following function:\n");
790 fprintf(out, "%s\n", longs_ffn[efitfn]);
796 fprintf(out, "Using two columns as y and sigma values\n");
802 if (opt2parg_bSet("-beginfit", npargs, ppa))
804 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
810 if (opt2parg_bSet("-endfit", npargs, ppa))
812 tendfit = opt2parg_real("-endfit", npargs, ppa);
819 snew(fitparm, nparm);
831 fitparm[1] = 0.5*c1[0];
835 fitparm[0] = fitparm[2] = 0.5*c1[0];
841 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
848 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
856 fprintf(out, "Warning: don't know how to initialize the parameters\n");
857 for (i = 0; (i < nparm); i++)
862 fprintf(out, "Starting parameters:\n");
863 for (i = 0; (i < nparm); i++)
865 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
867 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
868 oenv, bDebugMode(), efitfn, fitparm, 0))
870 for (i = 0; (i < nparm); i++)
872 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
877 fprintf(out, "No solution was found\n");
881 static void do_ballistic(const char *balFile, int nData,
882 real *t, real **val, int nSet,
883 real balTime, int nBalExp,
884 const output_env_t oenv)
886 double **ctd = NULL, *td = NULL;
887 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
888 static char *leg[] = {"Ac'(t)"};
892 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
897 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
898 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
900 for (set = 0; set < nSet; set++)
902 snew(ctd[set], nData);
903 for (i = 0; i < nData; i++)
905 ctd[set][i] = (double)val[set][i];
908 td[i] = (double)t[i];
912 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
915 for (i = 0; i < nData; i++)
917 fprintf(fp, " %g", t[i]);
918 for (set = 0; set < nSet; set++)
920 fprintf(fp, " %g", ctd[set][i]);
926 for (set = 0; set < nSet; set++)
935 printf("Number of data points is less than the number of parameters to fit\n."
936 "The system is underdetermined, hence no ballistic term can be found.\n\n");
940 static void do_geminate(const char *gemFile, int nData,
941 real *t, real **val, int nSet,
942 const real D, const real rcut, const real balTime,
943 const int nFitPoints, const real begFit, const real endFit,
944 const output_env_t oenv)
946 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
947 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
948 begFit, endFit, balTime, 1);
949 const char *leg[] = {"Ac\\sgem\\N(t)"};
957 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
958 xvgr_legend(fp, asize(leg), leg, oenv);
960 for (set = 0; set < nSet; set++)
962 snew(ctd[set], nData);
963 snew(ctdGem[set], nData);
964 for (i = 0; i < nData; i++)
966 ctd[set][i] = (double)val[set][i];
969 td[i] = (double)t[i];
972 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
975 for (i = 0; i < nData; i++)
977 fprintf(fp, " %g", t[i]);
978 for (set = 0; set < nSet; set++)
980 fprintf(fp, " %g", ctdGem[set][i]);
985 for (set = 0; set < nSet; set++)
995 int gmx_analyze(int argc, char *argv[])
997 static const char *desc[] = {
998 "[THISMODULE] reads an ASCII file and analyzes data sets.",
999 "A line in the input file may start with a time",
1000 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1001 "Multiple sets can also be",
1002 "read when they are separated by & (option [TT]-n[tt]);",
1003 "in this case only one [IT]y[it]-value is read from each line.",
1004 "All lines starting with # and @ are skipped.",
1005 "All analyses can also be done for the derivative of a set",
1006 "(option [TT]-d[tt]).[PAR]",
1008 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1009 "points are equidistant in time.[PAR]",
1011 "[THISMODULE] always shows the average and standard deviation of each",
1012 "set, as well as the relative deviation of the third",
1013 "and fourth cumulant from those of a Gaussian distribution with the same",
1014 "standard deviation.[PAR]",
1016 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1017 "Be sure that the time interval between data points is",
1018 "much shorter than the time scale of the autocorrelation.[PAR]",
1020 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1021 "i/2 periods. The formula is:[BR]"
1022 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1023 "This is useful for principal components obtained from covariance",
1024 "analysis, since the principal components of random diffusion are",
1025 "pure cosines.[PAR]",
1027 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1029 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1031 "Option [TT]-av[tt] produces the average over the sets.",
1032 "Error bars can be added with the option [TT]-errbar[tt].",
1033 "The errorbars can represent the standard deviation, the error",
1034 "(assuming the points are independent) or the interval containing",
1035 "90% of the points, by discarding 5% of the points at the top and",
1038 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1039 "A set is divided in a number of blocks and averages are calculated for",
1040 "each block. The error for the total average is calculated from",
1041 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1042 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1043 "These errors are plotted as a function of the block size.",
1044 "Also an analytical block average curve is plotted, assuming",
1045 "that the autocorrelation is a sum of two exponentials.",
1046 "The analytical curve for the block average is:[BR]",
1047 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1048 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1049 "where T is the total time.",
1050 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1051 "When the actual block average is very close to the analytical curve,",
1052 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1053 "The complete derivation is given in",
1054 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1056 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1057 "component from a hydrogen bond autocorrelation function by the fitting",
1058 "of a sum of exponentials, as described in e.g.",
1059 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1060 "is the one with the most negative coefficient in the exponential,",
1061 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1062 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1064 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1065 "(and optionally kD) to the hydrogen bond autocorrelation function",
1066 "according to the reversible geminate recombination model. Removal of",
1067 "the ballistic component first is strongly advised. The model is presented in",
1068 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1070 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1071 "of each set and over all sets with respect to a filtered average.",
1072 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1073 "to len/2. len is supplied with the option [TT]-filter[tt].",
1074 "This filter reduces oscillations with period len/2 and len by a factor",
1075 "of 0.79 and 0.33 respectively.[PAR]",
1077 "Option [TT]-g[tt] fits the data to the function given with option",
1078 "[TT]-fitfn[tt].[PAR]",
1080 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1081 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1082 "zero or with a negative value are ignored.[PAR]"
1084 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1085 "on output from [gmx-hbond]. The input file can be taken directly",
1086 "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1088 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1089 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1090 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1091 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1092 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1093 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1095 /* must correspond to enum avbar* declared at beginning of file */
1096 static const char *avbar_opt[avbarNR+1] = {
1097 NULL, "none", "stddev", "error", "90", NULL
1101 { "-time", FALSE, etBOOL, {&bHaveT},
1102 "Expect a time in the input" },
1103 { "-b", FALSE, etREAL, {&tb},
1104 "First time to read from set" },
1105 { "-e", FALSE, etREAL, {&te},
1106 "Last time to read from set" },
1107 { "-n", FALSE, etINT, {&nsets_in},
1108 "Read this number of sets separated by &" },
1109 { "-d", FALSE, etBOOL, {&bDer},
1110 "Use the derivative" },
1111 { "-dp", FALSE, etINT, {&d},
1112 "HIDDENThe derivative is the difference over this number of points" },
1113 { "-bw", FALSE, etREAL, {&binwidth},
1114 "Binwidth for the distribution" },
1115 { "-errbar", FALSE, etENUM, {avbar_opt},
1116 "Error bars for [TT]-av[tt]" },
1117 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1118 "Integrate data function(s) numerically using trapezium rule" },
1119 { "-aver_start", FALSE, etREAL, {&aver_start},
1120 "Start averaging the integral from here" },
1121 { "-xydy", FALSE, etBOOL, {&bXYdy},
1122 "Interpret second data set as error in the y values for integrating" },
1123 { "-regression", FALSE, etBOOL, {&bRegression},
1124 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1125 { "-luzar", FALSE, etBOOL, {&bLuzar},
1126 "Do a Luzar and Chandler analysis on a correlation function and "
1127 "related as produced by [gmx-hbond]. When in addition the "
1128 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1129 "interpreted as errors in c(t) and n(t)." },
1130 { "-temp", FALSE, etREAL, {&temp},
1131 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1132 { "-fitstart", FALSE, etREAL, {&fit_start},
1133 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1134 { "-fitend", FALSE, etREAL, {&fit_end},
1135 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1136 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1137 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1138 { "-nbmin", FALSE, etINT, {&nb_min},
1139 "HIDDENMinimum number of blocks for block averaging" },
1140 { "-resol", FALSE, etINT, {&resol},
1141 "HIDDENResolution for the block averaging, block size increases with"
1142 " a factor 2^(1/resol)" },
1143 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1144 "HIDDENAlways use a single exponential fit for the error estimate" },
1145 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1146 "HIDDENAllow a negative long-time correlation" },
1147 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1148 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1149 { "-filter", FALSE, etREAL, {&filtlen},
1150 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1151 { "-power", FALSE, etBOOL, {&bPower},
1152 "Fit data to: b t^a" },
1153 { "-subav", FALSE, etBOOL, {&bSubAv},
1154 "Subtract the average before autocorrelating" },
1155 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1156 "Calculate one ACF over all sets" },
1157 { "-nbalexp", FALSE, etINT, {&nBalExp},
1158 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1159 { "-baltime", FALSE, etREAL, {&balTime},
1160 "HIDDENTime up to which the ballistic component will be fitted" },
1161 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1162 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1163 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1164 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1165 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1166 /* "What type of gminate recombination to use"}, */
1167 /* { "-D", FALSE, etREAL, {&diffusion}, */
1168 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1170 #define NPA asize(pa)
1172 FILE *out, *out_fit;
1173 int n, nlast, s, nset, i, j = 0;
1174 real **val, *t, dt, tot, error;
1175 double *av, *sig, cum1, cum2, cum3, cum4, db;
1176 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1180 { efXVG, "-f", "graph", ffREAD },
1181 { efXVG, "-ac", "autocorr", ffOPTWR },
1182 { efXVG, "-msd", "msd", ffOPTWR },
1183 { efXVG, "-cc", "coscont", ffOPTWR },
1184 { efXVG, "-dist", "distr", ffOPTWR },
1185 { efXVG, "-av", "average", ffOPTWR },
1186 { efXVG, "-ee", "errest", ffOPTWR },
1187 { efXVG, "-bal", "ballisitc", ffOPTWR },
1188 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1189 { efLOG, "-g", "fitlog", ffOPTWR }
1191 #define NFILE asize(fnm)
1197 ppa = add_acf_pargs(&npargs, pa);
1199 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1200 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1205 acfile = opt2fn_null("-ac", NFILE, fnm);
1206 msdfile = opt2fn_null("-msd", NFILE, fnm);
1207 ccfile = opt2fn_null("-cc", NFILE, fnm);
1208 distfile = opt2fn_null("-dist", NFILE, fnm);
1209 avfile = opt2fn_null("-av", NFILE, fnm);
1210 eefile = opt2fn_null("-ee", NFILE, fnm);
1211 balfile = opt2fn_null("-bal", NFILE, fnm);
1212 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1213 /* When doing autocorrelation we don't want a fitlog for fitting
1214 * the function itself (not the acf) when the user did not ask for it.
1216 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1218 fitfile = opt2fn("-g", NFILE, fnm);
1222 fitfile = opt2fn_null("-g", NFILE, fnm);
1225 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1226 opt2parg_bSet("-b", npargs, ppa), tb,
1227 opt2parg_bSet("-e", npargs, ppa), te,
1228 nsets_in, &nset, &n, &dt, &t);
1229 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1233 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1236 for (s = 0; s < nset; s++)
1238 for (i = 0; (i < n); i++)
1240 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1249 printf("Calculating the integral using the trapezium rule\n");
1253 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1254 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1258 for (s = 0; s < nset; s++)
1260 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1261 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1266 if (fitfile != NULL)
1268 out_fit = gmx_ffopen(fitfile, "w");
1269 if (bXYdy && nset >= 2)
1271 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1275 for (s = 0; s < nset; s++)
1277 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1280 gmx_ffclose(out_fit);
1283 printf(" std. dev. relative deviation of\n");
1284 printf(" standard --------- cumulants from those of\n");
1285 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1286 printf(" cum. 3 cum. 4\n");
1289 for (s = 0; (s < nset); s++)
1295 for (i = 0; (i < n); i++)
1300 for (i = 0; (i < n); i++)
1302 db = val[s][i]-cum1;
1305 cum4 += db*db*db*db;
1311 sig[s] = sqrt(cum2);
1314 error = sqrt(cum2/(n-1));
1320 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1321 s+1, av[s], sig[s], error,
1322 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1323 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1329 filter(filtlen, n, nset, val, dt);
1334 out = xvgropen(msdfile, "Mean square displacement",
1335 "time", "MSD (nm\\S2\\N)", oenv);
1336 nlast = (int)(n*frac);
1337 for (s = 0; s < nset; s++)
1339 for (j = 0; j <= nlast; j++)
1343 fprintf(stderr, "\r%d", j);
1346 for (i = 0; i < n-j; i++)
1348 tot += sqr(val[s][i]-val[s][i+j]);
1351 fprintf(out, " %g %8g\n", dt*j, tot);
1355 fprintf(out, "&\n");
1359 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1363 plot_coscont(ccfile, n, nset, val, oenv);
1368 histogram(distfile, binwidth, n, nset, val, oenv);
1372 average(avfile, nenum(avbar_opt), n, nset, val, t);
1376 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1377 bEeFitAc, bEESEF, bEENLC, oenv);
1381 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1384 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1385 /* nFitPoints, fit_start, fit_end, oenv); */
1388 power_fit(n, nset, val, t);
1395 for (s = 0; s < nset; s++)
1397 for (i = 0; i < n; i++)
1403 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1404 eacNormal, bAverCorr);
1409 regression_analysis(n, bXYdy, t, nset, val);
1414 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1417 view_all(oenv, NFILE, fnm);