2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/utility/smalloc.h"
49 #include "gromacs/utility/fatalerror.h"
52 #include "gromacs/utility/futil.h"
56 #include "gromacs/statistics/statistics.h"
62 #include "gromacs/linearalgebra/matrix.h"
64 /* must correspond to char *avbar_opt[] declared in main() */
66 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
69 static void power_fit(int n, int nset, real **val, real *t)
71 real *x, *y, quality, a, b, r;
79 for (i = 0; i < n; i++)
89 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
90 for (i = 0; i < n; i++)
96 for (s = 0; s < nset; s++)
99 for (i = 0; i < n && val[s][i] >= 0; i++)
101 y[i] = log(val[s][i]);
105 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
107 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
108 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
109 s+1, quality, a, exp(b));
116 static real cosine_content(int nhp, int n, real *y)
117 /* Assumes n equidistant points */
119 double fac, cosyint, yyint;
127 fac = M_PI*nhp/(n-1);
131 for (i = 0; i < n; i++)
133 cosyint += cos(fac*i)*y[i];
137 return 2*cosyint*cosyint/(n*yyint);
140 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
141 const output_env_t oenv)
147 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
150 for (s = 0; s < nset; s++)
152 cc = cosine_content(s+1, n, val[s]);
153 fprintf(fp, " %d %g\n", s+1, cc);
154 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
157 fprintf(stdout, "\n");
162 static void regression_analysis(int n, gmx_bool bXYdy,
163 real *x, int nset, real **val)
165 real S, chi2, a, b, da, db, r = 0;
168 if (bXYdy || (nset == 1))
170 printf("Fitting data to a function f(x) = ax + b\n");
171 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
172 printf("Error estimates will be given if w_i (sigma) values are given\n");
173 printf("(use option -xydy).\n\n");
176 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
178 gmx_fatal(FARGS, "Error fitting the data: %s",
179 gmx_stats_message(ok));
184 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
186 gmx_fatal(FARGS, "Error fitting the data: %s",
187 gmx_stats_message(ok));
191 printf("Chi2 = %g\n", chi2);
192 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
193 printf("Correlation coefficient = %.1f%%\n", 100*r);
197 printf("a = %g +/- %g\n", a, da);
198 printf("b = %g +/- %g\n", b, db);
202 printf("a = %g\n", a);
203 printf("b = %g\n", b);
208 double chi2, *a, **xx, *y;
213 for (j = 0; (j < nset-1); j++)
217 for (i = 0; (i < n); i++)
220 for (j = 1; (j < nset); j++)
222 xx[j-1][i] = val[j][i];
226 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
227 printf("Fitting %d data points in %d sets\n", n, nset-1);
228 printf("chi2 = %g\n", chi2);
230 for (i = 0; (i < nset-1); i++)
242 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
243 const output_env_t oenv)
253 for (s = 0; s < nset; s++)
255 for (i = 0; i < n; i++)
261 else if (val[s][i] > max)
268 min = binwidth*floor(min/binwidth);
269 max = binwidth*ceil(max/binwidth);
276 nbin = (int)((max - min)/binwidth + 0.5) + 1;
277 fprintf(stderr, "Making distributions with %d bins\n", nbin);
279 fp = xvgropen(distfile, "Distribution", "", "", oenv);
280 for (s = 0; s < nset; s++)
282 for (i = 0; i < nbin; i++)
286 for (i = 0; i < n; i++)
288 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
290 for (i = 0; i < nbin; i++)
292 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
302 static int real_comp(const void *a, const void *b)
304 real dif = *(real *)a - *(real *)b;
320 static void average(const char *avfile, int avbar_opt,
321 int n, int nset, real **val, real *t)
328 fp = gmx_ffopen(avfile, "w");
329 if ((avbar_opt == avbarERROR) && (nset == 1))
331 avbar_opt = avbarNONE;
333 if (avbar_opt != avbarNONE)
335 if (avbar_opt == avbar90)
338 fprintf(fp, "@TYPE xydydy\n");
339 edge = (int)(nset*0.05+0.5);
340 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
341 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
345 fprintf(fp, "@TYPE xydy\n");
349 for (i = 0; i < n; i++)
352 for (s = 0; s < nset; s++)
357 fprintf(fp, " %g %g", t[i], av);
359 if (avbar_opt != avbarNONE)
361 if (avbar_opt == avbar90)
363 for (s = 0; s < nset; s++)
367 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
368 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
372 for (s = 0; s < nset; s++)
374 var += sqr(val[s][i]-av);
376 if (avbar_opt == avbarSTDDEV)
378 err = sqrt(var/nset);
382 err = sqrt(var/(nset*(nset-1)));
384 fprintf(fp, " %g", err);
391 if (avbar_opt == avbar90)
397 static real anal_ee_inf(real *parm, real T)
399 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
402 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
403 int nset, double *av, double *sig, real **val, real dt,
404 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
405 const output_env_t oenv)
408 int bs, prev_bs, nbs, nb;
413 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
415 real ee, a, tau1, tau2;
419 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
424 fp = xvgropen(eefile, "Error estimates",
425 "Block size (time)", "Error estimate", oenv);
426 if (output_env_get_print_xvgr_codes(oenv))
429 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
433 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
436 spacing = pow(2, 1.0/resol);
440 for (s = 0; s < nset; s++)
452 for (i = 0; i < nb; i++)
455 for (j = 0; j < bs; j++)
457 blav += val[s][bs*i+j];
459 var += sqr(av[s] - blav/bs);
468 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
485 for (i = 0; i < nbs/2; i++)
488 tbs[i] = tbs[nbs-1-i];
491 ybs[i] = ybs[nbs-1-i];
494 /* The initial slope of the normalized ybs^2 is 1.
495 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
496 * From this we take our initial guess for tau1.
505 while (i < nbs - 1 &&
506 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
510 fprintf(stdout, "Data set %d has strange time correlations:\n"
511 "the std. error using single points is larger than that of blocks of 2 points\n"
512 "The error estimate might be inaccurate, check the fit\n",
514 /* Use the total time as tau for the fitting weights */
515 tau_sig = (n - 1)*dt;
524 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
527 /* Generate more or less appropriate sigma's,
528 * also taking the density of points into account.
530 for (i = 0; i < nbs; i++)
534 dens = tbs[1]/tbs[0] - 1;
538 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
542 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
544 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
549 fitparm[0] = tau1_est;
551 /* We set the initial guess for tau2
552 * to halfway between tau1_est and the total time (on log scale).
554 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
555 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
556 bDebugMode(), effnERREST, fitparm, 0);
557 fitparm[3] = 1-fitparm[1];
559 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
560 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
564 if (fitparm[2] > (n-1)*dt)
567 "Warning: tau2 is longer than the length of the data (%g)\n"
568 " the statistics might be bad\n",
573 fprintf(stdout, "a fitted parameter is negative\n");
575 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
576 sig[s]*anal_ee_inf(fitparm, n*dt),
577 fitparm[1], fitparm[0], fitparm[2]);
578 /* Do a fit with tau2 fixed at the total time.
579 * One could also choose any other large value for tau2.
581 fitparm[0] = tau1_est;
583 fitparm[2] = (n-1)*dt;
584 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
585 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
586 effnERREST, fitparm, 4);
587 fitparm[3] = 1-fitparm[1];
589 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
590 || (fitparm[1] > 1 && !bAllowNegLTCorr))
594 fprintf(stdout, "a fitted parameter is negative\n");
595 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
596 sig[s]*anal_ee_inf(fitparm, n*dt),
597 fitparm[1], fitparm[0], fitparm[2]);
599 /* Do a single exponential fit */
600 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
601 fitparm[0] = tau1_est;
604 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
605 effnERREST, fitparm, 6);
606 fitparm[3] = 1-fitparm[1];
609 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
614 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
615 s+1, ee, a, tau1, tau2);
616 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
617 fprintf(fp, "@ legend string %d \"ee %6g\"\n",
618 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
619 for (i = 0; i < nbs; i++)
621 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
622 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
628 real *ac, acint, ac_fit[4];
631 for (i = 0; i < n; i++)
633 ac[i] = val[s][i] - av[s];
643 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
644 dt, eacNormal, 1, FALSE, TRUE,
645 FALSE, 0, 0, effnNONE);
649 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
651 for (i = 1; i <= fitlen/2; i++)
657 /* Generate more or less appropriate sigma's */
658 for (i = 0; i <= fitlen; i++)
660 fitsig[i] = sqrt(acint + dt*i);
663 ac_fit[0] = 0.5*acint;
665 ac_fit[2] = 10*acint;
666 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
667 bDebugMode(), effnEXP3, ac_fit, 0);
668 ac_fit[3] = 1 - ac_fit[1];
670 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
671 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
672 ac_fit[1], ac_fit[0], ac_fit[2]);
675 for (i = 0; i < nbs; i++)
677 fprintf(fp, "%g %g\n", tbs[i],
678 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
694 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
695 gmx_bool bError, real fit_start, real smooth_tail_start,
696 const output_env_t oenv)
698 const real tol = 1e-8;
703 please_cite(stdout, "Spoel2006b");
705 /* Compute negative derivative k(t) = -dc(t)/dt */
709 compute_derivative(nn, time, val[0], kt);
710 for (j = 0; (j < nn); j++)
717 for (j = 0; (j < nn); j++)
719 d2 += sqr(kt[j] - val[3][j]);
721 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
723 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
724 temp, smooth_tail_start, oenv);
729 analyse_corr(nn, time, val[0], val[2], val[4],
730 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
734 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
735 printf("Not doing anything. Sorry.\n");
739 static void filter(real flen, int n, int nset, real **val, real dt)
742 double *filt, sum, vf, fluc, fluctot;
744 f = (int)(flen/(2*dt));
748 for (i = 1; i <= f; i++)
750 filt[i] = cos(M_PI*dt*i/flen);
753 for (i = 0; i <= f; i++)
757 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
758 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
760 for (s = 0; s < nset; s++)
763 for (i = f; i < n-f; i++)
765 vf = filt[0]*val[s][i];
766 for (j = 1; j <= f; j++)
768 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
770 fluc += sqr(val[s][i] - vf);
774 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
776 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
777 fprintf(stdout, "\n");
782 static void do_fit(FILE *out, int n, gmx_bool bYdy,
783 int ny, real *x0, real **val,
784 int npargs, t_pargs *ppa, const output_env_t oenv)
786 real *c1 = NULL, *sig = NULL, *fitparm;
787 real tendfit, tbeginfit;
788 int i, efitfn, nparm;
790 efitfn = get_acffitfn();
791 nparm = nfp_ffn[efitfn];
792 fprintf(out, "Will fit to the following function:\n");
793 fprintf(out, "%s\n", longs_ffn[efitfn]);
799 fprintf(out, "Using two columns as y and sigma values\n");
805 if (opt2parg_bSet("-beginfit", npargs, ppa))
807 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
813 if (opt2parg_bSet("-endfit", npargs, ppa))
815 tendfit = opt2parg_real("-endfit", npargs, ppa);
822 snew(fitparm, nparm);
834 fitparm[1] = 0.5*c1[0];
838 fitparm[0] = fitparm[2] = 0.5*c1[0];
844 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
851 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
859 fprintf(out, "Warning: don't know how to initialize the parameters\n");
860 for (i = 0; (i < nparm); i++)
865 fprintf(out, "Starting parameters:\n");
866 for (i = 0; (i < nparm); i++)
868 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
870 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
871 oenv, bDebugMode(), efitfn, fitparm, 0))
873 for (i = 0; (i < nparm); i++)
875 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
880 fprintf(out, "No solution was found\n");
884 static void do_ballistic(const char *balFile, int nData,
885 real *t, real **val, int nSet,
886 real balTime, int nBalExp,
887 const output_env_t oenv)
889 double **ctd = NULL, *td = NULL;
890 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
891 static char *leg[] = {"Ac'(t)"};
895 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
900 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
901 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
903 for (set = 0; set < nSet; set++)
905 snew(ctd[set], nData);
906 for (i = 0; i < nData; i++)
908 ctd[set][i] = (double)val[set][i];
911 td[i] = (double)t[i];
915 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
918 for (i = 0; i < nData; i++)
920 fprintf(fp, " %g", t[i]);
921 for (set = 0; set < nSet; set++)
923 fprintf(fp, " %g", ctd[set][i]);
929 for (set = 0; set < nSet; set++)
938 printf("Number of data points is less than the number of parameters to fit\n."
939 "The system is underdetermined, hence no ballistic term can be found.\n\n");
943 static void do_geminate(const char *gemFile, int nData,
944 real *t, real **val, int nSet,
945 const real D, const real rcut, const real balTime,
946 const int nFitPoints, const real begFit, const real endFit,
947 const output_env_t oenv)
949 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
950 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
951 begFit, endFit, balTime, 1);
952 const char *leg[] = {"Ac\\sgem\\N(t)"};
960 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
961 xvgr_legend(fp, asize(leg), leg, oenv);
963 for (set = 0; set < nSet; set++)
965 snew(ctd[set], nData);
966 snew(ctdGem[set], nData);
967 for (i = 0; i < nData; i++)
969 ctd[set][i] = (double)val[set][i];
972 td[i] = (double)t[i];
975 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
978 for (i = 0; i < nData; i++)
980 fprintf(fp, " %g", t[i]);
981 for (set = 0; set < nSet; set++)
983 fprintf(fp, " %g", ctdGem[set][i]);
988 for (set = 0; set < nSet; set++)
998 int gmx_analyze(int argc, char *argv[])
1000 static const char *desc[] = {
1001 "[THISMODULE] reads an ASCII file and analyzes data sets.",
1002 "A line in the input file may start with a time",
1003 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1004 "Multiple sets can also be",
1005 "read when they are separated by & (option [TT]-n[tt]);",
1006 "in this case only one [IT]y[it]-value is read from each line.",
1007 "All lines starting with # and @ are skipped.",
1008 "All analyses can also be done for the derivative of a set",
1009 "(option [TT]-d[tt]).[PAR]",
1011 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1012 "points are equidistant in time.[PAR]",
1014 "[THISMODULE] always shows the average and standard deviation of each",
1015 "set, as well as the relative deviation of the third",
1016 "and fourth cumulant from those of a Gaussian distribution with the same",
1017 "standard deviation.[PAR]",
1019 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1020 "Be sure that the time interval between data points is",
1021 "much shorter than the time scale of the autocorrelation.[PAR]",
1023 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1024 "i/2 periods. The formula is:[BR]"
1025 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1026 "This is useful for principal components obtained from covariance",
1027 "analysis, since the principal components of random diffusion are",
1028 "pure cosines.[PAR]",
1030 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1032 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1034 "Option [TT]-av[tt] produces the average over the sets.",
1035 "Error bars can be added with the option [TT]-errbar[tt].",
1036 "The errorbars can represent the standard deviation, the error",
1037 "(assuming the points are independent) or the interval containing",
1038 "90% of the points, by discarding 5% of the points at the top and",
1041 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1042 "A set is divided in a number of blocks and averages are calculated for",
1043 "each block. The error for the total average is calculated from",
1044 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1045 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1046 "These errors are plotted as a function of the block size.",
1047 "Also an analytical block average curve is plotted, assuming",
1048 "that the autocorrelation is a sum of two exponentials.",
1049 "The analytical curve for the block average is:[BR]",
1050 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1051 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1052 "where T is the total time.",
1053 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1054 "When the actual block average is very close to the analytical curve,",
1055 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1056 "The complete derivation is given in",
1057 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1059 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1060 "component from a hydrogen bond autocorrelation function by the fitting",
1061 "of a sum of exponentials, as described in e.g.",
1062 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1063 "is the one with the most negative coefficient in the exponential,",
1064 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1065 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1067 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1068 "(and optionally kD) to the hydrogen bond autocorrelation function",
1069 "according to the reversible geminate recombination model. Removal of",
1070 "the ballistic component first is strongly advised. The model is presented in",
1071 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1073 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1074 "of each set and over all sets with respect to a filtered average.",
1075 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1076 "to len/2. len is supplied with the option [TT]-filter[tt].",
1077 "This filter reduces oscillations with period len/2 and len by a factor",
1078 "of 0.79 and 0.33 respectively.[PAR]",
1080 "Option [TT]-g[tt] fits the data to the function given with option",
1081 "[TT]-fitfn[tt].[PAR]",
1083 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1084 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1085 "zero or with a negative value are ignored.[PAR]"
1087 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1088 "on output from [gmx-hbond]. The input file can be taken directly",
1089 "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1091 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1092 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1093 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1094 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1095 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1096 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1098 /* must correspond to enum avbar* declared at beginning of file */
1099 static const char *avbar_opt[avbarNR+1] = {
1100 NULL, "none", "stddev", "error", "90", NULL
1104 { "-time", FALSE, etBOOL, {&bHaveT},
1105 "Expect a time in the input" },
1106 { "-b", FALSE, etREAL, {&tb},
1107 "First time to read from set" },
1108 { "-e", FALSE, etREAL, {&te},
1109 "Last time to read from set" },
1110 { "-n", FALSE, etINT, {&nsets_in},
1111 "Read this number of sets separated by &" },
1112 { "-d", FALSE, etBOOL, {&bDer},
1113 "Use the derivative" },
1114 { "-dp", FALSE, etINT, {&d},
1115 "HIDDENThe derivative is the difference over this number of points" },
1116 { "-bw", FALSE, etREAL, {&binwidth},
1117 "Binwidth for the distribution" },
1118 { "-errbar", FALSE, etENUM, {avbar_opt},
1119 "Error bars for [TT]-av[tt]" },
1120 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1121 "Integrate data function(s) numerically using trapezium rule" },
1122 { "-aver_start", FALSE, etREAL, {&aver_start},
1123 "Start averaging the integral from here" },
1124 { "-xydy", FALSE, etBOOL, {&bXYdy},
1125 "Interpret second data set as error in the y values for integrating" },
1126 { "-regression", FALSE, etBOOL, {&bRegression},
1127 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1128 { "-luzar", FALSE, etBOOL, {&bLuzar},
1129 "Do a Luzar and Chandler analysis on a correlation function and "
1130 "related as produced by [gmx-hbond]. When in addition the "
1131 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1132 "interpreted as errors in c(t) and n(t)." },
1133 { "-temp", FALSE, etREAL, {&temp},
1134 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1135 { "-fitstart", FALSE, etREAL, {&fit_start},
1136 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1137 { "-fitend", FALSE, etREAL, {&fit_end},
1138 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1139 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1140 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1141 { "-nbmin", FALSE, etINT, {&nb_min},
1142 "HIDDENMinimum number of blocks for block averaging" },
1143 { "-resol", FALSE, etINT, {&resol},
1144 "HIDDENResolution for the block averaging, block size increases with"
1145 " a factor 2^(1/resol)" },
1146 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1147 "HIDDENAlways use a single exponential fit for the error estimate" },
1148 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1149 "HIDDENAllow a negative long-time correlation" },
1150 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1151 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1152 { "-filter", FALSE, etREAL, {&filtlen},
1153 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1154 { "-power", FALSE, etBOOL, {&bPower},
1155 "Fit data to: b t^a" },
1156 { "-subav", FALSE, etBOOL, {&bSubAv},
1157 "Subtract the average before autocorrelating" },
1158 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1159 "Calculate one ACF over all sets" },
1160 { "-nbalexp", FALSE, etINT, {&nBalExp},
1161 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1162 { "-baltime", FALSE, etREAL, {&balTime},
1163 "HIDDENTime up to which the ballistic component will be fitted" },
1164 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1165 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1166 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1167 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1168 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1169 /* "What type of gminate recombination to use"}, */
1170 /* { "-D", FALSE, etREAL, {&diffusion}, */
1171 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1173 #define NPA asize(pa)
1175 FILE *out, *out_fit;
1176 int n, nlast, s, nset, i, j = 0;
1177 real **val, *t, dt, tot, error;
1178 double *av, *sig, cum1, cum2, cum3, cum4, db;
1179 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1183 { efXVG, "-f", "graph", ffREAD },
1184 { efXVG, "-ac", "autocorr", ffOPTWR },
1185 { efXVG, "-msd", "msd", ffOPTWR },
1186 { efXVG, "-cc", "coscont", ffOPTWR },
1187 { efXVG, "-dist", "distr", ffOPTWR },
1188 { efXVG, "-av", "average", ffOPTWR },
1189 { efXVG, "-ee", "errest", ffOPTWR },
1190 { efXVG, "-bal", "ballisitc", ffOPTWR },
1191 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1192 { efLOG, "-g", "fitlog", ffOPTWR }
1194 #define NFILE asize(fnm)
1200 ppa = add_acf_pargs(&npargs, pa);
1202 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1203 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1208 acfile = opt2fn_null("-ac", NFILE, fnm);
1209 msdfile = opt2fn_null("-msd", NFILE, fnm);
1210 ccfile = opt2fn_null("-cc", NFILE, fnm);
1211 distfile = opt2fn_null("-dist", NFILE, fnm);
1212 avfile = opt2fn_null("-av", NFILE, fnm);
1213 eefile = opt2fn_null("-ee", NFILE, fnm);
1214 balfile = opt2fn_null("-bal", NFILE, fnm);
1215 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1216 /* When doing autocorrelation we don't want a fitlog for fitting
1217 * the function itself (not the acf) when the user did not ask for it.
1219 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1221 fitfile = opt2fn("-g", NFILE, fnm);
1225 fitfile = opt2fn_null("-g", NFILE, fnm);
1228 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1229 opt2parg_bSet("-b", npargs, ppa), tb,
1230 opt2parg_bSet("-e", npargs, ppa), te,
1231 nsets_in, &nset, &n, &dt, &t);
1232 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1236 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1239 for (s = 0; s < nset; s++)
1241 for (i = 0; (i < n); i++)
1243 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1252 printf("Calculating the integral using the trapezium rule\n");
1256 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1257 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1261 for (s = 0; s < nset; s++)
1263 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1264 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1269 if (fitfile != NULL)
1271 out_fit = gmx_ffopen(fitfile, "w");
1272 if (bXYdy && nset >= 2)
1274 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1278 for (s = 0; s < nset; s++)
1280 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1283 gmx_ffclose(out_fit);
1286 printf(" std. dev. relative deviation of\n");
1287 printf(" standard --------- cumulants from those of\n");
1288 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1289 printf(" cum. 3 cum. 4\n");
1292 for (s = 0; (s < nset); s++)
1298 for (i = 0; (i < n); i++)
1303 for (i = 0; (i < n); i++)
1305 db = val[s][i]-cum1;
1308 cum4 += db*db*db*db;
1314 sig[s] = sqrt(cum2);
1317 error = sqrt(cum2/(n-1));
1323 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1324 s+1, av[s], sig[s], error,
1325 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1326 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1332 filter(filtlen, n, nset, val, dt);
1337 out = xvgropen(msdfile, "Mean square displacement",
1338 "time", "MSD (nm\\S2\\N)", oenv);
1339 nlast = (int)(n*frac);
1340 for (s = 0; s < nset; s++)
1342 for (j = 0; j <= nlast; j++)
1346 fprintf(stderr, "\r%d", j);
1349 for (i = 0; i < n-j; i++)
1351 tot += sqr(val[s][i]-val[s][i+j]);
1354 fprintf(out, " %g %8g\n", dt*j, tot);
1358 fprintf(out, "&\n");
1362 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1366 plot_coscont(ccfile, n, nset, val, oenv);
1371 histogram(distfile, binwidth, n, nset, val, oenv);
1375 average(avfile, nenum(avbar_opt), n, nset, val, t);
1379 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1380 bEeFitAc, bEESEF, bEENLC, oenv);
1384 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1387 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1388 /* nFitPoints, fit_start, fit_end, oenv); */
1391 power_fit(n, nset, val, t);
1398 for (s = 0; s < nset; s++)
1400 for (i = 0; i < n; i++)
1406 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1407 eacNormal, bAverCorr);
1412 regression_analysis(n, bXYdy, t, nset, val);
1417 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1420 view_all(oenv, NFILE, fnm);