1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
46 #include "gmx_fatal.h"
54 #include "gmx_statistics.h"
59 #include "gromacs/linearalgebra/matrix.h"
61 /* must correspond to char *avbar_opt[] declared in main() */
63 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
66 static void power_fit(int n, int nset, real **val, real *t)
68 real *x, *y, quality, a, b, r;
76 for (i = 0; i < n; i++)
86 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
87 for (i = 0; i < n; i++)
93 for (s = 0; s < nset; s++)
96 for (i = 0; i < n && val[s][i] >= 0; i++)
98 y[i] = log(val[s][i]);
102 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
104 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
105 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
106 s+1, quality, a, exp(b));
113 static real cosine_content(int nhp, int n, real *y)
114 /* Assumes n equidistant points */
116 double fac, cosyint, yyint;
124 fac = M_PI*nhp/(n-1);
128 for (i = 0; i < n; i++)
130 cosyint += cos(fac*i)*y[i];
134 return 2*cosyint*cosyint/(n*yyint);
137 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
138 const output_env_t oenv)
144 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
147 for (s = 0; s < nset; s++)
149 cc = cosine_content(s+1, n, val[s]);
150 fprintf(fp, " %d %g\n", s+1, cc);
151 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
154 fprintf(stdout, "\n");
159 static void regression_analysis(int n, gmx_bool bXYdy,
160 real *x, int nset, real **val)
162 real S, chi2, a, b, da, db, r = 0;
165 if (bXYdy || (nset == 1))
167 printf("Fitting data to a function f(x) = ax + b\n");
168 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
169 printf("Error estimates will be given if w_i (sigma) values are given\n");
170 printf("(use option -xydy).\n\n");
173 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
175 gmx_fatal(FARGS, "Error fitting the data: %s",
176 gmx_stats_message(ok));
181 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
183 gmx_fatal(FARGS, "Error fitting the data: %s",
184 gmx_stats_message(ok));
188 printf("Chi2 = %g\n", chi2);
189 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
190 printf("Correlation coefficient = %.1f%%\n", 100*r);
194 printf("a = %g +/- %g\n", a, da);
195 printf("b = %g +/- %g\n", b, db);
199 printf("a = %g\n", a);
200 printf("b = %g\n", b);
205 double chi2, *a, **xx, *y;
210 for (j = 0; (j < nset-1); j++)
214 for (i = 0; (i < n); i++)
217 for (j = 1; (j < nset); j++)
219 xx[j-1][i] = val[j][i];
223 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
224 printf("Fitting %d data points in %d sets\n", n, nset-1);
225 printf("chi2 = %g\n", chi2);
227 for (i = 0; (i < nset-1); i++)
239 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
240 const output_env_t oenv)
246 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
247 long long int *histo;
254 for (s = 0; s < nset; s++)
256 for (i = 0; i < n; i++)
262 else if (val[s][i] > max)
269 min = binwidth*floor(min/binwidth);
270 max = binwidth*ceil(max/binwidth);
277 nbin = (int)((max - min)/binwidth + 0.5) + 1;
278 fprintf(stderr, "Making distributions with %d bins\n", nbin);
280 fp = xvgropen(distfile, "Distribution", "", "", oenv);
281 for (s = 0; s < nset; s++)
283 for (i = 0; i < nbin; i++)
287 for (i = 0; i < n; i++)
289 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
291 for (i = 0; i < nbin; i++)
293 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
303 static int real_comp(const void *a, const void *b)
305 real dif = *(real *)a - *(real *)b;
321 static void average(const char *avfile, int avbar_opt,
322 int n, int nset, real **val, real *t)
329 fp = ffopen(avfile, "w");
330 if ((avbar_opt == avbarERROR) && (nset == 1))
332 avbar_opt = avbarNONE;
334 if (avbar_opt != avbarNONE)
336 if (avbar_opt == avbar90)
339 fprintf(fp, "@TYPE xydydy\n");
340 edge = (int)(nset*0.05+0.5);
341 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
342 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
346 fprintf(fp, "@TYPE xydy\n");
350 for (i = 0; i < n; i++)
353 for (s = 0; s < nset; s++)
358 fprintf(fp, " %g %g", t[i], av);
360 if (avbar_opt != avbarNONE)
362 if (avbar_opt == avbar90)
364 for (s = 0; s < nset; s++)
368 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
369 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
373 for (s = 0; s < nset; s++)
375 var += sqr(val[s][i]-av);
377 if (avbar_opt == avbarSTDDEV)
379 err = sqrt(var/nset);
383 err = sqrt(var/(nset*(nset-1)));
385 fprintf(fp, " %g", err);
392 if (avbar_opt == avbar90)
398 static real anal_ee_inf(real *parm, real T)
400 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
403 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
404 int nset, double *av, double *sig, real **val, real dt,
405 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
406 const output_env_t oenv)
409 int bs, prev_bs, nbs, nb;
414 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
416 real ee, a, tau1, tau2;
420 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
425 fp = xvgropen(eefile, "Error estimates",
426 "Block size (time)", "Error estimate", oenv);
427 if (output_env_get_print_xvgr_codes(oenv))
430 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
434 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
437 spacing = pow(2, 1.0/resol);
441 for (s = 0; s < nset; s++)
453 for (i = 0; i < nb; i++)
456 for (j = 0; j < bs; j++)
458 blav += val[s][bs*i+j];
460 var += sqr(av[s] - blav/bs);
469 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
486 for (i = 0; i < nbs/2; i++)
489 tbs[i] = tbs[nbs-1-i];
492 ybs[i] = ybs[nbs-1-i];
495 /* The initial slope of the normalized ybs^2 is 1.
496 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
497 * From this we take our initial guess for tau1.
506 while (i < nbs - 1 &&
507 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
511 fprintf(stdout, "Data set %d has strange time correlations:\n"
512 "the std. error using single points is larger than that of blocks of 2 points\n"
513 "The error estimate might be inaccurate, check the fit\n",
515 /* Use the total time as tau for the fitting weights */
516 tau_sig = (n - 1)*dt;
525 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
528 /* Generate more or less appropriate sigma's,
529 * also taking the density of points into account.
531 for (i = 0; i < nbs; i++)
535 dens = tbs[1]/tbs[0] - 1;
539 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
543 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
545 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
550 fitparm[0] = tau1_est;
552 /* We set the initial guess for tau2
553 * to halfway between tau1_est and the total time (on log scale).
555 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
556 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
557 bDebugMode(), effnERREST, fitparm, 0);
558 fitparm[3] = 1-fitparm[1];
560 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
561 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
565 if (fitparm[2] > (n-1)*dt)
568 "Warning: tau2 is longer than the length of the data (%g)\n"
569 " the statistics might be bad\n",
574 fprintf(stdout, "a fitted parameter is negative\n");
576 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
577 sig[s]*anal_ee_inf(fitparm, n*dt),
578 fitparm[1], fitparm[0], fitparm[2]);
579 /* Do a fit with tau2 fixed at the total time.
580 * One could also choose any other large value for tau2.
582 fitparm[0] = tau1_est;
584 fitparm[2] = (n-1)*dt;
585 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
586 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
587 effnERREST, fitparm, 4);
588 fitparm[3] = 1-fitparm[1];
590 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
591 || (fitparm[1] > 1 && !bAllowNegLTCorr))
595 fprintf(stdout, "a fitted parameter is negative\n");
596 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
597 sig[s]*anal_ee_inf(fitparm, n*dt),
598 fitparm[1], fitparm[0], fitparm[2]);
600 /* Do a single exponential fit */
601 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
602 fitparm[0] = tau1_est;
605 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
606 effnERREST, fitparm, 6);
607 fitparm[3] = 1-fitparm[1];
610 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
615 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
616 s+1, ee, a, tau1, tau2);
617 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
618 fprintf(fp, "@ legend string %d \"ee %6g\"\n",
619 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
620 for (i = 0; i < nbs; i++)
622 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
623 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
629 real *ac, acint, ac_fit[4];
632 for (i = 0; i < n; i++)
634 ac[i] = val[s][i] - av[s];
644 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
645 dt, eacNormal, 1, FALSE, TRUE,
646 FALSE, 0, 0, effnNONE);
650 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
652 for (i = 1; i <= fitlen/2; i++)
658 /* Generate more or less appropriate sigma's */
659 for (i = 0; i <= fitlen; i++)
661 fitsig[i] = sqrt(acint + dt*i);
664 ac_fit[0] = 0.5*acint;
666 ac_fit[2] = 10*acint;
667 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
668 bDebugMode(), effnEXP3, ac_fit, 0);
669 ac_fit[3] = 1 - ac_fit[1];
671 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
672 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
673 ac_fit[1], ac_fit[0], ac_fit[2]);
676 for (i = 0; i < nbs; i++)
678 fprintf(fp, "%g %g\n", tbs[i],
679 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
695 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
696 gmx_bool bError, real fit_start, real smooth_tail_start,
697 const output_env_t oenv)
699 const real tol = 1e-8;
704 please_cite(stdout, "Spoel2006b");
706 /* Compute negative derivative k(t) = -dc(t)/dt */
710 compute_derivative(nn, time, val[0], kt);
711 for (j = 0; (j < nn); j++)
718 for (j = 0; (j < nn); j++)
720 d2 += sqr(kt[j] - val[3][j]);
722 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
724 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
725 temp, smooth_tail_start, oenv);
730 analyse_corr(nn, time, val[0], val[2], val[4],
731 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
735 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
736 printf("Not doing anything. Sorry.\n");
740 static void filter(real flen, int n, int nset, real **val, real dt)
743 double *filt, sum, vf, fluc, fluctot;
745 f = (int)(flen/(2*dt));
749 for (i = 1; i <= f; i++)
751 filt[i] = cos(M_PI*dt*i/flen);
754 for (i = 0; i <= f; i++)
758 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
759 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
761 for (s = 0; s < nset; s++)
764 for (i = f; i < n-f; i++)
766 vf = filt[0]*val[s][i];
767 for (j = 1; j <= f; j++)
769 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
771 fluc += sqr(val[s][i] - vf);
775 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
777 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
778 fprintf(stdout, "\n");
783 static void do_fit(FILE *out, int n, gmx_bool bYdy,
784 int ny, real *x0, real **val,
785 int npargs, t_pargs *ppa, const output_env_t oenv)
787 real *c1 = NULL, *sig = NULL, *fitparm;
788 real tendfit, tbeginfit;
789 int i, efitfn, nparm;
791 efitfn = get_acffitfn();
792 nparm = nfp_ffn[efitfn];
793 fprintf(out, "Will fit to the following function:\n");
794 fprintf(out, "%s\n", longs_ffn[efitfn]);
800 fprintf(out, "Using two columns as y and sigma values\n");
806 if (opt2parg_bSet("-beginfit", npargs, ppa))
808 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
814 if (opt2parg_bSet("-endfit", npargs, ppa))
816 tendfit = opt2parg_real("-endfit", npargs, ppa);
823 snew(fitparm, nparm);
835 fitparm[1] = 0.5*c1[0];
839 fitparm[0] = fitparm[2] = 0.5*c1[0];
845 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
852 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
860 fprintf(out, "Warning: don't know how to initialize the parameters\n");
861 for (i = 0; (i < nparm); i++)
866 fprintf(out, "Starting parameters:\n");
867 for (i = 0; (i < nparm); i++)
869 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
871 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
872 oenv, bDebugMode(), efitfn, fitparm, 0))
874 for (i = 0; (i < nparm); i++)
876 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
881 fprintf(out, "No solution was found\n");
885 static void do_ballistic(const char *balFile, int nData,
886 real *t, real **val, int nSet,
887 real balTime, int nBalExp,
888 const output_env_t oenv)
890 double **ctd = NULL, *td = NULL;
891 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
892 static char *leg[] = {"Ac'(t)"};
896 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
901 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
902 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
904 for (set = 0; set < nSet; set++)
906 snew(ctd[set], nData);
907 for (i = 0; i < nData; i++)
909 ctd[set][i] = (double)val[set][i];
912 td[i] = (double)t[i];
916 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
919 for (i = 0; i < nData; i++)
921 fprintf(fp, " %g", t[i]);
922 for (set = 0; set < nSet; set++)
924 fprintf(fp, " %g", ctd[set][i]);
930 for (set = 0; set < nSet; set++)
939 printf("Number of data points is less than the number of parameters to fit\n."
940 "The system is underdetermined, hence no ballistic term can be found.\n\n");
944 static void do_geminate(const char *gemFile, int nData,
945 real *t, real **val, int nSet,
946 const real D, const real rcut, const real balTime,
947 const int nFitPoints, const real begFit, const real endFit,
948 const output_env_t oenv)
950 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
951 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
952 begFit, endFit, balTime, 1);
953 const char *leg[] = {"Ac\\sgem\\N(t)"};
961 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
962 xvgr_legend(fp, asize(leg), leg, oenv);
964 for (set = 0; set < nSet; set++)
966 snew(ctd[set], nData);
967 snew(ctdGem[set], nData);
968 for (i = 0; i < nData; i++)
970 ctd[set][i] = (double)val[set][i];
973 td[i] = (double)t[i];
976 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
979 for (i = 0; i < nData; i++)
981 fprintf(fp, " %g", t[i]);
982 for (set = 0; set < nSet; set++)
984 fprintf(fp, " %g", ctdGem[set][i]);
989 for (set = 0; set < nSet; set++)
999 int gmx_analyze(int argc, char *argv[])
1001 static const char *desc[] = {
1002 "[TT]g_analyze[tt] reads an ASCII file and analyzes data sets.",
1003 "A line in the input file may start with a time",
1004 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1005 "Multiple sets can also be",
1006 "read when they are separated by & (option [TT]-n[tt]);",
1007 "in this case only one [IT]y[it]-value is read from each line.",
1008 "All lines starting with # and @ are skipped.",
1009 "All analyses can also be done for the derivative of a set",
1010 "(option [TT]-d[tt]).[PAR]",
1012 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1013 "points are equidistant in time.[PAR]",
1015 "[TT]g_analyze[tt] always shows the average and standard deviation of each",
1016 "set, as well as the relative deviation of the third",
1017 "and fourth cumulant from those of a Gaussian distribution with the same",
1018 "standard deviation.[PAR]",
1020 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1021 "Be sure that the time interval between data points is",
1022 "much shorter than the time scale of the autocorrelation.[PAR]",
1024 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1025 "i/2 periods. The formula is:[BR]"
1026 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1027 "This is useful for principal components obtained from covariance",
1028 "analysis, since the principal components of random diffusion are",
1029 "pure cosines.[PAR]",
1031 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1033 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1035 "Option [TT]-av[tt] produces the average over the sets.",
1036 "Error bars can be added with the option [TT]-errbar[tt].",
1037 "The errorbars can represent the standard deviation, the error",
1038 "(assuming the points are independent) or the interval containing",
1039 "90% of the points, by discarding 5% of the points at the top and",
1042 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1043 "A set is divided in a number of blocks and averages are calculated for",
1044 "each block. The error for the total average is calculated from",
1045 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1046 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1047 "These errors are plotted as a function of the block size.",
1048 "Also an analytical block average curve is plotted, assuming",
1049 "that the autocorrelation is a sum of two exponentials.",
1050 "The analytical curve for the block average is:[BR]",
1051 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1052 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1053 "where T is the total time.",
1054 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1055 "When the actual block average is very close to the analytical curve,",
1056 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1057 "The complete derivation is given in",
1058 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1060 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1061 "component from a hydrogen bond autocorrelation function by the fitting",
1062 "of a sum of exponentials, as described in e.g.",
1063 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1064 "is the one with the most negative coefficient in the exponential,",
1065 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1066 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1068 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1069 "(and optionally kD) to the hydrogen bond autocorrelation function",
1070 "according to the reversible geminate recombination model. Removal of",
1071 "the ballistic component first is strongly advised. The model is presented in",
1072 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1074 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1075 "of each set and over all sets with respect to a filtered average.",
1076 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1077 "to len/2. len is supplied with the option [TT]-filter[tt].",
1078 "This filter reduces oscillations with period len/2 and len by a factor",
1079 "of 0.79 and 0.33 respectively.[PAR]",
1081 "Option [TT]-g[tt] fits the data to the function given with option",
1082 "[TT]-fitfn[tt].[PAR]",
1084 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1085 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1086 "zero or with a negative value are ignored.[PAR]"
1088 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1089 "on output from [TT]g_hbond[tt]. The input file can be taken directly",
1090 "from [TT]g_hbond -ac[tt], and then the same result should be produced."
1092 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1093 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1094 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1095 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1096 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1097 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1099 /* must correspond to enum avbar* declared at beginning of file */
1100 static const char *avbar_opt[avbarNR+1] = {
1101 NULL, "none", "stddev", "error", "90", NULL
1105 { "-time", FALSE, etBOOL, {&bHaveT},
1106 "Expect a time in the input" },
1107 { "-b", FALSE, etREAL, {&tb},
1108 "First time to read from set" },
1109 { "-e", FALSE, etREAL, {&te},
1110 "Last time to read from set" },
1111 { "-n", FALSE, etINT, {&nsets_in},
1112 "Read this number of sets separated by &" },
1113 { "-d", FALSE, etBOOL, {&bDer},
1114 "Use the derivative" },
1115 { "-dp", FALSE, etINT, {&d},
1116 "HIDDENThe derivative is the difference over this number of points" },
1117 { "-bw", FALSE, etREAL, {&binwidth},
1118 "Binwidth for the distribution" },
1119 { "-errbar", FALSE, etENUM, {avbar_opt},
1120 "Error bars for [TT]-av[tt]" },
1121 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1122 "Integrate data function(s) numerically using trapezium rule" },
1123 { "-aver_start", FALSE, etREAL, {&aver_start},
1124 "Start averaging the integral from here" },
1125 { "-xydy", FALSE, etBOOL, {&bXYdy},
1126 "Interpret second data set as error in the y values for integrating" },
1127 { "-regression", FALSE, etBOOL, {&bRegression},
1128 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1129 { "-luzar", FALSE, etBOOL, {&bLuzar},
1130 "Do a Luzar and Chandler analysis on a correlation function and related as produced by [TT]g_hbond[tt]. When in addition the [TT]-xydy[tt] flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1131 { "-temp", FALSE, etREAL, {&temp},
1132 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1133 { "-fitstart", FALSE, etREAL, {&fit_start},
1134 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1135 { "-fitend", FALSE, etREAL, {&fit_end},
1136 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1137 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1138 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1139 { "-nbmin", FALSE, etINT, {&nb_min},
1140 "HIDDENMinimum number of blocks for block averaging" },
1141 { "-resol", FALSE, etINT, {&resol},
1142 "HIDDENResolution for the block averaging, block size increases with"
1143 " a factor 2^(1/resol)" },
1144 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1145 "HIDDENAlways use a single exponential fit for the error estimate" },
1146 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1147 "HIDDENAllow a negative long-time correlation" },
1148 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1149 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1150 { "-filter", FALSE, etREAL, {&filtlen},
1151 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1152 { "-power", FALSE, etBOOL, {&bPower},
1153 "Fit data to: b t^a" },
1154 { "-subav", FALSE, etBOOL, {&bSubAv},
1155 "Subtract the average before autocorrelating" },
1156 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1157 "Calculate one ACF over all sets" },
1158 { "-nbalexp", FALSE, etINT, {&nBalExp},
1159 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1160 { "-baltime", FALSE, etREAL, {&balTime},
1161 "HIDDENTime up to which the ballistic component will be fitted" },
1162 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1163 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1164 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1165 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1166 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1167 /* "What type of gminate recombination to use"}, */
1168 /* { "-D", FALSE, etREAL, {&diffusion}, */
1169 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1171 #define NPA asize(pa)
1173 FILE *out, *out_fit;
1174 int n, nlast, s, nset, i, j = 0;
1175 real **val, *t, dt, tot, error;
1176 double *av, *sig, cum1, cum2, cum3, cum4, db;
1177 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1181 { efXVG, "-f", "graph", ffREAD },
1182 { efXVG, "-ac", "autocorr", ffOPTWR },
1183 { efXVG, "-msd", "msd", ffOPTWR },
1184 { efXVG, "-cc", "coscont", ffOPTWR },
1185 { efXVG, "-dist", "distr", ffOPTWR },
1186 { efXVG, "-av", "average", ffOPTWR },
1187 { efXVG, "-ee", "errest", ffOPTWR },
1188 { efXVG, "-bal", "ballisitc", ffOPTWR },
1189 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1190 { efLOG, "-g", "fitlog", ffOPTWR }
1192 #define NFILE asize(fnm)
1198 ppa = add_acf_pargs(&npargs, pa);
1200 parse_common_args(&argc, argv, PCA_CAN_VIEW,
1201 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv);
1203 acfile = opt2fn_null("-ac", NFILE, fnm);
1204 msdfile = opt2fn_null("-msd", NFILE, fnm);
1205 ccfile = opt2fn_null("-cc", NFILE, fnm);
1206 distfile = opt2fn_null("-dist", NFILE, fnm);
1207 avfile = opt2fn_null("-av", NFILE, fnm);
1208 eefile = opt2fn_null("-ee", NFILE, fnm);
1209 balfile = opt2fn_null("-bal", NFILE, fnm);
1210 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1211 /* When doing autocorrelation we don't want a fitlog for fitting
1212 * the function itself (not the acf) when the user did not ask for it.
1214 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1216 fitfile = opt2fn("-g", NFILE, fnm);
1220 fitfile = opt2fn_null("-g", NFILE, fnm);
1223 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1224 opt2parg_bSet("-b", npargs, ppa), tb,
1225 opt2parg_bSet("-e", npargs, ppa), te,
1226 nsets_in, &nset, &n, &dt, &t);
1227 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1231 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1234 for (s = 0; s < nset; s++)
1236 for (i = 0; (i < n); i++)
1238 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1247 printf("Calculating the integral using the trapezium rule\n");
1251 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1252 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1256 for (s = 0; s < nset; s++)
1258 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1259 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1264 if (fitfile != NULL)
1266 out_fit = ffopen(fitfile, "w");
1267 if (bXYdy && nset >= 2)
1269 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1273 for (s = 0; s < nset; s++)
1275 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1281 printf(" std. dev. relative deviation of\n");
1282 printf(" standard --------- cumulants from those of\n");
1283 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1284 printf(" cum. 3 cum. 4\n");
1287 for (s = 0; (s < nset); s++)
1293 for (i = 0; (i < n); i++)
1298 for (i = 0; (i < n); i++)
1300 db = val[s][i]-cum1;
1303 cum4 += db*db*db*db;
1309 sig[s] = sqrt(cum2);
1312 error = sqrt(cum2/(n-1));
1318 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1319 s+1, av[s], sig[s], error,
1320 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1321 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1327 filter(filtlen, n, nset, val, dt);
1332 out = xvgropen(msdfile, "Mean square displacement",
1333 "time", "MSD (nm\\S2\\N)", oenv);
1334 nlast = (int)(n*frac);
1335 for (s = 0; s < nset; s++)
1337 for (j = 0; j <= nlast; j++)
1341 fprintf(stderr, "\r%d", j);
1344 for (i = 0; i < n-j; i++)
1346 tot += sqr(val[s][i]-val[s][i+j]);
1349 fprintf(out, " %g %8g\n", dt*j, tot);
1353 fprintf(out, "&\n");
1357 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1361 plot_coscont(ccfile, n, nset, val, oenv);
1366 histogram(distfile, binwidth, n, nset, val, oenv);
1370 average(avfile, nenum(avbar_opt), n, nset, val, t);
1374 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1375 bEeFitAc, bEESEF, bEENLC, oenv);
1379 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1382 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1383 /* nFitPoints, fit_start, fit_end, oenv); */
1386 power_fit(n, nset, val, t);
1393 for (s = 0; s < nset; s++)
1395 for (i = 0; i < n; i++)
1401 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1402 eacNormal, bAverCorr);
1407 regression_analysis(n, bXYdy, t, nset, val);
1412 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1415 view_all(oenv, NFILE, fnm);