2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/correlationfunctions/autocorr.h"
45 #include "gromacs/correlationfunctions/expfit.h"
46 #include "gromacs/correlationfunctions/integrate.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/geminate.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/gmxana/gstat.h"
51 #include "gromacs/legacyheaders/copyrite.h"
52 #include "gromacs/legacyheaders/macros.h"
53 #include "gromacs/legacyheaders/readinp.h"
54 #include "gromacs/legacyheaders/txtdump.h"
55 #include "gromacs/legacyheaders/typedefs.h"
56 #include "gromacs/legacyheaders/viewit.h"
57 #include "gromacs/linearalgebra/matrix.h"
58 #include "gromacs/math/vec.h"
59 #include "gromacs/statistics/statistics.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/futil.h"
62 #include "gromacs/utility/smalloc.h"
64 /* must correspond to char *avbar_opt[] declared in main() */
66 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
69 static void power_fit(int n, int nset, real **val, real *t)
71 real *x, *y, quality, a, b, r;
79 for (i = 0; i < n; i++)
89 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
90 for (i = 0; i < n; i++)
96 for (s = 0; s < nset; s++)
99 for (i = 0; i < n && val[s][i] >= 0; i++)
101 y[i] = log(val[s][i]);
105 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
107 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
108 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
109 s+1, quality, a, exp(b));
116 static real cosine_content(int nhp, int n, real *y)
117 /* Assumes n equidistant points */
119 double fac, cosyint, yyint;
127 fac = M_PI*nhp/(n-1);
131 for (i = 0; i < n; i++)
133 cosyint += cos(fac*i)*y[i];
137 return 2*cosyint*cosyint/(n*yyint);
140 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
141 const output_env_t oenv)
147 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
150 for (s = 0; s < nset; s++)
152 cc = cosine_content(s+1, n, val[s]);
153 fprintf(fp, " %d %g\n", s+1, cc);
154 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
157 fprintf(stdout, "\n");
162 static void regression_analysis(int n, gmx_bool bXYdy,
163 real *x, int nset, real **val)
165 real S, chi2, a, b, da, db, r = 0;
168 if (bXYdy || (nset == 1))
170 printf("Fitting data to a function f(x) = ax + b\n");
171 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
172 printf("Error estimates will be given if w_i (sigma) values are given\n");
173 printf("(use option -xydy).\n\n");
176 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
178 gmx_fatal(FARGS, "Error fitting the data: %s",
179 gmx_stats_message(ok));
184 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
186 gmx_fatal(FARGS, "Error fitting the data: %s",
187 gmx_stats_message(ok));
191 printf("Chi2 = %g\n", chi2);
192 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
193 printf("Correlation coefficient = %.1f%%\n", 100*r);
197 printf("a = %g +/- %g\n", a, da);
198 printf("b = %g +/- %g\n", b, db);
202 printf("a = %g\n", a);
203 printf("b = %g\n", b);
208 double chi2, *a, **xx, *y;
213 for (j = 0; (j < nset-1); j++)
217 for (i = 0; (i < n); i++)
220 for (j = 1; (j < nset); j++)
222 xx[j-1][i] = val[j][i];
226 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
227 printf("Fitting %d data points in %d sets\n", n, nset-1);
228 printf("chi2 = %g\n", chi2);
230 for (i = 0; (i < nset-1); i++)
242 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
243 const output_env_t oenv)
253 for (s = 0; s < nset; s++)
255 for (i = 0; i < n; i++)
261 else if (val[s][i] > max)
268 min = binwidth*floor(min/binwidth);
269 max = binwidth*ceil(max/binwidth);
276 nbin = (int)((max - min)/binwidth + 0.5) + 1;
277 fprintf(stderr, "Making distributions with %d bins\n", nbin);
279 fp = xvgropen(distfile, "Distribution", "", "", oenv);
280 for (s = 0; s < nset; s++)
282 for (i = 0; i < nbin; i++)
286 for (i = 0; i < n; i++)
288 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
290 for (i = 0; i < nbin; i++)
292 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
296 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
302 static int real_comp(const void *a, const void *b)
304 real dif = *(real *)a - *(real *)b;
320 static void average(const char *avfile, int avbar_opt,
321 int n, int nset, real **val, real *t)
328 fp = gmx_ffopen(avfile, "w");
329 if ((avbar_opt == avbarERROR) && (nset == 1))
331 avbar_opt = avbarNONE;
333 if (avbar_opt != avbarNONE)
335 if (avbar_opt == avbar90)
338 fprintf(fp, "@TYPE xydydy\n");
339 edge = (int)(nset*0.05+0.5);
340 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
341 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
345 fprintf(fp, "@TYPE xydy\n");
349 for (i = 0; i < n; i++)
352 for (s = 0; s < nset; s++)
357 fprintf(fp, " %g %g", t[i], av);
359 if (avbar_opt != avbarNONE)
361 if (avbar_opt == avbar90)
363 for (s = 0; s < nset; s++)
367 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
368 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
372 for (s = 0; s < nset; s++)
374 var += sqr(val[s][i]-av);
376 if (avbar_opt == avbarSTDDEV)
378 err = sqrt(var/nset);
382 err = sqrt(var/(nset*(nset-1)));
384 fprintf(fp, " %g", err);
391 if (avbar_opt == avbar90)
397 static real anal_ee_inf(double *parm, real T)
399 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
402 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
403 int nset, double *av, double *sig, real **val, real dt,
404 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
405 const output_env_t oenv)
408 int bs, prev_bs, nbs, nb;
413 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
415 real ee, a, tau1, tau2;
419 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
424 fp = xvgropen(eefile, "Error estimates",
425 "Block size (time)", "Error estimate", oenv);
426 if (output_env_get_print_xvgr_codes(oenv))
429 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
433 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
436 spacing = pow(2, 1.0/resol);
440 for (s = 0; s < nset; s++)
452 for (i = 0; i < nb; i++)
455 for (j = 0; j < bs; j++)
457 blav += val[s][bs*i+j];
459 var += sqr(av[s] - blav/bs);
468 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
485 for (i = 0; i < nbs/2; i++)
488 tbs[i] = tbs[nbs-1-i];
491 ybs[i] = ybs[nbs-1-i];
494 /* The initial slope of the normalized ybs^2 is 1.
495 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
496 * From this we take our initial guess for tau1.
505 while (i < nbs - 1 &&
506 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
510 fprintf(stdout, "Data set %d has strange time correlations:\n"
511 "the std. error using single points is larger than that of blocks of 2 points\n"
512 "The error estimate might be inaccurate, check the fit\n",
514 /* Use the total time as tau for the fitting weights */
515 tau_sig = (n - 1)*dt;
524 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
527 /* Generate more or less appropriate sigma's,
528 * also taking the density of points into account.
530 for (i = 0; i < nbs; i++)
534 dens = tbs[1]/tbs[0] - 1;
538 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
542 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
544 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
549 fitparm[0] = tau1_est;
551 /* We set the initial guess for tau2
552 * to halfway between tau1_est and the total time (on log scale).
554 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
555 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
556 bDebugMode(), effnERREST, fitparm, 0);
557 fitparm[3] = 1-fitparm[1];
559 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
560 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
564 if (fitparm[2] > (n-1)*dt)
567 "Warning: tau2 is longer than the length of the data (%g)\n"
568 " the statistics might be bad\n",
573 fprintf(stdout, "a fitted parameter is negative\n");
575 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
576 sig[s]*anal_ee_inf(fitparm, n*dt),
577 fitparm[1], fitparm[0], fitparm[2]);
578 /* Do a fit with tau2 fixed at the total time.
579 * One could also choose any other large value for tau2.
581 fitparm[0] = tau1_est;
583 fitparm[2] = (n-1)*dt;
584 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
585 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
586 effnERREST, fitparm, 4);
587 fitparm[3] = 1-fitparm[1];
589 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
590 || (fitparm[1] > 1 && !bAllowNegLTCorr))
594 fprintf(stdout, "a fitted parameter is negative\n");
595 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
596 sig[s]*anal_ee_inf(fitparm, n*dt),
597 fitparm[1], fitparm[0], fitparm[2]);
599 /* Do a single exponential fit */
600 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
601 fitparm[0] = tau1_est;
604 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
605 effnERREST, fitparm, 6);
606 fitparm[3] = 1-fitparm[1];
609 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
614 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
615 s+1, ee, a, tau1, tau2);
616 if (output_env_get_xvg_format(oenv) == exvgXMGR)
618 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
619 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
621 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
623 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
624 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
626 for (i = 0; i < nbs; i++)
628 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
629 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
639 for (i = 0; i < n; i++)
641 ac[i] = val[s][i] - av[s];
651 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
652 dt, eacNormal, 1, FALSE, TRUE,
653 FALSE, 0, 0, effnNONE);
657 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
659 for (i = 1; i <= fitlen/2; i++)
665 /* Generate more or less appropriate sigma's */
666 for (i = 0; i <= fitlen; i++)
668 fitsig[i] = sqrt(acint + dt*i);
671 ac_fit[0] = 0.5*acint;
673 ac_fit[2] = 10*acint;
674 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
675 bDebugMode(), effnEXP3, ac_fit, 0);
676 ac_fit[3] = 1 - ac_fit[1];
678 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
679 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
680 ac_fit[1], ac_fit[0], ac_fit[2]);
682 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
683 for (i = 0; i < nbs; i++)
685 fprintf(fp, "%g %g\n", tbs[i],
686 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
693 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
702 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
703 gmx_bool bError, real fit_start, real smooth_tail_start,
704 const output_env_t oenv)
706 const real tol = 1e-8;
711 please_cite(stdout, "Spoel2006b");
713 /* Compute negative derivative k(t) = -dc(t)/dt */
717 compute_derivative(nn, time, val[0], kt);
718 for (j = 0; (j < nn); j++)
725 for (j = 0; (j < nn); j++)
727 d2 += sqr(kt[j] - val[3][j]);
729 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
731 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
732 temp, smooth_tail_start, oenv);
737 analyse_corr(nn, time, val[0], val[2], val[4],
738 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
742 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
743 printf("Not doing anything. Sorry.\n");
747 static void filter(real flen, int n, int nset, real **val, real dt)
750 double *filt, sum, vf, fluc, fluctot;
752 f = (int)(flen/(2*dt));
756 for (i = 1; i <= f; i++)
758 filt[i] = cos(M_PI*dt*i/flen);
761 for (i = 0; i <= f; i++)
765 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
766 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
768 for (s = 0; s < nset; s++)
771 for (i = f; i < n-f; i++)
773 vf = filt[0]*val[s][i];
774 for (j = 1; j <= f; j++)
776 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
778 fluc += sqr(val[s][i] - vf);
782 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
784 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
785 fprintf(stdout, "\n");
790 static void do_fit(FILE *out, int n, gmx_bool bYdy,
791 int ny, real *x0, real **val,
792 int npargs, t_pargs *ppa, const output_env_t oenv)
794 real *c1 = NULL, *sig = NULL;
796 real tendfit, tbeginfit;
797 int i, efitfn, nparm;
799 efitfn = get_acffitfn();
800 nparm = effnNparams(efitfn);
801 fprintf(out, "Will fit to the following function:\n");
802 fprintf(out, "%s\n", effnDescription(efitfn));
808 fprintf(out, "Using two columns as y and sigma values\n");
814 if (opt2parg_bSet("-beginfit", npargs, ppa))
816 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
822 if (opt2parg_bSet("-endfit", npargs, ppa))
824 tendfit = opt2parg_real("-endfit", npargs, ppa);
831 snew(fitparm, nparm);
843 fitparm[1] = 0.5*c1[0];
847 fitparm[0] = fitparm[2] = 0.5*c1[0];
853 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
860 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
868 fprintf(out, "Warning: don't know how to initialize the parameters\n");
869 for (i = 0; (i < nparm); i++)
874 fprintf(out, "Starting parameters:\n");
875 for (i = 0; (i < nparm); i++)
877 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
879 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
880 oenv, bDebugMode(), efitfn, fitparm, 0))
882 for (i = 0; (i < nparm); i++)
884 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
889 fprintf(out, "No solution was found\n");
893 static void do_ballistic(const char *balFile, int nData,
894 real *t, real **val, int nSet,
895 real balTime, int nBalExp,
896 const output_env_t oenv)
898 double **ctd = NULL, *td = NULL;
899 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
900 static char *leg[] = {"Ac'(t)"};
904 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
909 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
910 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
912 for (set = 0; set < nSet; set++)
914 snew(ctd[set], nData);
915 for (i = 0; i < nData; i++)
917 ctd[set][i] = (double)val[set][i];
920 td[i] = (double)t[i];
924 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
927 for (i = 0; i < nData; i++)
929 fprintf(fp, " %g", t[i]);
930 for (set = 0; set < nSet; set++)
932 fprintf(fp, " %g", ctd[set][i]);
938 for (set = 0; set < nSet; set++)
948 printf("Number of data points is less than the number of parameters to fit\n."
949 "The system is underdetermined, hence no ballistic term can be found.\n\n");
953 static void do_geminate(const char *gemFile, int nData,
954 real *t, real **val, int nSet,
955 const real D, const real rcut, const real balTime,
956 const int nFitPoints, const real begFit, const real endFit,
957 const output_env_t oenv)
959 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
960 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
961 begFit, endFit, balTime, 1);
962 const char *leg[] = {"Ac\\sgem\\N(t)"};
970 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
971 xvgr_legend(fp, asize(leg), leg, oenv);
973 for (set = 0; set < nSet; set++)
975 snew(ctd[set], nData);
976 snew(ctdGem[set], nData);
977 for (i = 0; i < nData; i++)
979 ctd[set][i] = (double)val[set][i];
982 td[i] = (double)t[i];
985 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
988 for (i = 0; i < nData; i++)
990 fprintf(fp, " %g", t[i]);
991 for (set = 0; set < nSet; set++)
993 fprintf(fp, " %g", ctdGem[set][i]);
998 for (set = 0; set < nSet; set++)
1009 int gmx_analyze(int argc, char *argv[])
1011 static const char *desc[] = {
1012 "[THISMODULE] reads an ASCII file and analyzes data sets.",
1013 "A line in the input file may start with a time",
1014 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1015 "Multiple sets can also be",
1016 "read when they are separated by & (option [TT]-n[tt]);",
1017 "in this case only one [IT]y[it]-value is read from each line.",
1018 "All lines starting with # and @ are skipped.",
1019 "All analyses can also be done for the derivative of a set",
1020 "(option [TT]-d[tt]).[PAR]",
1022 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1023 "points are equidistant in time.[PAR]",
1025 "[THISMODULE] always shows the average and standard deviation of each",
1026 "set, as well as the relative deviation of the third",
1027 "and fourth cumulant from those of a Gaussian distribution with the same",
1028 "standard deviation.[PAR]",
1030 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1031 "Be sure that the time interval between data points is",
1032 "much shorter than the time scale of the autocorrelation.[PAR]",
1034 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1035 "i/2 periods. The formula is::",
1037 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
1039 "This is useful for principal components obtained from covariance",
1040 "analysis, since the principal components of random diffusion are",
1041 "pure cosines.[PAR]",
1043 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1045 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1047 "Option [TT]-av[tt] produces the average over the sets.",
1048 "Error bars can be added with the option [TT]-errbar[tt].",
1049 "The errorbars can represent the standard deviation, the error",
1050 "(assuming the points are independent) or the interval containing",
1051 "90% of the points, by discarding 5% of the points at the top and",
1054 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1055 "A set is divided in a number of blocks and averages are calculated for",
1056 "each block. The error for the total average is calculated from",
1057 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1058 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1059 "These errors are plotted as a function of the block size.",
1060 "Also an analytical block average curve is plotted, assuming",
1061 "that the autocorrelation is a sum of two exponentials.",
1062 "The analytical curve for the block average is::",
1064 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1065 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],",
1067 "where T is the total time.",
1068 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1069 "When the actual block average is very close to the analytical curve,",
1070 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1071 "The complete derivation is given in",
1072 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1074 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1075 "component from a hydrogen bond autocorrelation function by the fitting",
1076 "of a sum of exponentials, as described in e.g.",
1077 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1078 "is the one with the most negative coefficient in the exponential,",
1079 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1080 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1082 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1083 "(and optionally kD) to the hydrogen bond autocorrelation function",
1084 "according to the reversible geminate recombination model. Removal of",
1085 "the ballistic component first is strongly advised. The model is presented in",
1086 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1088 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1089 "of each set and over all sets with respect to a filtered average.",
1090 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1091 "to len/2. len is supplied with the option [TT]-filter[tt].",
1092 "This filter reduces oscillations with period len/2 and len by a factor",
1093 "of 0.79 and 0.33 respectively.[PAR]",
1095 "Option [TT]-g[tt] fits the data to the function given with option",
1096 "[TT]-fitfn[tt].[PAR]",
1098 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1099 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1100 "zero or with a negative value are ignored.[PAR]"
1102 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1103 "on output from [gmx-hbond]. The input file can be taken directly",
1104 "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1106 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1107 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1108 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1109 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1110 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1111 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1113 /* must correspond to enum avbar* declared at beginning of file */
1114 static const char *avbar_opt[avbarNR+1] = {
1115 NULL, "none", "stddev", "error", "90", NULL
1119 { "-time", FALSE, etBOOL, {&bHaveT},
1120 "Expect a time in the input" },
1121 { "-b", FALSE, etREAL, {&tb},
1122 "First time to read from set" },
1123 { "-e", FALSE, etREAL, {&te},
1124 "Last time to read from set" },
1125 { "-n", FALSE, etINT, {&nsets_in},
1126 "Read this number of sets separated by &" },
1127 { "-d", FALSE, etBOOL, {&bDer},
1128 "Use the derivative" },
1129 { "-dp", FALSE, etINT, {&d},
1130 "HIDDENThe derivative is the difference over this number of points" },
1131 { "-bw", FALSE, etREAL, {&binwidth},
1132 "Binwidth for the distribution" },
1133 { "-errbar", FALSE, etENUM, {avbar_opt},
1134 "Error bars for [TT]-av[tt]" },
1135 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1136 "Integrate data function(s) numerically using trapezium rule" },
1137 { "-aver_start", FALSE, etREAL, {&aver_start},
1138 "Start averaging the integral from here" },
1139 { "-xydy", FALSE, etBOOL, {&bXYdy},
1140 "Interpret second data set as error in the y values for integrating" },
1141 { "-regression", FALSE, etBOOL, {&bRegression},
1142 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1143 { "-luzar", FALSE, etBOOL, {&bLuzar},
1144 "Do a Luzar and Chandler analysis on a correlation function and "
1145 "related as produced by [gmx-hbond]. When in addition the "
1146 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1147 "interpreted as errors in c(t) and n(t)." },
1148 { "-temp", FALSE, etREAL, {&temp},
1149 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1150 { "-fitstart", FALSE, etREAL, {&fit_start},
1151 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1152 { "-fitend", FALSE, etREAL, {&fit_end},
1153 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1154 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1155 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1156 { "-nbmin", FALSE, etINT, {&nb_min},
1157 "HIDDENMinimum number of blocks for block averaging" },
1158 { "-resol", FALSE, etINT, {&resol},
1159 "HIDDENResolution for the block averaging, block size increases with"
1160 " a factor 2^(1/resol)" },
1161 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1162 "HIDDENAlways use a single exponential fit for the error estimate" },
1163 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1164 "HIDDENAllow a negative long-time correlation" },
1165 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1166 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1167 { "-filter", FALSE, etREAL, {&filtlen},
1168 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1169 { "-power", FALSE, etBOOL, {&bPower},
1170 "Fit data to: b t^a" },
1171 { "-subav", FALSE, etBOOL, {&bSubAv},
1172 "Subtract the average before autocorrelating" },
1173 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1174 "Calculate one ACF over all sets" },
1175 { "-nbalexp", FALSE, etINT, {&nBalExp},
1176 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1177 { "-baltime", FALSE, etREAL, {&balTime},
1178 "HIDDENTime up to which the ballistic component will be fitted" },
1179 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1180 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1181 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1182 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1183 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1184 /* "What type of gminate recombination to use"}, */
1185 /* { "-D", FALSE, etREAL, {&diffusion}, */
1186 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1188 #define NPA asize(pa)
1190 FILE *out, *out_fit;
1191 int n, nlast, s, nset, i, j = 0;
1192 real **val, *t, dt, tot, error;
1193 double *av, *sig, cum1, cum2, cum3, cum4, db;
1194 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1198 { efXVG, "-f", "graph", ffREAD },
1199 { efXVG, "-ac", "autocorr", ffOPTWR },
1200 { efXVG, "-msd", "msd", ffOPTWR },
1201 { efXVG, "-cc", "coscont", ffOPTWR },
1202 { efXVG, "-dist", "distr", ffOPTWR },
1203 { efXVG, "-av", "average", ffOPTWR },
1204 { efXVG, "-ee", "errest", ffOPTWR },
1205 { efXVG, "-bal", "ballisitc", ffOPTWR },
1206 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1207 { efLOG, "-g", "fitlog", ffOPTWR }
1209 #define NFILE asize(fnm)
1215 ppa = add_acf_pargs(&npargs, pa);
1217 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1218 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1223 acfile = opt2fn_null("-ac", NFILE, fnm);
1224 msdfile = opt2fn_null("-msd", NFILE, fnm);
1225 ccfile = opt2fn_null("-cc", NFILE, fnm);
1226 distfile = opt2fn_null("-dist", NFILE, fnm);
1227 avfile = opt2fn_null("-av", NFILE, fnm);
1228 eefile = opt2fn_null("-ee", NFILE, fnm);
1229 balfile = opt2fn_null("-bal", NFILE, fnm);
1230 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1231 /* When doing autocorrelation we don't want a fitlog for fitting
1232 * the function itself (not the acf) when the user did not ask for it.
1234 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1236 fitfile = opt2fn("-g", NFILE, fnm);
1240 fitfile = opt2fn_null("-g", NFILE, fnm);
1243 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1244 opt2parg_bSet("-b", npargs, ppa), tb,
1245 opt2parg_bSet("-e", npargs, ppa), te,
1246 nsets_in, &nset, &n, &dt, &t);
1247 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1251 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1254 for (s = 0; s < nset; s++)
1256 for (i = 0; (i < n); i++)
1258 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1267 printf("Calculating the integral using the trapezium rule\n");
1271 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1272 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1276 for (s = 0; s < nset; s++)
1278 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1279 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1284 if (fitfile != NULL)
1286 out_fit = gmx_ffopen(fitfile, "w");
1287 if (bXYdy && nset >= 2)
1289 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1293 for (s = 0; s < nset; s++)
1295 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1298 gmx_ffclose(out_fit);
1301 printf(" std. dev. relative deviation of\n");
1302 printf(" standard --------- cumulants from those of\n");
1303 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1304 printf(" cum. 3 cum. 4\n");
1307 for (s = 0; (s < nset); s++)
1313 for (i = 0; (i < n); i++)
1318 for (i = 0; (i < n); i++)
1320 db = val[s][i]-cum1;
1323 cum4 += db*db*db*db;
1329 sig[s] = sqrt(cum2);
1332 error = sqrt(cum2/(n-1));
1338 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1339 s+1, av[s], sig[s], error,
1340 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1341 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1347 filter(filtlen, n, nset, val, dt);
1352 out = xvgropen(msdfile, "Mean square displacement",
1353 "time", "MSD (nm\\S2\\N)", oenv);
1354 nlast = (int)(n*frac);
1355 for (s = 0; s < nset; s++)
1357 for (j = 0; j <= nlast; j++)
1361 fprintf(stderr, "\r%d", j);
1364 for (i = 0; i < n-j; i++)
1366 tot += sqr(val[s][i]-val[s][i+j]);
1369 fprintf(out, " %g %8g\n", dt*j, tot);
1373 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1377 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1381 plot_coscont(ccfile, n, nset, val, oenv);
1386 histogram(distfile, binwidth, n, nset, val, oenv);
1390 average(avfile, nenum(avbar_opt), n, nset, val, t);
1394 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1395 bEeFitAc, bEESEF, bEENLC, oenv);
1399 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1402 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1403 /* nFitPoints, fit_start, fit_end, oenv); */
1406 power_fit(n, nset, val, t);
1413 for (s = 0; s < nset; s++)
1415 for (i = 0; i < n; i++)
1421 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1422 eacNormal, bAverCorr);
1427 regression_analysis(n, bXYdy, t, nset, val);
1432 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1435 view_all(oenv, NFILE, fnm);