2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/correlationfunctions/autocorr.h"
45 #include "gromacs/correlationfunctions/expfit.h"
46 #include "gromacs/correlationfunctions/integrate.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/geminate.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/gmxana/gstat.h"
51 #include "gromacs/legacyheaders/copyrite.h"
52 #include "gromacs/legacyheaders/macros.h"
53 #include "gromacs/legacyheaders/readinp.h"
54 #include "gromacs/legacyheaders/txtdump.h"
55 #include "gromacs/legacyheaders/typedefs.h"
56 #include "gromacs/legacyheaders/viewit.h"
57 #include "gromacs/linearalgebra/matrix.h"
58 #include "gromacs/math/vec.h"
59 #include "gromacs/statistics/statistics.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/futil.h"
62 #include "gromacs/utility/smalloc.h"
64 /* must correspond to char *avbar_opt[] declared in main() */
66 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
69 static void power_fit(int n, int nset, real **val, real *t)
71 real *x, *y, quality, a, b, r;
79 for (i = 0; i < n; i++)
89 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
90 for (i = 0; i < n; i++)
96 for (s = 0; s < nset; s++)
99 for (i = 0; i < n && val[s][i] >= 0; i++)
101 y[i] = log(val[s][i]);
105 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
107 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
108 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
109 s+1, quality, a, exp(b));
116 static real cosine_content(int nhp, int n, real *y)
117 /* Assumes n equidistant points */
119 double fac, cosyint, yyint;
127 fac = M_PI*nhp/(n-1);
131 for (i = 0; i < n; i++)
133 cosyint += cos(fac*i)*y[i];
137 return 2*cosyint*cosyint/(n*yyint);
140 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
141 const output_env_t oenv)
147 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
150 for (s = 0; s < nset; s++)
152 cc = cosine_content(s+1, n, val[s]);
153 fprintf(fp, " %d %g\n", s+1, cc);
154 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
157 fprintf(stdout, "\n");
162 static void regression_analysis(int n, gmx_bool bXYdy,
163 real *x, int nset, real **val)
165 real S, chi2, a, b, da, db, r = 0;
168 if (bXYdy || (nset == 1))
170 printf("Fitting data to a function f(x) = ax + b\n");
171 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
172 printf("Error estimates will be given if w_i (sigma) values are given\n");
173 printf("(use option -xydy).\n\n");
176 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
178 gmx_fatal(FARGS, "Error fitting the data: %s",
179 gmx_stats_message(ok));
184 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
186 gmx_fatal(FARGS, "Error fitting the data: %s",
187 gmx_stats_message(ok));
191 printf("Chi2 = %g\n", chi2);
192 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
193 printf("Correlation coefficient = %.1f%%\n", 100*r);
197 printf("a = %g +/- %g\n", a, da);
198 printf("b = %g +/- %g\n", b, db);
202 printf("a = %g\n", a);
203 printf("b = %g\n", b);
208 double chi2, *a, **xx, *y;
213 for (j = 0; (j < nset-1); j++)
217 for (i = 0; (i < n); i++)
220 for (j = 1; (j < nset); j++)
222 xx[j-1][i] = val[j][i];
226 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
227 printf("Fitting %d data points in %d sets\n", n, nset-1);
228 printf("chi2 = %g\n", chi2);
230 for (i = 0; (i < nset-1); i++)
242 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
243 const output_env_t oenv)
253 for (s = 0; s < nset; s++)
255 for (i = 0; i < n; i++)
261 else if (val[s][i] > max)
268 min = binwidth*floor(min/binwidth);
269 max = binwidth*ceil(max/binwidth);
276 nbin = (int)((max - min)/binwidth + 0.5) + 1;
277 fprintf(stderr, "Making distributions with %d bins\n", nbin);
279 fp = xvgropen(distfile, "Distribution", "", "", oenv);
280 for (s = 0; s < nset; s++)
282 for (i = 0; i < nbin; i++)
286 for (i = 0; i < n; i++)
288 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
290 for (i = 0; i < nbin; i++)
292 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
296 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
302 static int real_comp(const void *a, const void *b)
304 real dif = *(real *)a - *(real *)b;
320 static void average(const char *avfile, int avbar_opt,
321 int n, int nset, real **val, real *t)
328 fp = gmx_ffopen(avfile, "w");
329 if ((avbar_opt == avbarERROR) && (nset == 1))
331 avbar_opt = avbarNONE;
333 if (avbar_opt != avbarNONE)
335 if (avbar_opt == avbar90)
338 fprintf(fp, "@TYPE xydydy\n");
339 edge = (int)(nset*0.05+0.5);
340 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
341 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
345 fprintf(fp, "@TYPE xydy\n");
349 for (i = 0; i < n; i++)
352 for (s = 0; s < nset; s++)
357 fprintf(fp, " %g %g", t[i], av);
359 if (avbar_opt != avbarNONE)
361 if (avbar_opt == avbar90)
363 for (s = 0; s < nset; s++)
367 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
368 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
372 for (s = 0; s < nset; s++)
374 var += sqr(val[s][i]-av);
376 if (avbar_opt == avbarSTDDEV)
378 err = sqrt(var/nset);
382 err = sqrt(var/(nset*(nset-1)));
384 fprintf(fp, " %g", err);
391 if (avbar_opt == avbar90)
397 static real anal_ee_inf(double *parm, real T)
399 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
402 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
403 int nset, double *av, double *sig, real **val, real dt,
404 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
405 const output_env_t oenv)
408 int bs, prev_bs, nbs, nb;
413 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
415 real ee, a, tau1, tau2;
419 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
424 fp = xvgropen(eefile, "Error estimates",
425 "Block size (time)", "Error estimate", oenv);
426 if (output_env_get_print_xvgr_codes(oenv))
429 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
433 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
436 spacing = pow(2, 1.0/resol);
440 for (s = 0; s < nset; s++)
452 for (i = 0; i < nb; i++)
455 for (j = 0; j < bs; j++)
457 blav += val[s][bs*i+j];
459 var += sqr(av[s] - blav/bs);
468 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
485 for (i = 0; i < nbs/2; i++)
488 tbs[i] = tbs[nbs-1-i];
491 ybs[i] = ybs[nbs-1-i];
494 /* The initial slope of the normalized ybs^2 is 1.
495 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
496 * From this we take our initial guess for tau1.
505 while (i < nbs - 1 &&
506 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
510 fprintf(stdout, "Data set %d has strange time correlations:\n"
511 "the std. error using single points is larger than that of blocks of 2 points\n"
512 "The error estimate might be inaccurate, check the fit\n",
514 /* Use the total time as tau for the fitting weights */
515 tau_sig = (n - 1)*dt;
524 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
527 /* Generate more or less appropriate sigma's,
528 * also taking the density of points into account.
530 for (i = 0; i < nbs; i++)
534 dens = tbs[1]/tbs[0] - 1;
538 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
542 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
544 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
549 fitparm[0] = tau1_est;
551 /* We set the initial guess for tau2
552 * to halfway between tau1_est and the total time (on log scale).
554 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
555 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
556 bDebugMode(), effnERREST, fitparm, 0);
557 fitparm[3] = 1-fitparm[1];
559 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
560 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
564 if (fitparm[2] > (n-1)*dt)
567 "Warning: tau2 is longer than the length of the data (%g)\n"
568 " the statistics might be bad\n",
573 fprintf(stdout, "a fitted parameter is negative\n");
575 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
576 sig[s]*anal_ee_inf(fitparm, n*dt),
577 fitparm[1], fitparm[0], fitparm[2]);
578 /* Do a fit with tau2 fixed at the total time.
579 * One could also choose any other large value for tau2.
581 fitparm[0] = tau1_est;
583 fitparm[2] = (n-1)*dt;
584 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
585 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
586 effnERREST, fitparm, 4);
587 fitparm[3] = 1-fitparm[1];
589 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
590 || (fitparm[1] > 1 && !bAllowNegLTCorr))
594 fprintf(stdout, "a fitted parameter is negative\n");
595 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
596 sig[s]*anal_ee_inf(fitparm, n*dt),
597 fitparm[1], fitparm[0], fitparm[2]);
599 /* Do a single exponential fit */
600 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
601 fitparm[0] = tau1_est;
604 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
605 effnERREST, fitparm, 6);
606 fitparm[3] = 1-fitparm[1];
609 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
614 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
615 s+1, ee, a, tau1, tau2);
616 if (output_env_get_xvg_format(oenv) == exvgXMGR)
618 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
619 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
621 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
623 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
624 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
626 for (i = 0; i < nbs; i++)
628 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
629 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
639 for (i = 0; i < n; i++)
641 ac[i] = val[s][i] - av[s];
651 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
652 dt, eacNormal, 1, FALSE, TRUE,
653 FALSE, 0, 0, effnNONE);
657 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
659 for (i = 1; i <= fitlen/2; i++)
665 /* Generate more or less appropriate sigma's */
666 for (i = 0; i <= fitlen; i++)
668 fitsig[i] = sqrt(acint + dt*i);
671 ac_fit[0] = 0.5*acint;
673 ac_fit[2] = 10*acint;
674 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
675 bDebugMode(), effnEXP3, ac_fit, 0);
676 ac_fit[3] = 1 - ac_fit[1];
678 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
679 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
680 ac_fit[1], ac_fit[0], ac_fit[2]);
682 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
683 for (i = 0; i < nbs; i++)
685 fprintf(fp, "%g %g\n", tbs[i],
686 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
693 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
702 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
703 gmx_bool bError, real fit_start, real smooth_tail_start,
704 const output_env_t oenv)
706 const real tol = 1e-8;
711 please_cite(stdout, "Spoel2006b");
713 /* Compute negative derivative k(t) = -dc(t)/dt */
717 compute_derivative(nn, time, val[0], kt);
718 for (j = 0; (j < nn); j++)
725 for (j = 0; (j < nn); j++)
727 d2 += sqr(kt[j] - val[3][j]);
729 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
731 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
732 temp, smooth_tail_start, oenv);
737 analyse_corr(nn, time, val[0], val[2], val[4],
738 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
742 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
743 printf("Not doing anything. Sorry.\n");
747 static void filter(real flen, int n, int nset, real **val, real dt)
750 double *filt, sum, vf, fluc, fluctot;
752 f = (int)(flen/(2*dt));
756 for (i = 1; i <= f; i++)
758 filt[i] = cos(M_PI*dt*i/flen);
761 for (i = 0; i <= f; i++)
765 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
766 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
768 for (s = 0; s < nset; s++)
771 for (i = f; i < n-f; i++)
773 vf = filt[0]*val[s][i];
774 for (j = 1; j <= f; j++)
776 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
778 fluc += sqr(val[s][i] - vf);
782 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
784 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
785 fprintf(stdout, "\n");
790 static void do_fit(FILE *out, int n, gmx_bool bYdy,
791 int ny, real *x0, real **val,
792 int npargs, t_pargs *ppa, const output_env_t oenv)
794 real *c1 = NULL, *sig = NULL;
796 real tendfit, tbeginfit;
797 int i, efitfn, nparm;
799 efitfn = get_acffitfn();
800 nparm = effnNparams(efitfn);
801 fprintf(out, "Will fit to the following function:\n");
802 fprintf(out, "%s\n", effnDescription(efitfn));
808 fprintf(out, "Using two columns as y and sigma values\n");
814 if (opt2parg_bSet("-beginfit", npargs, ppa))
816 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
822 if (opt2parg_bSet("-endfit", npargs, ppa))
824 tendfit = opt2parg_real("-endfit", npargs, ppa);
831 snew(fitparm, nparm);
843 fitparm[1] = 0.5*c1[0];
847 fitparm[0] = fitparm[2] = 0.5*c1[0];
853 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
860 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
868 fprintf(out, "Warning: don't know how to initialize the parameters\n");
869 for (i = 0; (i < nparm); i++)
874 fprintf(out, "Starting parameters:\n");
875 for (i = 0; (i < nparm); i++)
877 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
879 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
880 oenv, bDebugMode(), efitfn, fitparm, 0))
882 for (i = 0; (i < nparm); i++)
884 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
889 fprintf(out, "No solution was found\n");
893 static void do_ballistic(const char *balFile, int nData,
894 real *t, real **val, int nSet,
895 real balTime, int nBalExp,
896 const output_env_t oenv)
898 double **ctd = NULL, *td = NULL;
899 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
900 static char *leg[] = {"Ac'(t)"};
904 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
909 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
910 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
912 for (set = 0; set < nSet; set++)
914 snew(ctd[set], nData);
915 for (i = 0; i < nData; i++)
917 ctd[set][i] = (double)val[set][i];
920 td[i] = (double)t[i];
924 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
927 for (i = 0; i < nData; i++)
929 fprintf(fp, " %g", t[i]);
930 for (set = 0; set < nSet; set++)
932 fprintf(fp, " %g", ctd[set][i]);
938 for (set = 0; set < nSet; set++)
947 printf("Number of data points is less than the number of parameters to fit\n."
948 "The system is underdetermined, hence no ballistic term can be found.\n\n");
952 static void do_geminate(const char *gemFile, int nData,
953 real *t, real **val, int nSet,
954 const real D, const real rcut, const real balTime,
955 const int nFitPoints, const real begFit, const real endFit,
956 const output_env_t oenv)
958 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
959 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
960 begFit, endFit, balTime, 1);
961 const char *leg[] = {"Ac\\sgem\\N(t)"};
969 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
970 xvgr_legend(fp, asize(leg), leg, oenv);
972 for (set = 0; set < nSet; set++)
974 snew(ctd[set], nData);
975 snew(ctdGem[set], nData);
976 for (i = 0; i < nData; i++)
978 ctd[set][i] = (double)val[set][i];
981 td[i] = (double)t[i];
984 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
987 for (i = 0; i < nData; i++)
989 fprintf(fp, " %g", t[i]);
990 for (set = 0; set < nSet; set++)
992 fprintf(fp, " %g", ctdGem[set][i]);
997 for (set = 0; set < nSet; set++)
1007 int gmx_analyze(int argc, char *argv[])
1009 static const char *desc[] = {
1010 "[THISMODULE] reads an ASCII file and analyzes data sets.",
1011 "A line in the input file may start with a time",
1012 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1013 "Multiple sets can also be",
1014 "read when they are separated by & (option [TT]-n[tt]);",
1015 "in this case only one [IT]y[it]-value is read from each line.",
1016 "All lines starting with # and @ are skipped.",
1017 "All analyses can also be done for the derivative of a set",
1018 "(option [TT]-d[tt]).[PAR]",
1020 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1021 "points are equidistant in time.[PAR]",
1023 "[THISMODULE] always shows the average and standard deviation of each",
1024 "set, as well as the relative deviation of the third",
1025 "and fourth cumulant from those of a Gaussian distribution with the same",
1026 "standard deviation.[PAR]",
1028 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1029 "Be sure that the time interval between data points is",
1030 "much shorter than the time scale of the autocorrelation.[PAR]",
1032 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1033 "i/2 periods. The formula is:[BR]"
1034 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1035 "This is useful for principal components obtained from covariance",
1036 "analysis, since the principal components of random diffusion are",
1037 "pure cosines.[PAR]",
1039 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1041 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1043 "Option [TT]-av[tt] produces the average over the sets.",
1044 "Error bars can be added with the option [TT]-errbar[tt].",
1045 "The errorbars can represent the standard deviation, the error",
1046 "(assuming the points are independent) or the interval containing",
1047 "90% of the points, by discarding 5% of the points at the top and",
1050 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1051 "A set is divided in a number of blocks and averages are calculated for",
1052 "each block. The error for the total average is calculated from",
1053 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1054 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1055 "These errors are plotted as a function of the block size.",
1056 "Also an analytical block average curve is plotted, assuming",
1057 "that the autocorrelation is a sum of two exponentials.",
1058 "The analytical curve for the block average is:[BR]",
1059 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1060 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1061 "where T is the total time.",
1062 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1063 "When the actual block average is very close to the analytical curve,",
1064 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1065 "The complete derivation is given in",
1066 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1068 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1069 "component from a hydrogen bond autocorrelation function by the fitting",
1070 "of a sum of exponentials, as described in e.g.",
1071 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1072 "is the one with the most negative coefficient in the exponential,",
1073 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1074 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1076 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1077 "(and optionally kD) to the hydrogen bond autocorrelation function",
1078 "according to the reversible geminate recombination model. Removal of",
1079 "the ballistic component first is strongly advised. The model is presented in",
1080 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1082 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1083 "of each set and over all sets with respect to a filtered average.",
1084 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1085 "to len/2. len is supplied with the option [TT]-filter[tt].",
1086 "This filter reduces oscillations with period len/2 and len by a factor",
1087 "of 0.79 and 0.33 respectively.[PAR]",
1089 "Option [TT]-g[tt] fits the data to the function given with option",
1090 "[TT]-fitfn[tt].[PAR]",
1092 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1093 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1094 "zero or with a negative value are ignored.[PAR]"
1096 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1097 "on output from [gmx-hbond]. The input file can be taken directly",
1098 "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1100 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1101 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1102 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1103 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1104 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1105 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1107 /* must correspond to enum avbar* declared at beginning of file */
1108 static const char *avbar_opt[avbarNR+1] = {
1109 NULL, "none", "stddev", "error", "90", NULL
1113 { "-time", FALSE, etBOOL, {&bHaveT},
1114 "Expect a time in the input" },
1115 { "-b", FALSE, etREAL, {&tb},
1116 "First time to read from set" },
1117 { "-e", FALSE, etREAL, {&te},
1118 "Last time to read from set" },
1119 { "-n", FALSE, etINT, {&nsets_in},
1120 "Read this number of sets separated by &" },
1121 { "-d", FALSE, etBOOL, {&bDer},
1122 "Use the derivative" },
1123 { "-dp", FALSE, etINT, {&d},
1124 "HIDDENThe derivative is the difference over this number of points" },
1125 { "-bw", FALSE, etREAL, {&binwidth},
1126 "Binwidth for the distribution" },
1127 { "-errbar", FALSE, etENUM, {avbar_opt},
1128 "Error bars for [TT]-av[tt]" },
1129 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1130 "Integrate data function(s) numerically using trapezium rule" },
1131 { "-aver_start", FALSE, etREAL, {&aver_start},
1132 "Start averaging the integral from here" },
1133 { "-xydy", FALSE, etBOOL, {&bXYdy},
1134 "Interpret second data set as error in the y values for integrating" },
1135 { "-regression", FALSE, etBOOL, {&bRegression},
1136 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1137 { "-luzar", FALSE, etBOOL, {&bLuzar},
1138 "Do a Luzar and Chandler analysis on a correlation function and "
1139 "related as produced by [gmx-hbond]. When in addition the "
1140 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1141 "interpreted as errors in c(t) and n(t)." },
1142 { "-temp", FALSE, etREAL, {&temp},
1143 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1144 { "-fitstart", FALSE, etREAL, {&fit_start},
1145 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1146 { "-fitend", FALSE, etREAL, {&fit_end},
1147 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1148 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1149 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1150 { "-nbmin", FALSE, etINT, {&nb_min},
1151 "HIDDENMinimum number of blocks for block averaging" },
1152 { "-resol", FALSE, etINT, {&resol},
1153 "HIDDENResolution for the block averaging, block size increases with"
1154 " a factor 2^(1/resol)" },
1155 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1156 "HIDDENAlways use a single exponential fit for the error estimate" },
1157 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1158 "HIDDENAllow a negative long-time correlation" },
1159 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1160 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1161 { "-filter", FALSE, etREAL, {&filtlen},
1162 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1163 { "-power", FALSE, etBOOL, {&bPower},
1164 "Fit data to: b t^a" },
1165 { "-subav", FALSE, etBOOL, {&bSubAv},
1166 "Subtract the average before autocorrelating" },
1167 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1168 "Calculate one ACF over all sets" },
1169 { "-nbalexp", FALSE, etINT, {&nBalExp},
1170 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1171 { "-baltime", FALSE, etREAL, {&balTime},
1172 "HIDDENTime up to which the ballistic component will be fitted" },
1173 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1174 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1175 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1176 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1177 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1178 /* "What type of gminate recombination to use"}, */
1179 /* { "-D", FALSE, etREAL, {&diffusion}, */
1180 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1182 #define NPA asize(pa)
1184 FILE *out, *out_fit;
1185 int n, nlast, s, nset, i, j = 0;
1186 real **val, *t, dt, tot, error;
1187 double *av, *sig, cum1, cum2, cum3, cum4, db;
1188 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1192 { efXVG, "-f", "graph", ffREAD },
1193 { efXVG, "-ac", "autocorr", ffOPTWR },
1194 { efXVG, "-msd", "msd", ffOPTWR },
1195 { efXVG, "-cc", "coscont", ffOPTWR },
1196 { efXVG, "-dist", "distr", ffOPTWR },
1197 { efXVG, "-av", "average", ffOPTWR },
1198 { efXVG, "-ee", "errest", ffOPTWR },
1199 { efXVG, "-bal", "ballisitc", ffOPTWR },
1200 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1201 { efLOG, "-g", "fitlog", ffOPTWR }
1203 #define NFILE asize(fnm)
1209 ppa = add_acf_pargs(&npargs, pa);
1211 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1212 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1217 acfile = opt2fn_null("-ac", NFILE, fnm);
1218 msdfile = opt2fn_null("-msd", NFILE, fnm);
1219 ccfile = opt2fn_null("-cc", NFILE, fnm);
1220 distfile = opt2fn_null("-dist", NFILE, fnm);
1221 avfile = opt2fn_null("-av", NFILE, fnm);
1222 eefile = opt2fn_null("-ee", NFILE, fnm);
1223 balfile = opt2fn_null("-bal", NFILE, fnm);
1224 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1225 /* When doing autocorrelation we don't want a fitlog for fitting
1226 * the function itself (not the acf) when the user did not ask for it.
1228 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1230 fitfile = opt2fn("-g", NFILE, fnm);
1234 fitfile = opt2fn_null("-g", NFILE, fnm);
1237 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1238 opt2parg_bSet("-b", npargs, ppa), tb,
1239 opt2parg_bSet("-e", npargs, ppa), te,
1240 nsets_in, &nset, &n, &dt, &t);
1241 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1245 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1248 for (s = 0; s < nset; s++)
1250 for (i = 0; (i < n); i++)
1252 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1261 printf("Calculating the integral using the trapezium rule\n");
1265 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1266 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1270 for (s = 0; s < nset; s++)
1272 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1273 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1278 if (fitfile != NULL)
1280 out_fit = gmx_ffopen(fitfile, "w");
1281 if (bXYdy && nset >= 2)
1283 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1287 for (s = 0; s < nset; s++)
1289 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1292 gmx_ffclose(out_fit);
1295 printf(" std. dev. relative deviation of\n");
1296 printf(" standard --------- cumulants from those of\n");
1297 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1298 printf(" cum. 3 cum. 4\n");
1301 for (s = 0; (s < nset); s++)
1307 for (i = 0; (i < n); i++)
1312 for (i = 0; (i < n); i++)
1314 db = val[s][i]-cum1;
1317 cum4 += db*db*db*db;
1323 sig[s] = sqrt(cum2);
1326 error = sqrt(cum2/(n-1));
1332 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1333 s+1, av[s], sig[s], error,
1334 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1335 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1341 filter(filtlen, n, nset, val, dt);
1346 out = xvgropen(msdfile, "Mean square displacement",
1347 "time", "MSD (nm\\S2\\N)", oenv);
1348 nlast = (int)(n*frac);
1349 for (s = 0; s < nset; s++)
1351 for (j = 0; j <= nlast; j++)
1355 fprintf(stderr, "\r%d", j);
1358 for (i = 0; i < n-j; i++)
1360 tot += sqr(val[s][i]-val[s][i+j]);
1363 fprintf(out, " %g %8g\n", dt*j, tot);
1367 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1371 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1375 plot_coscont(ccfile, n, nset, val, oenv);
1380 histogram(distfile, binwidth, n, nset, val, oenv);
1384 average(avfile, nenum(avbar_opt), n, nset, val, t);
1388 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1389 bEeFitAc, bEESEF, bEENLC, oenv);
1393 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1396 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1397 /* nFitPoints, fit_start, fit_end, oenv); */
1400 power_fit(n, nset, val, t);
1407 for (s = 0; s < nset; s++)
1409 for (i = 0; i < n; i++)
1415 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1416 eacNormal, bAverCorr);
1421 regression_analysis(n, bXYdy, t, nset, val);
1426 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1429 view_all(oenv, NFILE, fnm);