2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/commandline/pargs.h"
46 #include "gromacs/legacyheaders/typedefs.h"
47 #include "gromacs/utility/smalloc.h"
48 #include "gromacs/legacyheaders/macros.h"
49 #include "gromacs/utility/fatalerror.h"
50 #include "gromacs/math/vec.h"
51 #include "gromacs/legacyheaders/copyrite.h"
52 #include "gromacs/utility/futil.h"
53 #include "gromacs/legacyheaders/readinp.h"
54 #include "gromacs/legacyheaders/txtdump.h"
56 #include "gromacs/statistics/statistics.h"
57 #include "gromacs/fileio/xvgr.h"
58 #include "gromacs/legacyheaders/viewit.h"
62 #include "gromacs/linearalgebra/matrix.h"
64 /* must correspond to char *avbar_opt[] declared in main() */
66 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
69 static void power_fit(int n, int nset, real **val, real *t)
71 real *x, *y, quality, a, b, r;
79 for (i = 0; i < n; i++)
89 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
90 for (i = 0; i < n; i++)
96 for (s = 0; s < nset; s++)
99 for (i = 0; i < n && val[s][i] >= 0; i++)
101 y[i] = log(val[s][i]);
105 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
107 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
108 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
109 s+1, quality, a, exp(b));
116 static real cosine_content(int nhp, int n, real *y)
117 /* Assumes n equidistant points */
119 double fac, cosyint, yyint;
127 fac = M_PI*nhp/(n-1);
131 for (i = 0; i < n; i++)
133 cosyint += cos(fac*i)*y[i];
137 return 2*cosyint*cosyint/(n*yyint);
140 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
141 const output_env_t oenv)
147 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
150 for (s = 0; s < nset; s++)
152 cc = cosine_content(s+1, n, val[s]);
153 fprintf(fp, " %d %g\n", s+1, cc);
154 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
157 fprintf(stdout, "\n");
162 static void regression_analysis(int n, gmx_bool bXYdy,
163 real *x, int nset, real **val)
165 real S, chi2, a, b, da, db, r = 0;
168 if (bXYdy || (nset == 1))
170 printf("Fitting data to a function f(x) = ax + b\n");
171 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
172 printf("Error estimates will be given if w_i (sigma) values are given\n");
173 printf("(use option -xydy).\n\n");
176 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
178 gmx_fatal(FARGS, "Error fitting the data: %s",
179 gmx_stats_message(ok));
184 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
186 gmx_fatal(FARGS, "Error fitting the data: %s",
187 gmx_stats_message(ok));
191 printf("Chi2 = %g\n", chi2);
192 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
193 printf("Correlation coefficient = %.1f%%\n", 100*r);
197 printf("a = %g +/- %g\n", a, da);
198 printf("b = %g +/- %g\n", b, db);
202 printf("a = %g\n", a);
203 printf("b = %g\n", b);
208 double chi2, *a, **xx, *y;
213 for (j = 0; (j < nset-1); j++)
217 for (i = 0; (i < n); i++)
220 for (j = 1; (j < nset); j++)
222 xx[j-1][i] = val[j][i];
226 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
227 printf("Fitting %d data points in %d sets\n", n, nset-1);
228 printf("chi2 = %g\n", chi2);
230 for (i = 0; (i < nset-1); i++)
242 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
243 const output_env_t oenv)
253 for (s = 0; s < nset; s++)
255 for (i = 0; i < n; i++)
261 else if (val[s][i] > max)
268 min = binwidth*floor(min/binwidth);
269 max = binwidth*ceil(max/binwidth);
276 nbin = (int)((max - min)/binwidth + 0.5) + 1;
277 fprintf(stderr, "Making distributions with %d bins\n", nbin);
279 fp = xvgropen(distfile, "Distribution", "", "", oenv);
280 for (s = 0; s < nset; s++)
282 for (i = 0; i < nbin; i++)
286 for (i = 0; i < n; i++)
288 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
290 for (i = 0; i < nbin; i++)
292 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
296 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
302 static int real_comp(const void *a, const void *b)
304 real dif = *(real *)a - *(real *)b;
320 static void average(const char *avfile, int avbar_opt,
321 int n, int nset, real **val, real *t)
328 fp = gmx_ffopen(avfile, "w");
329 if ((avbar_opt == avbarERROR) && (nset == 1))
331 avbar_opt = avbarNONE;
333 if (avbar_opt != avbarNONE)
335 if (avbar_opt == avbar90)
338 fprintf(fp, "@TYPE xydydy\n");
339 edge = (int)(nset*0.05+0.5);
340 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
341 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
345 fprintf(fp, "@TYPE xydy\n");
349 for (i = 0; i < n; i++)
352 for (s = 0; s < nset; s++)
357 fprintf(fp, " %g %g", t[i], av);
359 if (avbar_opt != avbarNONE)
361 if (avbar_opt == avbar90)
363 for (s = 0; s < nset; s++)
367 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
368 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
372 for (s = 0; s < nset; s++)
374 var += sqr(val[s][i]-av);
376 if (avbar_opt == avbarSTDDEV)
378 err = sqrt(var/nset);
382 err = sqrt(var/(nset*(nset-1)));
384 fprintf(fp, " %g", err);
391 if (avbar_opt == avbar90)
397 static real anal_ee_inf(real *parm, real T)
399 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
402 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
403 int nset, double *av, double *sig, real **val, real dt,
404 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
405 const output_env_t oenv)
408 int bs, prev_bs, nbs, nb;
413 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
415 real ee, a, tau1, tau2;
419 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
424 fp = xvgropen(eefile, "Error estimates",
425 "Block size (time)", "Error estimate", oenv);
426 if (output_env_get_print_xvgr_codes(oenv))
429 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
433 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
436 spacing = pow(2, 1.0/resol);
440 for (s = 0; s < nset; s++)
452 for (i = 0; i < nb; i++)
455 for (j = 0; j < bs; j++)
457 blav += val[s][bs*i+j];
459 var += sqr(av[s] - blav/bs);
468 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
485 for (i = 0; i < nbs/2; i++)
488 tbs[i] = tbs[nbs-1-i];
491 ybs[i] = ybs[nbs-1-i];
494 /* The initial slope of the normalized ybs^2 is 1.
495 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
496 * From this we take our initial guess for tau1.
505 while (i < nbs - 1 &&
506 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
510 fprintf(stdout, "Data set %d has strange time correlations:\n"
511 "the std. error using single points is larger than that of blocks of 2 points\n"
512 "The error estimate might be inaccurate, check the fit\n",
514 /* Use the total time as tau for the fitting weights */
515 tau_sig = (n - 1)*dt;
524 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
527 /* Generate more or less appropriate sigma's,
528 * also taking the density of points into account.
530 for (i = 0; i < nbs; i++)
534 dens = tbs[1]/tbs[0] - 1;
538 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
542 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
544 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
549 fitparm[0] = tau1_est;
551 /* We set the initial guess for tau2
552 * to halfway between tau1_est and the total time (on log scale).
554 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
555 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
556 bDebugMode(), effnERREST, fitparm, 0);
557 fitparm[3] = 1-fitparm[1];
559 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
560 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
564 if (fitparm[2] > (n-1)*dt)
567 "Warning: tau2 is longer than the length of the data (%g)\n"
568 " the statistics might be bad\n",
573 fprintf(stdout, "a fitted parameter is negative\n");
575 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
576 sig[s]*anal_ee_inf(fitparm, n*dt),
577 fitparm[1], fitparm[0], fitparm[2]);
578 /* Do a fit with tau2 fixed at the total time.
579 * One could also choose any other large value for tau2.
581 fitparm[0] = tau1_est;
583 fitparm[2] = (n-1)*dt;
584 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
585 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
586 effnERREST, fitparm, 4);
587 fitparm[3] = 1-fitparm[1];
589 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
590 || (fitparm[1] > 1 && !bAllowNegLTCorr))
594 fprintf(stdout, "a fitted parameter is negative\n");
595 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
596 sig[s]*anal_ee_inf(fitparm, n*dt),
597 fitparm[1], fitparm[0], fitparm[2]);
599 /* Do a single exponential fit */
600 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
601 fitparm[0] = tau1_est;
604 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
605 effnERREST, fitparm, 6);
606 fitparm[3] = 1-fitparm[1];
609 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
614 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
615 s+1, ee, a, tau1, tau2);
616 if (output_env_get_xvg_format(oenv) == exvgXMGR)
618 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
619 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
621 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
623 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
624 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
626 for (i = 0; i < nbs; i++)
628 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
629 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
635 real *ac, acint, ac_fit[4];
638 for (i = 0; i < n; i++)
640 ac[i] = val[s][i] - av[s];
650 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
651 dt, eacNormal, 1, FALSE, TRUE,
652 FALSE, 0, 0, effnNONE);
656 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
658 for (i = 1; i <= fitlen/2; i++)
664 /* Generate more or less appropriate sigma's */
665 for (i = 0; i <= fitlen; i++)
667 fitsig[i] = sqrt(acint + dt*i);
670 ac_fit[0] = 0.5*acint;
672 ac_fit[2] = 10*acint;
673 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
674 bDebugMode(), effnEXP3, ac_fit, 0);
675 ac_fit[3] = 1 - ac_fit[1];
677 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
678 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
679 ac_fit[1], ac_fit[0], ac_fit[2]);
681 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
682 for (i = 0; i < nbs; i++)
684 fprintf(fp, "%g %g\n", tbs[i],
685 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
692 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
701 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
702 gmx_bool bError, real fit_start, real smooth_tail_start,
703 const output_env_t oenv)
705 const real tol = 1e-8;
710 please_cite(stdout, "Spoel2006b");
712 /* Compute negative derivative k(t) = -dc(t)/dt */
716 compute_derivative(nn, time, val[0], kt);
717 for (j = 0; (j < nn); j++)
724 for (j = 0; (j < nn); j++)
726 d2 += sqr(kt[j] - val[3][j]);
728 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
730 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
731 temp, smooth_tail_start, oenv);
736 analyse_corr(nn, time, val[0], val[2], val[4],
737 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
741 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
742 printf("Not doing anything. Sorry.\n");
746 static void filter(real flen, int n, int nset, real **val, real dt)
749 double *filt, sum, vf, fluc, fluctot;
751 f = (int)(flen/(2*dt));
755 for (i = 1; i <= f; i++)
757 filt[i] = cos(M_PI*dt*i/flen);
760 for (i = 0; i <= f; i++)
764 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
765 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
767 for (s = 0; s < nset; s++)
770 for (i = f; i < n-f; i++)
772 vf = filt[0]*val[s][i];
773 for (j = 1; j <= f; j++)
775 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
777 fluc += sqr(val[s][i] - vf);
781 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
783 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
784 fprintf(stdout, "\n");
789 static void do_fit(FILE *out, int n, gmx_bool bYdy,
790 int ny, real *x0, real **val,
791 int npargs, t_pargs *ppa, const output_env_t oenv)
793 real *c1 = NULL, *sig = NULL, *fitparm;
794 real tendfit, tbeginfit;
795 int i, efitfn, nparm;
797 efitfn = get_acffitfn();
798 nparm = nfp_ffn[efitfn];
799 fprintf(out, "Will fit to the following function:\n");
800 fprintf(out, "%s\n", longs_ffn[efitfn]);
806 fprintf(out, "Using two columns as y and sigma values\n");
812 if (opt2parg_bSet("-beginfit", npargs, ppa))
814 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
820 if (opt2parg_bSet("-endfit", npargs, ppa))
822 tendfit = opt2parg_real("-endfit", npargs, ppa);
829 snew(fitparm, nparm);
841 fitparm[1] = 0.5*c1[0];
845 fitparm[0] = fitparm[2] = 0.5*c1[0];
851 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
858 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
866 fprintf(out, "Warning: don't know how to initialize the parameters\n");
867 for (i = 0; (i < nparm); i++)
872 fprintf(out, "Starting parameters:\n");
873 for (i = 0; (i < nparm); i++)
875 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
877 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
878 oenv, bDebugMode(), efitfn, fitparm, 0))
880 for (i = 0; (i < nparm); i++)
882 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
887 fprintf(out, "No solution was found\n");
891 static void do_ballistic(const char *balFile, int nData,
892 real *t, real **val, int nSet,
893 real balTime, int nBalExp,
894 const output_env_t oenv)
896 double **ctd = NULL, *td = NULL;
897 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
898 static char *leg[] = {"Ac'(t)"};
902 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
907 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
908 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
910 for (set = 0; set < nSet; set++)
912 snew(ctd[set], nData);
913 for (i = 0; i < nData; i++)
915 ctd[set][i] = (double)val[set][i];
918 td[i] = (double)t[i];
922 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
925 for (i = 0; i < nData; i++)
927 fprintf(fp, " %g", t[i]);
928 for (set = 0; set < nSet; set++)
930 fprintf(fp, " %g", ctd[set][i]);
936 for (set = 0; set < nSet; set++)
945 printf("Number of data points is less than the number of parameters to fit\n."
946 "The system is underdetermined, hence no ballistic term can be found.\n\n");
950 static void do_geminate(const char *gemFile, int nData,
951 real *t, real **val, int nSet,
952 const real D, const real rcut, const real balTime,
953 const int nFitPoints, const real begFit, const real endFit,
954 const output_env_t oenv)
956 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
957 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
958 begFit, endFit, balTime, 1);
959 const char *leg[] = {"Ac\\sgem\\N(t)"};
967 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
968 xvgr_legend(fp, asize(leg), leg, oenv);
970 for (set = 0; set < nSet; set++)
972 snew(ctd[set], nData);
973 snew(ctdGem[set], nData);
974 for (i = 0; i < nData; i++)
976 ctd[set][i] = (double)val[set][i];
979 td[i] = (double)t[i];
982 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
985 for (i = 0; i < nData; i++)
987 fprintf(fp, " %g", t[i]);
988 for (set = 0; set < nSet; set++)
990 fprintf(fp, " %g", ctdGem[set][i]);
995 for (set = 0; set < nSet; set++)
1005 int gmx_analyze(int argc, char *argv[])
1007 static const char *desc[] = {
1008 "[THISMODULE] reads an ASCII file and analyzes data sets.",
1009 "A line in the input file may start with a time",
1010 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1011 "Multiple sets can also be",
1012 "read when they are separated by & (option [TT]-n[tt]);",
1013 "in this case only one [IT]y[it]-value is read from each line.",
1014 "All lines starting with # and @ are skipped.",
1015 "All analyses can also be done for the derivative of a set",
1016 "(option [TT]-d[tt]).[PAR]",
1018 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1019 "points are equidistant in time.[PAR]",
1021 "[THISMODULE] always shows the average and standard deviation of each",
1022 "set, as well as the relative deviation of the third",
1023 "and fourth cumulant from those of a Gaussian distribution with the same",
1024 "standard deviation.[PAR]",
1026 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1027 "Be sure that the time interval between data points is",
1028 "much shorter than the time scale of the autocorrelation.[PAR]",
1030 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1031 "i/2 periods. The formula is:[BR]"
1032 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1033 "This is useful for principal components obtained from covariance",
1034 "analysis, since the principal components of random diffusion are",
1035 "pure cosines.[PAR]",
1037 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1039 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1041 "Option [TT]-av[tt] produces the average over the sets.",
1042 "Error bars can be added with the option [TT]-errbar[tt].",
1043 "The errorbars can represent the standard deviation, the error",
1044 "(assuming the points are independent) or the interval containing",
1045 "90% of the points, by discarding 5% of the points at the top and",
1048 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1049 "A set is divided in a number of blocks and averages are calculated for",
1050 "each block. The error for the total average is calculated from",
1051 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1052 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1053 "These errors are plotted as a function of the block size.",
1054 "Also an analytical block average curve is plotted, assuming",
1055 "that the autocorrelation is a sum of two exponentials.",
1056 "The analytical curve for the block average is:[BR]",
1057 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1058 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1059 "where T is the total time.",
1060 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1061 "When the actual block average is very close to the analytical curve,",
1062 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1063 "The complete derivation is given in",
1064 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1066 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1067 "component from a hydrogen bond autocorrelation function by the fitting",
1068 "of a sum of exponentials, as described in e.g.",
1069 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1070 "is the one with the most negative coefficient in the exponential,",
1071 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1072 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1074 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1075 "(and optionally kD) to the hydrogen bond autocorrelation function",
1076 "according to the reversible geminate recombination model. Removal of",
1077 "the ballistic component first is strongly advised. The model is presented in",
1078 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1080 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1081 "of each set and over all sets with respect to a filtered average.",
1082 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1083 "to len/2. len is supplied with the option [TT]-filter[tt].",
1084 "This filter reduces oscillations with period len/2 and len by a factor",
1085 "of 0.79 and 0.33 respectively.[PAR]",
1087 "Option [TT]-g[tt] fits the data to the function given with option",
1088 "[TT]-fitfn[tt].[PAR]",
1090 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1091 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1092 "zero or with a negative value are ignored.[PAR]"
1094 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1095 "on output from [gmx-hbond]. The input file can be taken directly",
1096 "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1098 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1099 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1100 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1101 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1102 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1103 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1105 /* must correspond to enum avbar* declared at beginning of file */
1106 static const char *avbar_opt[avbarNR+1] = {
1107 NULL, "none", "stddev", "error", "90", NULL
1111 { "-time", FALSE, etBOOL, {&bHaveT},
1112 "Expect a time in the input" },
1113 { "-b", FALSE, etREAL, {&tb},
1114 "First time to read from set" },
1115 { "-e", FALSE, etREAL, {&te},
1116 "Last time to read from set" },
1117 { "-n", FALSE, etINT, {&nsets_in},
1118 "Read this number of sets separated by &" },
1119 { "-d", FALSE, etBOOL, {&bDer},
1120 "Use the derivative" },
1121 { "-dp", FALSE, etINT, {&d},
1122 "HIDDENThe derivative is the difference over this number of points" },
1123 { "-bw", FALSE, etREAL, {&binwidth},
1124 "Binwidth for the distribution" },
1125 { "-errbar", FALSE, etENUM, {avbar_opt},
1126 "Error bars for [TT]-av[tt]" },
1127 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1128 "Integrate data function(s) numerically using trapezium rule" },
1129 { "-aver_start", FALSE, etREAL, {&aver_start},
1130 "Start averaging the integral from here" },
1131 { "-xydy", FALSE, etBOOL, {&bXYdy},
1132 "Interpret second data set as error in the y values for integrating" },
1133 { "-regression", FALSE, etBOOL, {&bRegression},
1134 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1135 { "-luzar", FALSE, etBOOL, {&bLuzar},
1136 "Do a Luzar and Chandler analysis on a correlation function and "
1137 "related as produced by [gmx-hbond]. When in addition the "
1138 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1139 "interpreted as errors in c(t) and n(t)." },
1140 { "-temp", FALSE, etREAL, {&temp},
1141 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1142 { "-fitstart", FALSE, etREAL, {&fit_start},
1143 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1144 { "-fitend", FALSE, etREAL, {&fit_end},
1145 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1146 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1147 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1148 { "-nbmin", FALSE, etINT, {&nb_min},
1149 "HIDDENMinimum number of blocks for block averaging" },
1150 { "-resol", FALSE, etINT, {&resol},
1151 "HIDDENResolution for the block averaging, block size increases with"
1152 " a factor 2^(1/resol)" },
1153 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1154 "HIDDENAlways use a single exponential fit for the error estimate" },
1155 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1156 "HIDDENAllow a negative long-time correlation" },
1157 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1158 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1159 { "-filter", FALSE, etREAL, {&filtlen},
1160 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1161 { "-power", FALSE, etBOOL, {&bPower},
1162 "Fit data to: b t^a" },
1163 { "-subav", FALSE, etBOOL, {&bSubAv},
1164 "Subtract the average before autocorrelating" },
1165 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1166 "Calculate one ACF over all sets" },
1167 { "-nbalexp", FALSE, etINT, {&nBalExp},
1168 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1169 { "-baltime", FALSE, etREAL, {&balTime},
1170 "HIDDENTime up to which the ballistic component will be fitted" },
1171 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1172 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1173 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1174 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1175 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1176 /* "What type of gminate recombination to use"}, */
1177 /* { "-D", FALSE, etREAL, {&diffusion}, */
1178 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1180 #define NPA asize(pa)
1182 FILE *out, *out_fit;
1183 int n, nlast, s, nset, i, j = 0;
1184 real **val, *t, dt, tot, error;
1185 double *av, *sig, cum1, cum2, cum3, cum4, db;
1186 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1190 { efXVG, "-f", "graph", ffREAD },
1191 { efXVG, "-ac", "autocorr", ffOPTWR },
1192 { efXVG, "-msd", "msd", ffOPTWR },
1193 { efXVG, "-cc", "coscont", ffOPTWR },
1194 { efXVG, "-dist", "distr", ffOPTWR },
1195 { efXVG, "-av", "average", ffOPTWR },
1196 { efXVG, "-ee", "errest", ffOPTWR },
1197 { efXVG, "-bal", "ballisitc", ffOPTWR },
1198 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1199 { efLOG, "-g", "fitlog", ffOPTWR }
1201 #define NFILE asize(fnm)
1207 ppa = add_acf_pargs(&npargs, pa);
1209 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1210 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1215 acfile = opt2fn_null("-ac", NFILE, fnm);
1216 msdfile = opt2fn_null("-msd", NFILE, fnm);
1217 ccfile = opt2fn_null("-cc", NFILE, fnm);
1218 distfile = opt2fn_null("-dist", NFILE, fnm);
1219 avfile = opt2fn_null("-av", NFILE, fnm);
1220 eefile = opt2fn_null("-ee", NFILE, fnm);
1221 balfile = opt2fn_null("-bal", NFILE, fnm);
1222 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1223 /* When doing autocorrelation we don't want a fitlog for fitting
1224 * the function itself (not the acf) when the user did not ask for it.
1226 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1228 fitfile = opt2fn("-g", NFILE, fnm);
1232 fitfile = opt2fn_null("-g", NFILE, fnm);
1235 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1236 opt2parg_bSet("-b", npargs, ppa), tb,
1237 opt2parg_bSet("-e", npargs, ppa), te,
1238 nsets_in, &nset, &n, &dt, &t);
1239 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1243 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1246 for (s = 0; s < nset; s++)
1248 for (i = 0; (i < n); i++)
1250 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1259 printf("Calculating the integral using the trapezium rule\n");
1263 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1264 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1268 for (s = 0; s < nset; s++)
1270 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1271 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1276 if (fitfile != NULL)
1278 out_fit = gmx_ffopen(fitfile, "w");
1279 if (bXYdy && nset >= 2)
1281 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1285 for (s = 0; s < nset; s++)
1287 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1290 gmx_ffclose(out_fit);
1293 printf(" std. dev. relative deviation of\n");
1294 printf(" standard --------- cumulants from those of\n");
1295 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1296 printf(" cum. 3 cum. 4\n");
1299 for (s = 0; (s < nset); s++)
1305 for (i = 0; (i < n); i++)
1310 for (i = 0; (i < n); i++)
1312 db = val[s][i]-cum1;
1315 cum4 += db*db*db*db;
1321 sig[s] = sqrt(cum2);
1324 error = sqrt(cum2/(n-1));
1330 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1331 s+1, av[s], sig[s], error,
1332 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1333 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1339 filter(filtlen, n, nset, val, dt);
1344 out = xvgropen(msdfile, "Mean square displacement",
1345 "time", "MSD (nm\\S2\\N)", oenv);
1346 nlast = (int)(n*frac);
1347 for (s = 0; s < nset; s++)
1349 for (j = 0; j <= nlast; j++)
1353 fprintf(stderr, "\r%d", j);
1356 for (i = 0; i < n-j; i++)
1358 tot += sqr(val[s][i]-val[s][i+j]);
1361 fprintf(out, " %g %8g\n", dt*j, tot);
1365 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1369 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1373 plot_coscont(ccfile, n, nset, val, oenv);
1378 histogram(distfile, binwidth, n, nset, val, oenv);
1382 average(avfile, nenum(avbar_opt), n, nset, val, t);
1386 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1387 bEeFitAc, bEESEF, bEENLC, oenv);
1391 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1394 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1395 /* nFitPoints, fit_start, fit_end, oenv); */
1398 power_fit(n, nset, val, t);
1405 for (s = 0; s < nset; s++)
1407 for (i = 0; i < n; i++)
1413 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1414 eacNormal, bAverCorr);
1419 regression_analysis(n, bXYdy, t, nset, val);
1424 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1427 view_all(oenv, NFILE, fnm);