2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/xvgr.h"
45 #include "gromacs/gmxana/geminate.h"
46 #include "gromacs/gmxana/gmx_ana.h"
47 #include "gromacs/gmxana/gstat.h"
48 #include "gromacs/legacyheaders/copyrite.h"
49 #include "gromacs/legacyheaders/macros.h"
50 #include "gromacs/legacyheaders/readinp.h"
51 #include "gromacs/legacyheaders/txtdump.h"
52 #include "gromacs/legacyheaders/typedefs.h"
53 #include "gromacs/legacyheaders/viewit.h"
54 #include "gromacs/linearalgebra/matrix.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/statistics/statistics.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/futil.h"
59 #include "gromacs/utility/smalloc.h"
61 /* must correspond to char *avbar_opt[] declared in main() */
63 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
66 static void power_fit(int n, int nset, real **val, real *t)
68 real *x, *y, quality, a, b, r;
76 for (i = 0; i < n; i++)
86 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
87 for (i = 0; i < n; i++)
93 for (s = 0; s < nset; s++)
96 for (i = 0; i < n && val[s][i] >= 0; i++)
98 y[i] = log(val[s][i]);
102 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
104 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
105 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
106 s+1, quality, a, exp(b));
113 static real cosine_content(int nhp, int n, real *y)
114 /* Assumes n equidistant points */
116 double fac, cosyint, yyint;
124 fac = M_PI*nhp/(n-1);
128 for (i = 0; i < n; i++)
130 cosyint += cos(fac*i)*y[i];
134 return 2*cosyint*cosyint/(n*yyint);
137 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
138 const output_env_t oenv)
144 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
147 for (s = 0; s < nset; s++)
149 cc = cosine_content(s+1, n, val[s]);
150 fprintf(fp, " %d %g\n", s+1, cc);
151 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
154 fprintf(stdout, "\n");
159 static void regression_analysis(int n, gmx_bool bXYdy,
160 real *x, int nset, real **val)
162 real S, chi2, a, b, da, db, r = 0;
165 if (bXYdy || (nset == 1))
167 printf("Fitting data to a function f(x) = ax + b\n");
168 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
169 printf("Error estimates will be given if w_i (sigma) values are given\n");
170 printf("(use option -xydy).\n\n");
173 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
175 gmx_fatal(FARGS, "Error fitting the data: %s",
176 gmx_stats_message(ok));
181 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
183 gmx_fatal(FARGS, "Error fitting the data: %s",
184 gmx_stats_message(ok));
188 printf("Chi2 = %g\n", chi2);
189 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
190 printf("Correlation coefficient = %.1f%%\n", 100*r);
194 printf("a = %g +/- %g\n", a, da);
195 printf("b = %g +/- %g\n", b, db);
199 printf("a = %g\n", a);
200 printf("b = %g\n", b);
205 double chi2, *a, **xx, *y;
210 for (j = 0; (j < nset-1); j++)
214 for (i = 0; (i < n); i++)
217 for (j = 1; (j < nset); j++)
219 xx[j-1][i] = val[j][i];
223 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
224 printf("Fitting %d data points in %d sets\n", n, nset-1);
225 printf("chi2 = %g\n", chi2);
227 for (i = 0; (i < nset-1); i++)
239 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
240 const output_env_t oenv)
250 for (s = 0; s < nset; s++)
252 for (i = 0; i < n; i++)
258 else if (val[s][i] > max)
265 min = binwidth*floor(min/binwidth);
266 max = binwidth*ceil(max/binwidth);
273 nbin = (int)((max - min)/binwidth + 0.5) + 1;
274 fprintf(stderr, "Making distributions with %d bins\n", nbin);
276 fp = xvgropen(distfile, "Distribution", "", "", oenv);
277 for (s = 0; s < nset; s++)
279 for (i = 0; i < nbin; i++)
283 for (i = 0; i < n; i++)
285 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
287 for (i = 0; i < nbin; i++)
289 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
293 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
299 static int real_comp(const void *a, const void *b)
301 real dif = *(real *)a - *(real *)b;
317 static void average(const char *avfile, int avbar_opt,
318 int n, int nset, real **val, real *t)
325 fp = gmx_ffopen(avfile, "w");
326 if ((avbar_opt == avbarERROR) && (nset == 1))
328 avbar_opt = avbarNONE;
330 if (avbar_opt != avbarNONE)
332 if (avbar_opt == avbar90)
335 fprintf(fp, "@TYPE xydydy\n");
336 edge = (int)(nset*0.05+0.5);
337 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
338 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
342 fprintf(fp, "@TYPE xydy\n");
346 for (i = 0; i < n; i++)
349 for (s = 0; s < nset; s++)
354 fprintf(fp, " %g %g", t[i], av);
356 if (avbar_opt != avbarNONE)
358 if (avbar_opt == avbar90)
360 for (s = 0; s < nset; s++)
364 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
365 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
369 for (s = 0; s < nset; s++)
371 var += sqr(val[s][i]-av);
373 if (avbar_opt == avbarSTDDEV)
375 err = sqrt(var/nset);
379 err = sqrt(var/(nset*(nset-1)));
381 fprintf(fp, " %g", err);
388 if (avbar_opt == avbar90)
394 static real anal_ee_inf(real *parm, real T)
396 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
399 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
400 int nset, double *av, double *sig, real **val, real dt,
401 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
402 const output_env_t oenv)
405 int bs, prev_bs, nbs, nb;
410 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
412 real ee, a, tau1, tau2;
416 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
421 fp = xvgropen(eefile, "Error estimates",
422 "Block size (time)", "Error estimate", oenv);
423 if (output_env_get_print_xvgr_codes(oenv))
426 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
430 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
433 spacing = pow(2, 1.0/resol);
437 for (s = 0; s < nset; s++)
449 for (i = 0; i < nb; i++)
452 for (j = 0; j < bs; j++)
454 blav += val[s][bs*i+j];
456 var += sqr(av[s] - blav/bs);
465 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
482 for (i = 0; i < nbs/2; i++)
485 tbs[i] = tbs[nbs-1-i];
488 ybs[i] = ybs[nbs-1-i];
491 /* The initial slope of the normalized ybs^2 is 1.
492 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
493 * From this we take our initial guess for tau1.
502 while (i < nbs - 1 &&
503 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
507 fprintf(stdout, "Data set %d has strange time correlations:\n"
508 "the std. error using single points is larger than that of blocks of 2 points\n"
509 "The error estimate might be inaccurate, check the fit\n",
511 /* Use the total time as tau for the fitting weights */
512 tau_sig = (n - 1)*dt;
521 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
524 /* Generate more or less appropriate sigma's,
525 * also taking the density of points into account.
527 for (i = 0; i < nbs; i++)
531 dens = tbs[1]/tbs[0] - 1;
535 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
539 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
541 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
546 fitparm[0] = tau1_est;
548 /* We set the initial guess for tau2
549 * to halfway between tau1_est and the total time (on log scale).
551 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
552 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
553 bDebugMode(), effnERREST, fitparm, 0);
554 fitparm[3] = 1-fitparm[1];
556 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
557 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
561 if (fitparm[2] > (n-1)*dt)
564 "Warning: tau2 is longer than the length of the data (%g)\n"
565 " the statistics might be bad\n",
570 fprintf(stdout, "a fitted parameter is negative\n");
572 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
573 sig[s]*anal_ee_inf(fitparm, n*dt),
574 fitparm[1], fitparm[0], fitparm[2]);
575 /* Do a fit with tau2 fixed at the total time.
576 * One could also choose any other large value for tau2.
578 fitparm[0] = tau1_est;
580 fitparm[2] = (n-1)*dt;
581 fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
582 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
583 effnERREST, fitparm, 4);
584 fitparm[3] = 1-fitparm[1];
586 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
587 || (fitparm[1] > 1 && !bAllowNegLTCorr))
591 fprintf(stdout, "a fitted parameter is negative\n");
592 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
593 sig[s]*anal_ee_inf(fitparm, n*dt),
594 fitparm[1], fitparm[0], fitparm[2]);
596 /* Do a single exponential fit */
597 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
598 fitparm[0] = tau1_est;
601 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
602 effnERREST, fitparm, 6);
603 fitparm[3] = 1-fitparm[1];
606 ee = sig[s]*anal_ee_inf(fitparm, n*dt);
611 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
612 s+1, ee, a, tau1, tau2);
613 if (output_env_get_xvg_format(oenv) == exvgXMGR)
615 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
616 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
618 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
620 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
621 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
623 for (i = 0; i < nbs; i++)
625 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
626 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
632 real *ac, acint, ac_fit[4];
635 for (i = 0; i < n; i++)
637 ac[i] = val[s][i] - av[s];
647 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
648 dt, eacNormal, 1, FALSE, TRUE,
649 FALSE, 0, 0, effnNONE);
653 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
655 for (i = 1; i <= fitlen/2; i++)
661 /* Generate more or less appropriate sigma's */
662 for (i = 0; i <= fitlen; i++)
664 fitsig[i] = sqrt(acint + dt*i);
667 ac_fit[0] = 0.5*acint;
669 ac_fit[2] = 10*acint;
670 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
671 bDebugMode(), effnEXP3, ac_fit, 0);
672 ac_fit[3] = 1 - ac_fit[1];
674 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
675 s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
676 ac_fit[1], ac_fit[0], ac_fit[2]);
678 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
679 for (i = 0; i < nbs; i++)
681 fprintf(fp, "%g %g\n", tbs[i],
682 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
689 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
698 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
699 gmx_bool bError, real fit_start, real smooth_tail_start,
700 const output_env_t oenv)
702 const real tol = 1e-8;
707 please_cite(stdout, "Spoel2006b");
709 /* Compute negative derivative k(t) = -dc(t)/dt */
713 compute_derivative(nn, time, val[0], kt);
714 for (j = 0; (j < nn); j++)
721 for (j = 0; (j < nn); j++)
723 d2 += sqr(kt[j] - val[3][j]);
725 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
727 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
728 temp, smooth_tail_start, oenv);
733 analyse_corr(nn, time, val[0], val[2], val[4],
734 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
738 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
739 printf("Not doing anything. Sorry.\n");
743 static void filter(real flen, int n, int nset, real **val, real dt)
746 double *filt, sum, vf, fluc, fluctot;
748 f = (int)(flen/(2*dt));
752 for (i = 1; i <= f; i++)
754 filt[i] = cos(M_PI*dt*i/flen);
757 for (i = 0; i <= f; i++)
761 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
762 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
764 for (s = 0; s < nset; s++)
767 for (i = f; i < n-f; i++)
769 vf = filt[0]*val[s][i];
770 for (j = 1; j <= f; j++)
772 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
774 fluc += sqr(val[s][i] - vf);
778 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
780 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
781 fprintf(stdout, "\n");
786 static void do_fit(FILE *out, int n, gmx_bool bYdy,
787 int ny, real *x0, real **val,
788 int npargs, t_pargs *ppa, const output_env_t oenv)
790 real *c1 = NULL, *sig = NULL, *fitparm;
791 real tendfit, tbeginfit;
792 int i, efitfn, nparm;
794 efitfn = get_acffitfn();
795 nparm = nfp_ffn[efitfn];
796 fprintf(out, "Will fit to the following function:\n");
797 fprintf(out, "%s\n", longs_ffn[efitfn]);
803 fprintf(out, "Using two columns as y and sigma values\n");
809 if (opt2parg_bSet("-beginfit", npargs, ppa))
811 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
817 if (opt2parg_bSet("-endfit", npargs, ppa))
819 tendfit = opt2parg_real("-endfit", npargs, ppa);
826 snew(fitparm, nparm);
838 fitparm[1] = 0.5*c1[0];
842 fitparm[0] = fitparm[2] = 0.5*c1[0];
848 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
855 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
863 fprintf(out, "Warning: don't know how to initialize the parameters\n");
864 for (i = 0; (i < nparm); i++)
869 fprintf(out, "Starting parameters:\n");
870 for (i = 0; (i < nparm); i++)
872 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
874 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
875 oenv, bDebugMode(), efitfn, fitparm, 0))
877 for (i = 0; (i < nparm); i++)
879 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
884 fprintf(out, "No solution was found\n");
888 static void do_ballistic(const char *balFile, int nData,
889 real *t, real **val, int nSet,
890 real balTime, int nBalExp,
891 const output_env_t oenv)
893 double **ctd = NULL, *td = NULL;
894 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
895 static char *leg[] = {"Ac'(t)"};
899 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
904 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
905 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
907 for (set = 0; set < nSet; set++)
909 snew(ctd[set], nData);
910 for (i = 0; i < nData; i++)
912 ctd[set][i] = (double)val[set][i];
915 td[i] = (double)t[i];
919 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
922 for (i = 0; i < nData; i++)
924 fprintf(fp, " %g", t[i]);
925 for (set = 0; set < nSet; set++)
927 fprintf(fp, " %g", ctd[set][i]);
933 for (set = 0; set < nSet; set++)
942 printf("Number of data points is less than the number of parameters to fit\n."
943 "The system is underdetermined, hence no ballistic term can be found.\n\n");
947 static void do_geminate(const char *gemFile, int nData,
948 real *t, real **val, int nSet,
949 const real D, const real rcut, const real balTime,
950 const int nFitPoints, const real begFit, const real endFit,
951 const output_env_t oenv)
953 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
954 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
955 begFit, endFit, balTime, 1);
956 const char *leg[] = {"Ac\\sgem\\N(t)"};
964 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
965 xvgr_legend(fp, asize(leg), leg, oenv);
967 for (set = 0; set < nSet; set++)
969 snew(ctd[set], nData);
970 snew(ctdGem[set], nData);
971 for (i = 0; i < nData; i++)
973 ctd[set][i] = (double)val[set][i];
976 td[i] = (double)t[i];
979 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
982 for (i = 0; i < nData; i++)
984 fprintf(fp, " %g", t[i]);
985 for (set = 0; set < nSet; set++)
987 fprintf(fp, " %g", ctdGem[set][i]);
992 for (set = 0; set < nSet; set++)
1002 int gmx_analyze(int argc, char *argv[])
1004 static const char *desc[] = {
1005 "[THISMODULE] reads an ASCII file and analyzes data sets.",
1006 "A line in the input file may start with a time",
1007 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1008 "Multiple sets can also be",
1009 "read when they are separated by & (option [TT]-n[tt]);",
1010 "in this case only one [IT]y[it]-value is read from each line.",
1011 "All lines starting with # and @ are skipped.",
1012 "All analyses can also be done for the derivative of a set",
1013 "(option [TT]-d[tt]).[PAR]",
1015 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1016 "points are equidistant in time.[PAR]",
1018 "[THISMODULE] always shows the average and standard deviation of each",
1019 "set, as well as the relative deviation of the third",
1020 "and fourth cumulant from those of a Gaussian distribution with the same",
1021 "standard deviation.[PAR]",
1023 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1024 "Be sure that the time interval between data points is",
1025 "much shorter than the time scale of the autocorrelation.[PAR]",
1027 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1028 "i/2 periods. The formula is:[BR]"
1029 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1030 "This is useful for principal components obtained from covariance",
1031 "analysis, since the principal components of random diffusion are",
1032 "pure cosines.[PAR]",
1034 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1036 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1038 "Option [TT]-av[tt] produces the average over the sets.",
1039 "Error bars can be added with the option [TT]-errbar[tt].",
1040 "The errorbars can represent the standard deviation, the error",
1041 "(assuming the points are independent) or the interval containing",
1042 "90% of the points, by discarding 5% of the points at the top and",
1045 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1046 "A set is divided in a number of blocks and averages are calculated for",
1047 "each block. The error for the total average is calculated from",
1048 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1049 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1050 "These errors are plotted as a function of the block size.",
1051 "Also an analytical block average curve is plotted, assuming",
1052 "that the autocorrelation is a sum of two exponentials.",
1053 "The analytical curve for the block average is:[BR]",
1054 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1055 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1056 "where T is the total time.",
1057 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1058 "When the actual block average is very close to the analytical curve,",
1059 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1060 "The complete derivation is given in",
1061 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1063 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1064 "component from a hydrogen bond autocorrelation function by the fitting",
1065 "of a sum of exponentials, as described in e.g.",
1066 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1067 "is the one with the most negative coefficient in the exponential,",
1068 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1069 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1071 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1072 "(and optionally kD) to the hydrogen bond autocorrelation function",
1073 "according to the reversible geminate recombination model. Removal of",
1074 "the ballistic component first is strongly advised. The model is presented in",
1075 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1077 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1078 "of each set and over all sets with respect to a filtered average.",
1079 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1080 "to len/2. len is supplied with the option [TT]-filter[tt].",
1081 "This filter reduces oscillations with period len/2 and len by a factor",
1082 "of 0.79 and 0.33 respectively.[PAR]",
1084 "Option [TT]-g[tt] fits the data to the function given with option",
1085 "[TT]-fitfn[tt].[PAR]",
1087 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1088 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1089 "zero or with a negative value are ignored.[PAR]"
1091 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1092 "on output from [gmx-hbond]. The input file can be taken directly",
1093 "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1095 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1096 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1097 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1098 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1099 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1100 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1102 /* must correspond to enum avbar* declared at beginning of file */
1103 static const char *avbar_opt[avbarNR+1] = {
1104 NULL, "none", "stddev", "error", "90", NULL
1108 { "-time", FALSE, etBOOL, {&bHaveT},
1109 "Expect a time in the input" },
1110 { "-b", FALSE, etREAL, {&tb},
1111 "First time to read from set" },
1112 { "-e", FALSE, etREAL, {&te},
1113 "Last time to read from set" },
1114 { "-n", FALSE, etINT, {&nsets_in},
1115 "Read this number of sets separated by &" },
1116 { "-d", FALSE, etBOOL, {&bDer},
1117 "Use the derivative" },
1118 { "-dp", FALSE, etINT, {&d},
1119 "HIDDENThe derivative is the difference over this number of points" },
1120 { "-bw", FALSE, etREAL, {&binwidth},
1121 "Binwidth for the distribution" },
1122 { "-errbar", FALSE, etENUM, {avbar_opt},
1123 "Error bars for [TT]-av[tt]" },
1124 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1125 "Integrate data function(s) numerically using trapezium rule" },
1126 { "-aver_start", FALSE, etREAL, {&aver_start},
1127 "Start averaging the integral from here" },
1128 { "-xydy", FALSE, etBOOL, {&bXYdy},
1129 "Interpret second data set as error in the y values for integrating" },
1130 { "-regression", FALSE, etBOOL, {&bRegression},
1131 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1132 { "-luzar", FALSE, etBOOL, {&bLuzar},
1133 "Do a Luzar and Chandler analysis on a correlation function and "
1134 "related as produced by [gmx-hbond]. When in addition the "
1135 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1136 "interpreted as errors in c(t) and n(t)." },
1137 { "-temp", FALSE, etREAL, {&temp},
1138 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1139 { "-fitstart", FALSE, etREAL, {&fit_start},
1140 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1141 { "-fitend", FALSE, etREAL, {&fit_end},
1142 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1143 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1144 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1145 { "-nbmin", FALSE, etINT, {&nb_min},
1146 "HIDDENMinimum number of blocks for block averaging" },
1147 { "-resol", FALSE, etINT, {&resol},
1148 "HIDDENResolution for the block averaging, block size increases with"
1149 " a factor 2^(1/resol)" },
1150 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1151 "HIDDENAlways use a single exponential fit for the error estimate" },
1152 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1153 "HIDDENAllow a negative long-time correlation" },
1154 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1155 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1156 { "-filter", FALSE, etREAL, {&filtlen},
1157 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1158 { "-power", FALSE, etBOOL, {&bPower},
1159 "Fit data to: b t^a" },
1160 { "-subav", FALSE, etBOOL, {&bSubAv},
1161 "Subtract the average before autocorrelating" },
1162 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1163 "Calculate one ACF over all sets" },
1164 { "-nbalexp", FALSE, etINT, {&nBalExp},
1165 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1166 { "-baltime", FALSE, etREAL, {&balTime},
1167 "HIDDENTime up to which the ballistic component will be fitted" },
1168 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1169 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1170 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1171 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1172 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1173 /* "What type of gminate recombination to use"}, */
1174 /* { "-D", FALSE, etREAL, {&diffusion}, */
1175 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1177 #define NPA asize(pa)
1179 FILE *out, *out_fit;
1180 int n, nlast, s, nset, i, j = 0;
1181 real **val, *t, dt, tot, error;
1182 double *av, *sig, cum1, cum2, cum3, cum4, db;
1183 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1187 { efXVG, "-f", "graph", ffREAD },
1188 { efXVG, "-ac", "autocorr", ffOPTWR },
1189 { efXVG, "-msd", "msd", ffOPTWR },
1190 { efXVG, "-cc", "coscont", ffOPTWR },
1191 { efXVG, "-dist", "distr", ffOPTWR },
1192 { efXVG, "-av", "average", ffOPTWR },
1193 { efXVG, "-ee", "errest", ffOPTWR },
1194 { efXVG, "-bal", "ballisitc", ffOPTWR },
1195 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1196 { efLOG, "-g", "fitlog", ffOPTWR }
1198 #define NFILE asize(fnm)
1204 ppa = add_acf_pargs(&npargs, pa);
1206 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1207 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1212 acfile = opt2fn_null("-ac", NFILE, fnm);
1213 msdfile = opt2fn_null("-msd", NFILE, fnm);
1214 ccfile = opt2fn_null("-cc", NFILE, fnm);
1215 distfile = opt2fn_null("-dist", NFILE, fnm);
1216 avfile = opt2fn_null("-av", NFILE, fnm);
1217 eefile = opt2fn_null("-ee", NFILE, fnm);
1218 balfile = opt2fn_null("-bal", NFILE, fnm);
1219 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1220 /* When doing autocorrelation we don't want a fitlog for fitting
1221 * the function itself (not the acf) when the user did not ask for it.
1223 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1225 fitfile = opt2fn("-g", NFILE, fnm);
1229 fitfile = opt2fn_null("-g", NFILE, fnm);
1232 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1233 opt2parg_bSet("-b", npargs, ppa), tb,
1234 opt2parg_bSet("-e", npargs, ppa), te,
1235 nsets_in, &nset, &n, &dt, &t);
1236 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1240 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1243 for (s = 0; s < nset; s++)
1245 for (i = 0; (i < n); i++)
1247 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1256 printf("Calculating the integral using the trapezium rule\n");
1260 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1261 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1265 for (s = 0; s < nset; s++)
1267 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1268 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1273 if (fitfile != NULL)
1275 out_fit = gmx_ffopen(fitfile, "w");
1276 if (bXYdy && nset >= 2)
1278 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1282 for (s = 0; s < nset; s++)
1284 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1287 gmx_ffclose(out_fit);
1290 printf(" std. dev. relative deviation of\n");
1291 printf(" standard --------- cumulants from those of\n");
1292 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1293 printf(" cum. 3 cum. 4\n");
1296 for (s = 0; (s < nset); s++)
1302 for (i = 0; (i < n); i++)
1307 for (i = 0; (i < n); i++)
1309 db = val[s][i]-cum1;
1312 cum4 += db*db*db*db;
1318 sig[s] = sqrt(cum2);
1321 error = sqrt(cum2/(n-1));
1327 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1328 s+1, av[s], sig[s], error,
1329 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1330 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1336 filter(filtlen, n, nset, val, dt);
1341 out = xvgropen(msdfile, "Mean square displacement",
1342 "time", "MSD (nm\\S2\\N)", oenv);
1343 nlast = (int)(n*frac);
1344 for (s = 0; s < nset; s++)
1346 for (j = 0; j <= nlast; j++)
1350 fprintf(stderr, "\r%d", j);
1353 for (i = 0; i < n-j; i++)
1355 tot += sqr(val[s][i]-val[s][i+j]);
1358 fprintf(out, " %g %8g\n", dt*j, tot);
1362 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1366 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1370 plot_coscont(ccfile, n, nset, val, oenv);
1375 histogram(distfile, binwidth, n, nset, val, oenv);
1379 average(avfile, nenum(avbar_opt), n, nset, val, t);
1383 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1384 bEeFitAc, bEESEF, bEENLC, oenv);
1388 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1391 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1392 /* nFitPoints, fit_start, fit_end, oenv); */
1395 power_fit(n, nset, val, t);
1402 for (s = 0; s < nset; s++)
1404 for (i = 0; i < n; i++)
1410 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1411 eacNormal, bAverCorr);
1416 regression_analysis(n, bXYdy, t, nset, val);
1421 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1424 view_all(oenv, NFILE, fnm);