2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/correlationfunctions/autocorr.h"
45 #include "gromacs/correlationfunctions/expfit.h"
46 #include "gromacs/correlationfunctions/integrate.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/geminate.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/gmxana/gstat.h"
51 #include "gromacs/legacyheaders/copyrite.h"
52 #include "gromacs/legacyheaders/macros.h"
53 #include "gromacs/legacyheaders/readinp.h"
54 #include "gromacs/legacyheaders/txtdump.h"
55 #include "gromacs/legacyheaders/typedefs.h"
56 #include "gromacs/legacyheaders/viewit.h"
57 #include "gromacs/linearalgebra/matrix.h"
58 #include "gromacs/math/vec.h"
59 #include "gromacs/statistics/statistics.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/futil.h"
62 #include "gromacs/utility/smalloc.h"
63 #include "gromacs/utility/snprintf.h"
65 /* must correspond to char *avbar_opt[] declared in main() */
67 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
70 static void power_fit(int n, int nset, real **val, real *t)
72 real *x, *y, quality, a, b, r;
80 for (i = 0; i < n; i++)
90 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
91 for (i = 0; i < n; i++)
97 for (s = 0; s < nset; s++)
100 for (i = 0; i < n && val[s][i] >= 0; i++)
102 y[i] = log(val[s][i]);
106 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
108 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
109 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
110 s+1, quality, a, exp(b));
117 static real cosine_content(int nhp, int n, real *y)
118 /* Assumes n equidistant points */
120 double fac, cosyint, yyint;
128 fac = M_PI*nhp/(n-1);
132 for (i = 0; i < n; i++)
134 cosyint += cos(fac*i)*y[i];
138 return 2*cosyint*cosyint/(n*yyint);
141 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
142 const output_env_t oenv)
148 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
151 for (s = 0; s < nset; s++)
153 cc = cosine_content(s+1, n, val[s]);
154 fprintf(fp, " %d %g\n", s+1, cc);
155 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
158 fprintf(stdout, "\n");
163 static void regression_analysis(int n, gmx_bool bXYdy,
164 real *x, int nset, real **val)
166 real S, chi2, a, b, da, db, r = 0;
169 if (bXYdy || (nset == 1))
171 printf("Fitting data to a function f(x) = ax + b\n");
172 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
173 printf("Error estimates will be given if w_i (sigma) values are given\n");
174 printf("(use option -xydy).\n\n");
177 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
179 gmx_fatal(FARGS, "Error fitting the data: %s",
180 gmx_stats_message(ok));
185 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
187 gmx_fatal(FARGS, "Error fitting the data: %s",
188 gmx_stats_message(ok));
192 printf("Chi2 = %g\n", chi2);
193 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
194 printf("Correlation coefficient = %.1f%%\n", 100*r);
198 printf("a = %g +/- %g\n", a, da);
199 printf("b = %g +/- %g\n", b, db);
203 printf("a = %g\n", a);
204 printf("b = %g\n", b);
209 double chi2, *a, **xx, *y;
214 for (j = 0; (j < nset-1); j++)
218 for (i = 0; (i < n); i++)
221 for (j = 1; (j < nset); j++)
223 xx[j-1][i] = val[j][i];
227 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
228 printf("Fitting %d data points in %d sets\n", n, nset-1);
229 printf("chi2 = %g\n", chi2);
231 for (i = 0; (i < nset-1); i++)
243 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
244 const output_env_t oenv)
254 for (s = 0; s < nset; s++)
256 for (i = 0; i < n; i++)
262 else if (val[s][i] > max)
269 min = binwidth*floor(min/binwidth);
270 max = binwidth*ceil(max/binwidth);
277 nbin = (int)((max - min)/binwidth + 0.5) + 1;
278 fprintf(stderr, "Making distributions with %d bins\n", nbin);
280 fp = xvgropen(distfile, "Distribution", "", "", oenv);
281 for (s = 0; s < nset; s++)
283 for (i = 0; i < nbin; i++)
287 for (i = 0; i < n; i++)
289 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
291 for (i = 0; i < nbin; i++)
293 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
297 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
303 static int real_comp(const void *a, const void *b)
305 real dif = *(real *)a - *(real *)b;
321 static void average(const char *avfile, int avbar_opt,
322 int n, int nset, real **val, real *t)
329 fp = gmx_ffopen(avfile, "w");
330 if ((avbar_opt == avbarERROR) && (nset == 1))
332 avbar_opt = avbarNONE;
334 if (avbar_opt != avbarNONE)
336 if (avbar_opt == avbar90)
339 fprintf(fp, "@TYPE xydydy\n");
340 edge = (int)(nset*0.05+0.5);
341 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
342 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
346 fprintf(fp, "@TYPE xydy\n");
350 for (i = 0; i < n; i++)
353 for (s = 0; s < nset; s++)
358 fprintf(fp, " %g %g", t[i], av);
360 if (avbar_opt != avbarNONE)
362 if (avbar_opt == avbar90)
364 for (s = 0; s < nset; s++)
368 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
369 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
373 for (s = 0; s < nset; s++)
375 var += sqr(val[s][i]-av);
377 if (avbar_opt == avbarSTDDEV)
379 err = sqrt(var/nset);
383 err = sqrt(var/(nset*(nset-1)));
385 fprintf(fp, " %g", err);
392 if (avbar_opt == avbar90)
398 /*! \brief Compute final error estimate.
400 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
402 static real optimal_error_estimate(double sigma, double fitparm[], real tTotal)
404 double ss = fitparm[1]*fitparm[0]+(1-fitparm[1])*fitparm[2];
405 if ((tTotal <= 0) || (ss <= 0))
407 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n",
412 return sigma*sqrt(2*ss/tTotal);
415 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
416 int nset, double *av, double *sig, real **val, real dt,
417 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
418 const output_env_t oenv)
421 int bs, prev_bs, nbs, nb;
426 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
428 real ee, a, tau1, tau2;
432 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
437 fp = xvgropen(eefile, "Error estimates",
438 "Block size (time)", "Error estimate", oenv);
439 if (output_env_get_print_xvgr_codes(oenv))
442 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
446 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
449 spacing = pow(2, 1.0/resol);
453 for (s = 0; s < nset; s++)
465 for (i = 0; i < nb; i++)
468 for (j = 0; j < bs; j++)
470 blav += val[s][bs*i+j];
472 var += sqr(av[s] - blav/bs);
481 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
498 for (i = 0; i < nbs/2; i++)
501 tbs[i] = tbs[nbs-1-i];
504 ybs[i] = ybs[nbs-1-i];
507 /* The initial slope of the normalized ybs^2 is 1.
508 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
509 * From this we take our initial guess for tau1.
518 while (i < nbs - 1 &&
519 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
523 fprintf(stdout, "Data set %d has strange time correlations:\n"
524 "the std. error using single points is larger than that of blocks of 2 points\n"
525 "The error estimate might be inaccurate, check the fit\n",
527 /* Use the total time as tau for the fitting weights */
528 tau_sig = (n - 1)*dt;
537 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
540 /* Generate more or less appropriate sigma's,
541 * also taking the density of points into account.
543 for (i = 0; i < nbs; i++)
547 dens = tbs[1]/tbs[0] - 1;
551 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
555 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
557 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
562 fitparm[0] = tau1_est;
564 /* We set the initial guess for tau2
565 * to halfway between tau1_est and the total time (on log scale).
567 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
568 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
569 bDebugMode(), effnERREST, fitparm, 0,
572 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
573 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
577 if (fitparm[2] > (n-1)*dt)
580 "Warning: tau2 is longer than the length of the data (%g)\n"
581 " the statistics might be bad\n",
586 fprintf(stdout, "a fitted parameter is negative\n");
588 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
589 optimal_error_estimate(sig[s], fitparm, n*dt),
590 fitparm[1], fitparm[0], fitparm[2]);
591 /* Do a fit with tau2 fixed at the total time.
592 * One could also choose any other large value for tau2.
594 fitparm[0] = tau1_est;
596 fitparm[2] = (n-1)*dt;
597 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
598 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
599 effnERREST, fitparm, 4, NULL);
601 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
602 || (fitparm[1] > 1 && !bAllowNegLTCorr))
606 fprintf(stdout, "a fitted parameter is negative\n");
607 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
608 optimal_error_estimate(sig[s], fitparm, n*dt),
609 fitparm[1], fitparm[0], fitparm[2]);
611 /* Do a single exponential fit */
612 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
613 fitparm[0] = tau1_est;
616 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
617 effnERREST, fitparm, 6, NULL);
620 ee = optimal_error_estimate(sig[s], fitparm, n*dt);
625 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
626 s+1, ee, a, tau1, tau2);
627 if (output_env_get_xvg_format(oenv) == exvgXMGR)
629 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
630 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1,
631 optimal_error_estimate(sig[s], fitparm, n*dt));
633 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
635 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
636 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1,
637 optimal_error_estimate(sig[s], fitparm, n*dt));
639 for (i = 0; i < nbs; i++)
641 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
642 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
652 for (i = 0; i < n; i++)
654 ac[i] = val[s][i] - av[s];
664 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
665 dt, eacNormal, 1, FALSE, TRUE,
666 FALSE, 0, 0, effnNONE);
670 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
672 for (i = 1; i <= fitlen/2; i++)
678 /* Generate more or less appropriate sigma's */
679 for (i = 0; i <= fitlen; i++)
681 fitsig[i] = sqrt(acint + dt*i);
684 ac_fit[0] = 0.5*acint;
686 ac_fit[2] = 10*acint;
687 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
688 bDebugMode(), effnEXPEXP, ac_fit, 0, NULL);
689 ac_fit[3] = 1 - ac_fit[1];
691 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
692 s+1, optimal_error_estimate(sig[s], ac_fit, n*dt),
693 ac_fit[1], ac_fit[0], ac_fit[2]);
695 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
696 for (i = 0; i < nbs; i++)
698 fprintf(fp, "%g %g\n", tbs[i],
699 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
706 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
715 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
716 gmx_bool bError, real fit_start, real smooth_tail_start,
717 const output_env_t oenv)
719 const real tol = 1e-8;
724 please_cite(stdout, "Spoel2006b");
726 /* Compute negative derivative k(t) = -dc(t)/dt */
730 compute_derivative(nn, time, val[0], kt);
731 for (j = 0; (j < nn); j++)
738 for (j = 0; (j < nn); j++)
740 d2 += sqr(kt[j] - val[3][j]);
742 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
744 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
745 temp, smooth_tail_start, oenv);
750 analyse_corr(nn, time, val[0], val[2], val[4],
751 val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
755 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
756 printf("Not doing anything. Sorry.\n");
760 static void filter(real flen, int n, int nset, real **val, real dt)
763 double *filt, sum, vf, fluc, fluctot;
765 f = (int)(flen/(2*dt));
769 for (i = 1; i <= f; i++)
771 filt[i] = cos(M_PI*dt*i/flen);
774 for (i = 0; i <= f; i++)
778 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
779 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
781 for (s = 0; s < nset; s++)
784 for (i = f; i < n-f; i++)
786 vf = filt[0]*val[s][i];
787 for (j = 1; j <= f; j++)
789 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
791 fluc += sqr(val[s][i] - vf);
795 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
797 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
798 fprintf(stdout, "\n");
803 static void do_fit(FILE *out, int n, gmx_bool bYdy,
804 int ny, real *x0, real **val,
805 int npargs, t_pargs *ppa, const output_env_t oenv,
806 const char *fn_fitted)
808 real *c1 = NULL, *sig = NULL;
810 real tendfit, tbeginfit;
811 int i, efitfn, nparm;
813 efitfn = get_acffitfn();
814 nparm = effnNparams(efitfn);
815 fprintf(out, "Will fit to the following function:\n");
816 fprintf(out, "%s\n", effnDescription(efitfn));
822 fprintf(out, "Using two columns as y and sigma values\n");
828 if (opt2parg_bSet("-beginfit", npargs, ppa))
830 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
836 if (opt2parg_bSet("-endfit", npargs, ppa))
838 tendfit = opt2parg_real("-endfit", npargs, ppa);
845 snew(fitparm, nparm);
857 fitparm[1] = 0.5*c1[0];
861 fitparm[0] = fitparm[2] = 0.5*c1[0];
867 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
874 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
882 fprintf(out, "Warning: don't know how to initialize the parameters\n");
883 for (i = 0; (i < nparm); i++)
888 fprintf(out, "Starting parameters:\n");
889 for (i = 0; (i < nparm); i++)
891 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
893 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
894 oenv, bDebugMode(), efitfn, fitparm, 0,
897 for (i = 0; (i < nparm); i++)
899 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
904 fprintf(out, "No solution was found\n");
908 static void do_ballistic(const char *balFile, int nData,
909 real *t, real **val, int nSet,
910 real balTime, int nBalExp,
911 const output_env_t oenv)
913 double **ctd = NULL, *td = NULL;
914 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
915 static char *leg[] = {"Ac'(t)"};
919 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
924 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
925 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
927 for (set = 0; set < nSet; set++)
929 snew(ctd[set], nData);
930 for (i = 0; i < nData; i++)
932 ctd[set][i] = (double)val[set][i];
935 td[i] = (double)t[i];
939 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
942 for (i = 0; i < nData; i++)
944 fprintf(fp, " %g", t[i]);
945 for (set = 0; set < nSet; set++)
947 fprintf(fp, " %g", ctd[set][i]);
953 for (set = 0; set < nSet; set++)
963 printf("Number of data points is less than the number of parameters to fit\n."
964 "The system is underdetermined, hence no ballistic term can be found.\n\n");
968 static void do_geminate(const char *gemFile, int nData,
969 real *t, real **val, int nSet,
970 const real D, const real rcut, const real balTime,
971 const int nFitPoints, const real begFit, const real endFit,
972 const output_env_t oenv)
974 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
975 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
976 begFit, endFit, balTime, 1);
977 const char *leg[] = {"Ac\\sgem\\N(t)"};
985 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
986 xvgr_legend(fp, asize(leg), leg, oenv);
988 for (set = 0; set < nSet; set++)
990 snew(ctd[set], nData);
991 snew(ctdGem[set], nData);
992 for (i = 0; i < nData; i++)
994 ctd[set][i] = (double)val[set][i];
997 td[i] = (double)t[i];
1000 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
1003 for (i = 0; i < nData; i++)
1005 fprintf(fp, " %g", t[i]);
1006 for (set = 0; set < nSet; set++)
1008 fprintf(fp, " %g", ctdGem[set][i]);
1013 for (set = 0; set < nSet; set++)
1024 static void print_fitted_function(const char *fitfile,
1025 const char *fn_fitted,
1035 FILE *out_fit = gmx_ffopen(fitfile, "w");
1036 if (bXYdy && nset >= 2)
1038 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv,
1045 if (NULL != fn_fitted)
1047 buflen = strlen(fn_fitted)+32;
1049 strncpy(buf2, fn_fitted, buflen);
1050 buf2[strlen(buf2)-4] = '\0';
1052 for (s = 0; s < nset; s++)
1055 if (NULL != fn_fitted)
1058 snprintf(buf, n, "%s_%d.xvg", buf2, s);
1060 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
1065 gmx_ffclose(out_fit);
1068 int gmx_analyze(int argc, char *argv[])
1070 static const char *desc[] = {
1071 "[THISMODULE] reads an ASCII file and analyzes data sets.",
1072 "A line in the input file may start with a time",
1073 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1074 "Multiple sets can also be",
1075 "read when they are separated by & (option [TT]-n[tt]);",
1076 "in this case only one [IT]y[it]-value is read from each line.",
1077 "All lines starting with # and @ are skipped.",
1078 "All analyses can also be done for the derivative of a set",
1079 "(option [TT]-d[tt]).[PAR]",
1081 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1082 "points are equidistant in time.[PAR]",
1084 "[THISMODULE] always shows the average and standard deviation of each",
1085 "set, as well as the relative deviation of the third",
1086 "and fourth cumulant from those of a Gaussian distribution with the same",
1087 "standard deviation.[PAR]",
1089 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1090 "Be sure that the time interval between data points is",
1091 "much shorter than the time scale of the autocorrelation.[PAR]",
1093 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1094 "i/2 periods. The formula is::",
1096 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
1098 "This is useful for principal components obtained from covariance",
1099 "analysis, since the principal components of random diffusion are",
1100 "pure cosines.[PAR]",
1102 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1104 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1106 "Option [TT]-av[tt] produces the average over the sets.",
1107 "Error bars can be added with the option [TT]-errbar[tt].",
1108 "The errorbars can represent the standard deviation, the error",
1109 "(assuming the points are independent) or the interval containing",
1110 "90% of the points, by discarding 5% of the points at the top and",
1113 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1114 "A set is divided in a number of blocks and averages are calculated for",
1115 "each block. The error for the total average is calculated from",
1116 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1117 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1118 "These errors are plotted as a function of the block size.",
1119 "Also an analytical block average curve is plotted, assuming",
1120 "that the autocorrelation is a sum of two exponentials.",
1121 "The analytical curve for the block average is::",
1123 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1124 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],",
1126 "where T is the total time.",
1127 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1128 "When the actual block average is very close to the analytical curve,",
1129 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1130 "The complete derivation is given in",
1131 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1133 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1134 "component from a hydrogen bond autocorrelation function by the fitting",
1135 "of a sum of exponentials, as described in e.g.",
1136 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1137 "is the one with the most negative coefficient in the exponential,",
1138 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1139 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1141 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1142 "(and optionally kD) to the hydrogen bond autocorrelation function",
1143 "according to the reversible geminate recombination model. Removal of",
1144 "the ballistic component first is strongly advised. The model is presented in",
1145 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1147 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1148 "of each set and over all sets with respect to a filtered average.",
1149 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1150 "to len/2. len is supplied with the option [TT]-filter[tt].",
1151 "This filter reduces oscillations with period len/2 and len by a factor",
1152 "of 0.79 and 0.33 respectively.[PAR]",
1154 "Option [TT]-g[tt] fits the data to the function given with option",
1155 "[TT]-fitfn[tt].[PAR]",
1157 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1158 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1159 "zero or with a negative value are ignored.[PAR]"
1161 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1162 "on output from [gmx-hbond]. The input file can be taken directly",
1163 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1164 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1165 "curves that make sense in the context of molecular dynamics, mainly",
1166 "exponential curves. More information is in the manual. To check the output",
1167 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1168 "original data and the fitted function to a new data file. The fitting",
1169 "parameters are stored as comment in the output file."
1171 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1172 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1173 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1174 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1175 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1176 static real temp = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1178 /* must correspond to enum avbar* declared at beginning of file */
1179 static const char *avbar_opt[avbarNR+1] = {
1180 NULL, "none", "stddev", "error", "90", NULL
1184 { "-time", FALSE, etBOOL, {&bHaveT},
1185 "Expect a time in the input" },
1186 { "-b", FALSE, etREAL, {&tb},
1187 "First time to read from set" },
1188 { "-e", FALSE, etREAL, {&te},
1189 "Last time to read from set" },
1190 { "-n", FALSE, etINT, {&nsets_in},
1191 "Read this number of sets separated by &" },
1192 { "-d", FALSE, etBOOL, {&bDer},
1193 "Use the derivative" },
1194 { "-dp", FALSE, etINT, {&d},
1195 "HIDDENThe derivative is the difference over this number of points" },
1196 { "-bw", FALSE, etREAL, {&binwidth},
1197 "Binwidth for the distribution" },
1198 { "-errbar", FALSE, etENUM, {avbar_opt},
1199 "Error bars for [TT]-av[tt]" },
1200 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1201 "Integrate data function(s) numerically using trapezium rule" },
1202 { "-aver_start", FALSE, etREAL, {&aver_start},
1203 "Start averaging the integral from here" },
1204 { "-xydy", FALSE, etBOOL, {&bXYdy},
1205 "Interpret second data set as error in the y values for integrating" },
1206 { "-regression", FALSE, etBOOL, {&bRegression},
1207 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1208 { "-luzar", FALSE, etBOOL, {&bLuzar},
1209 "Do a Luzar and Chandler analysis on a correlation function and "
1210 "related as produced by [gmx-hbond]. When in addition the "
1211 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1212 "interpreted as errors in c(t) and n(t)." },
1213 { "-temp", FALSE, etREAL, {&temp},
1214 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1215 { "-fitstart", FALSE, etREAL, {&fit_start},
1216 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1217 { "-fitend", FALSE, etREAL, {&fit_end},
1218 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1219 { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1220 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1221 { "-nbmin", FALSE, etINT, {&nb_min},
1222 "HIDDENMinimum number of blocks for block averaging" },
1223 { "-resol", FALSE, etINT, {&resol},
1224 "HIDDENResolution for the block averaging, block size increases with"
1225 " a factor 2^(1/resol)" },
1226 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1227 "HIDDENAlways use a single exponential fit for the error estimate" },
1228 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1229 "HIDDENAllow a negative long-time correlation" },
1230 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1231 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1232 { "-filter", FALSE, etREAL, {&filtlen},
1233 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1234 { "-power", FALSE, etBOOL, {&bPower},
1235 "Fit data to: b t^a" },
1236 { "-subav", FALSE, etBOOL, {&bSubAv},
1237 "Subtract the average before autocorrelating" },
1238 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1239 "Calculate one ACF over all sets" },
1240 { "-nbalexp", FALSE, etINT, {&nBalExp},
1241 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1242 { "-baltime", FALSE, etREAL, {&balTime},
1243 "HIDDENTime up to which the ballistic component will be fitted" },
1244 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1245 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1246 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1247 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1248 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1249 /* "What type of gminate recombination to use"}, */
1250 /* { "-D", FALSE, etREAL, {&diffusion}, */
1251 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1253 #define NPA asize(pa)
1255 FILE *out, *out_fit;
1256 int n, nlast, s, nset, i, j = 0;
1257 real **val, *t, dt, tot, error;
1258 double *av, *sig, cum1, cum2, cum3, cum4, db;
1259 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1263 { efXVG, "-f", "graph", ffREAD },
1264 { efXVG, "-ac", "autocorr", ffOPTWR },
1265 { efXVG, "-msd", "msd", ffOPTWR },
1266 { efXVG, "-cc", "coscont", ffOPTWR },
1267 { efXVG, "-dist", "distr", ffOPTWR },
1268 { efXVG, "-av", "average", ffOPTWR },
1269 { efXVG, "-ee", "errest", ffOPTWR },
1270 { efXVG, "-bal", "ballisitc", ffOPTWR },
1271 { efXVG, "-fitted", "fitted", ffOPTWR },
1272 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1273 { efLOG, "-g", "fitlog", ffOPTWR }
1275 #define NFILE asize(fnm)
1281 ppa = add_acf_pargs(&npargs, pa);
1283 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1284 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1289 acfile = opt2fn_null("-ac", NFILE, fnm);
1290 msdfile = opt2fn_null("-msd", NFILE, fnm);
1291 ccfile = opt2fn_null("-cc", NFILE, fnm);
1292 distfile = opt2fn_null("-dist", NFILE, fnm);
1293 avfile = opt2fn_null("-av", NFILE, fnm);
1294 eefile = opt2fn_null("-ee", NFILE, fnm);
1295 balfile = opt2fn_null("-bal", NFILE, fnm);
1296 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1297 /* When doing autocorrelation we don't want a fitlog for fitting
1298 * the function itself (not the acf) when the user did not ask for it.
1300 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1302 fitfile = opt2fn("-g", NFILE, fnm);
1306 fitfile = opt2fn_null("-g", NFILE, fnm);
1309 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1310 opt2parg_bSet("-b", npargs, ppa), tb,
1311 opt2parg_bSet("-e", npargs, ppa), te,
1312 nsets_in, &nset, &n, &dt, &t);
1313 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1317 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1320 for (s = 0; s < nset; s++)
1322 for (i = 0; (i < n); i++)
1324 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1333 printf("Calculating the integral using the trapezium rule\n");
1337 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1338 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1342 for (s = 0; s < nset; s++)
1344 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1345 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1350 if (fitfile != NULL)
1352 print_fitted_function(fitfile,
1353 opt2fn_null("-fitted", NFILE, fnm),
1360 printf(" std. dev. relative deviation of\n");
1361 printf(" standard --------- cumulants from those of\n");
1362 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1363 printf(" cum. 3 cum. 4\n");
1366 for (s = 0; (s < nset); s++)
1372 for (i = 0; (i < n); i++)
1377 for (i = 0; (i < n); i++)
1379 db = val[s][i]-cum1;
1382 cum4 += db*db*db*db;
1388 sig[s] = sqrt(cum2);
1391 error = sqrt(cum2/(n-1));
1397 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1398 s+1, av[s], sig[s], error,
1399 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1400 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1406 filter(filtlen, n, nset, val, dt);
1411 out = xvgropen(msdfile, "Mean square displacement",
1412 "time", "MSD (nm\\S2\\N)", oenv);
1413 nlast = (int)(n*frac);
1414 for (s = 0; s < nset; s++)
1416 for (j = 0; j <= nlast; j++)
1420 fprintf(stderr, "\r%d", j);
1423 for (i = 0; i < n-j; i++)
1425 tot += sqr(val[s][i]-val[s][i+j]);
1428 fprintf(out, " %g %8g\n", dt*j, tot);
1432 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1436 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1440 plot_coscont(ccfile, n, nset, val, oenv);
1445 histogram(distfile, binwidth, n, nset, val, oenv);
1449 average(avfile, nenum(avbar_opt), n, nset, val, t);
1453 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1454 bEeFitAc, bEESEF, bEENLC, oenv);
1458 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1461 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1462 /* nFitPoints, fit_start, fit_end, oenv); */
1465 power_fit(n, nset, val, t);
1472 for (s = 0; s < nset; s++)
1474 for (i = 0; i < n; i++)
1480 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1481 eacNormal, bAverCorr);
1486 regression_analysis(n, bXYdy, t, nset, val);
1491 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1494 view_all(oenv, NFILE, fnm);