2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/correlationfunctions/autocorr.h"
45 #include "gromacs/correlationfunctions/expfit.h"
46 #include "gromacs/correlationfunctions/integrate.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/geminate.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/gmxana/gstat.h"
51 #include "gromacs/legacyheaders/copyrite.h"
52 #include "gromacs/legacyheaders/macros.h"
53 #include "gromacs/legacyheaders/readinp.h"
54 #include "gromacs/legacyheaders/txtdump.h"
55 #include "gromacs/legacyheaders/typedefs.h"
56 #include "gromacs/legacyheaders/viewit.h"
57 #include "gromacs/linearalgebra/matrix.h"
58 #include "gromacs/math/vec.h"
59 #include "gromacs/statistics/statistics.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/futil.h"
62 #include "gromacs/utility/smalloc.h"
63 #include "gromacs/utility/snprintf.h"
65 /* must correspond to char *avbar_opt[] declared in main() */
67 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
70 static void power_fit(int n, int nset, real **val, real *t)
72 real *x, *y, quality, a, b, r;
80 for (i = 0; i < n; i++)
90 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
91 for (i = 0; i < n; i++)
97 for (s = 0; s < nset; s++)
100 for (i = 0; i < n && val[s][i] >= 0; i++)
102 y[i] = log(val[s][i]);
106 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
108 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
109 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
110 s+1, quality, a, exp(b));
117 static real cosine_content(int nhp, int n, real *y)
118 /* Assumes n equidistant points */
120 double fac, cosyint, yyint;
128 fac = M_PI*nhp/(n-1);
132 for (i = 0; i < n; i++)
134 cosyint += cos(fac*i)*y[i];
138 return 2*cosyint*cosyint/(n*yyint);
141 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
142 const output_env_t oenv)
148 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
151 for (s = 0; s < nset; s++)
153 cc = cosine_content(s+1, n, val[s]);
154 fprintf(fp, " %d %g\n", s+1, cc);
155 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
158 fprintf(stdout, "\n");
163 static void regression_analysis(int n, gmx_bool bXYdy,
164 real *x, int nset, real **val)
166 real S, chi2, a, b, da, db, r = 0;
169 if (bXYdy || (nset == 1))
171 printf("Fitting data to a function f(x) = ax + b\n");
172 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
173 printf("Error estimates will be given if w_i (sigma) values are given\n");
174 printf("(use option -xydy).\n\n");
177 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
179 gmx_fatal(FARGS, "Error fitting the data: %s",
180 gmx_stats_message(ok));
185 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
187 gmx_fatal(FARGS, "Error fitting the data: %s",
188 gmx_stats_message(ok));
192 printf("Chi2 = %g\n", chi2);
193 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
194 printf("Correlation coefficient = %.1f%%\n", 100*r);
198 printf("a = %g +/- %g\n", a, da);
199 printf("b = %g +/- %g\n", b, db);
203 printf("a = %g\n", a);
204 printf("b = %g\n", b);
209 double chi2, *a, **xx, *y;
214 for (j = 0; (j < nset-1); j++)
218 for (i = 0; (i < n); i++)
221 for (j = 1; (j < nset); j++)
223 xx[j-1][i] = val[j][i];
227 chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
228 printf("Fitting %d data points in %d sets\n", n, nset-1);
229 printf("chi2 = %g\n", chi2);
231 for (i = 0; (i < nset-1); i++)
243 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
244 const output_env_t oenv)
254 for (s = 0; s < nset; s++)
256 for (i = 0; i < n; i++)
262 else if (val[s][i] > max)
269 min = binwidth*floor(min/binwidth);
270 max = binwidth*ceil(max/binwidth);
277 nbin = (int)((max - min)/binwidth + 0.5) + 1;
278 fprintf(stderr, "Making distributions with %d bins\n", nbin);
280 fp = xvgropen(distfile, "Distribution", "", "", oenv);
281 for (s = 0; s < nset; s++)
283 for (i = 0; i < nbin; i++)
287 for (i = 0; i < n; i++)
289 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
291 for (i = 0; i < nbin; i++)
293 fprintf(fp, " %g %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
297 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
303 static int real_comp(const void *a, const void *b)
305 real dif = *(real *)a - *(real *)b;
321 static void average(const char *avfile, int avbar_opt,
322 int n, int nset, real **val, real *t)
329 fp = gmx_ffopen(avfile, "w");
330 if ((avbar_opt == avbarERROR) && (nset == 1))
332 avbar_opt = avbarNONE;
334 if (avbar_opt != avbarNONE)
336 if (avbar_opt == avbar90)
339 fprintf(fp, "@TYPE xydydy\n");
340 edge = (int)(nset*0.05+0.5);
341 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
342 " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
346 fprintf(fp, "@TYPE xydy\n");
350 for (i = 0; i < n; i++)
353 for (s = 0; s < nset; s++)
358 fprintf(fp, " %g %g", t[i], av);
360 if (avbar_opt != avbarNONE)
362 if (avbar_opt == avbar90)
364 for (s = 0; s < nset; s++)
368 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
369 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
373 for (s = 0; s < nset; s++)
375 var += sqr(val[s][i]-av);
377 if (avbar_opt == avbarSTDDEV)
379 err = sqrt(var/nset);
383 err = sqrt(var/(nset*(nset-1)));
385 fprintf(fp, " %g", err);
392 if (avbar_opt == avbar90)
398 /*! \brief Compute final error estimate.
400 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
402 static real optimal_error_estimate(double sigma, double fitparm[], real tTotal)
404 double ss = fitparm[1]*fitparm[0]+(1-fitparm[1])*fitparm[2];
405 if ((tTotal <= 0) || (ss <= 0))
407 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n",
412 return sigma*sqrt(2*ss/tTotal);
415 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
416 int nset, double *av, double *sig, real **val, real dt,
417 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
418 const output_env_t oenv)
421 int bs, prev_bs, nbs, nb;
426 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
428 real ee, a, tau1, tau2;
432 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
437 fp = xvgropen(eefile, "Error estimates",
438 "Block size (time)", "Error estimate", oenv);
439 if (output_env_get_print_xvgr_codes(oenv))
442 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
446 xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
449 spacing = pow(2, 1.0/resol);
453 for (s = 0; s < nset; s++)
465 for (i = 0; i < nb; i++)
468 for (j = 0; j < bs; j++)
470 blav += val[s][bs*i+j];
472 var += sqr(av[s] - blav/bs);
481 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
498 for (i = 0; i < nbs/2; i++)
501 tbs[i] = tbs[nbs-1-i];
504 ybs[i] = ybs[nbs-1-i];
507 /* The initial slope of the normalized ybs^2 is 1.
508 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
509 * From this we take our initial guess for tau1.
518 while (i < nbs - 1 &&
519 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
523 fprintf(stdout, "Data set %d has strange time correlations:\n"
524 "the std. error using single points is larger than that of blocks of 2 points\n"
525 "The error estimate might be inaccurate, check the fit\n",
527 /* Use the total time as tau for the fitting weights */
528 tau_sig = (n - 1)*dt;
537 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
540 /* Generate more or less appropriate sigma's,
541 * also taking the density of points into account.
543 for (i = 0; i < nbs; i++)
547 dens = tbs[1]/tbs[0] - 1;
551 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
555 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
557 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
562 fitparm[0] = tau1_est;
564 /* We set the initial guess for tau2
565 * to halfway between tau1_est and the total time (on log scale).
567 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
568 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
569 bDebugMode(), effnERREST, fitparm, 0,
572 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
573 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
577 if (fitparm[2] > (n-1)*dt)
580 "Warning: tau2 is longer than the length of the data (%g)\n"
581 " the statistics might be bad\n",
586 fprintf(stdout, "a fitted parameter is negative\n");
588 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
589 optimal_error_estimate(sig[s], fitparm, n*dt),
590 fitparm[1], fitparm[0], fitparm[2]);
591 /* Do a fit with tau2 fixed at the total time.
592 * One could also choose any other large value for tau2.
594 fitparm[0] = tau1_est;
596 fitparm[2] = (n-1)*dt;
597 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
598 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
599 effnERREST, fitparm, 4, NULL);
601 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
602 || (fitparm[1] > 1 && !bAllowNegLTCorr))
606 fprintf(stdout, "a fitted parameter is negative\n");
607 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
608 optimal_error_estimate(sig[s], fitparm, n*dt),
609 fitparm[1], fitparm[0], fitparm[2]);
611 /* Do a single exponential fit */
612 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
613 fitparm[0] = tau1_est;
616 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
617 effnERREST, fitparm, 6, NULL);
620 ee = optimal_error_estimate(sig[s], fitparm, n*dt);
625 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
626 s+1, ee, a, tau1, tau2);
627 if (output_env_get_xvg_format(oenv) == exvgXMGR)
629 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
630 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1,
631 optimal_error_estimate(sig[s], fitparm, n*dt));
633 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
635 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
636 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1,
637 optimal_error_estimate(sig[s], fitparm, n*dt));
639 for (i = 0; i < nbs; i++)
641 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
642 sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
652 for (i = 0; i < n; i++)
654 ac[i] = val[s][i] - av[s];
664 low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
665 dt, eacNormal, 1, FALSE, TRUE,
666 FALSE, 0, 0, effnNONE);
670 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
672 for (i = 1; i <= fitlen/2; i++)
678 /* Generate more or less appropriate sigma's */
679 for (i = 0; i <= fitlen; i++)
681 fitsig[i] = sqrt(acint + dt*i);
684 ac_fit[0] = 0.5*acint;
686 ac_fit[2] = 10*acint;
687 do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
688 bDebugMode(), effnEXPEXP, ac_fit, 0, NULL);
689 ac_fit[3] = 1 - ac_fit[1];
691 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
692 s+1, optimal_error_estimate(sig[s], ac_fit, n*dt),
693 ac_fit[1], ac_fit[0], ac_fit[2]);
695 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
696 for (i = 0; i < nbs; i++)
698 fprintf(fp, "%g %g\n", tbs[i],
699 sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
706 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
715 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
716 gmx_bool bError, real fit_start)
718 const real tol = 1e-8;
723 please_cite(stdout, "Spoel2006b");
725 /* Compute negative derivative k(t) = -dc(t)/dt */
729 compute_derivative(nn, time, val[0], kt);
730 for (j = 0; (j < nn); j++)
737 for (j = 0; (j < nn); j++)
739 d2 += sqr(kt[j] - val[3][j]);
741 fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
743 analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
749 analyse_corr(nn, time, val[0], val[2], val[4],
750 val[1], val[3], val[5], fit_start, temp);
754 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
755 printf("Not doing anything. Sorry.\n");
759 static void filter(real flen, int n, int nset, real **val, real dt)
762 double *filt, sum, vf, fluc, fluctot;
764 f = (int)(flen/(2*dt));
768 for (i = 1; i <= f; i++)
770 filt[i] = cos(M_PI*dt*i/flen);
773 for (i = 0; i <= f; i++)
777 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
778 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
780 for (s = 0; s < nset; s++)
783 for (i = f; i < n-f; i++)
785 vf = filt[0]*val[s][i];
786 for (j = 1; j <= f; j++)
788 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
790 fluc += sqr(val[s][i] - vf);
794 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
796 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
797 fprintf(stdout, "\n");
802 static void do_fit(FILE *out, int n, gmx_bool bYdy,
803 int ny, real *x0, real **val,
804 int npargs, t_pargs *ppa, const output_env_t oenv,
805 const char *fn_fitted)
807 real *c1 = NULL, *sig = NULL;
809 real tendfit, tbeginfit;
810 int i, efitfn, nparm;
812 efitfn = get_acffitfn();
813 nparm = effnNparams(efitfn);
814 fprintf(out, "Will fit to the following function:\n");
815 fprintf(out, "%s\n", effnDescription(efitfn));
821 fprintf(out, "Using two columns as y and sigma values\n");
827 if (opt2parg_bSet("-beginfit", npargs, ppa))
829 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
835 if (opt2parg_bSet("-endfit", npargs, ppa))
837 tendfit = opt2parg_real("-endfit", npargs, ppa);
844 snew(fitparm, nparm);
856 fitparm[1] = 0.5*c1[0];
860 fitparm[0] = fitparm[2] = 0.5*c1[0];
866 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
873 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
881 fprintf(out, "Warning: don't know how to initialize the parameters\n");
882 for (i = 0; (i < nparm); i++)
887 fprintf(out, "Starting parameters:\n");
888 for (i = 0; (i < nparm); i++)
890 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
892 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
893 oenv, bDebugMode(), efitfn, fitparm, 0,
896 for (i = 0; (i < nparm); i++)
898 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
903 fprintf(out, "No solution was found\n");
907 static void do_ballistic(const char *balFile, int nData,
908 real *t, real **val, int nSet,
909 real balTime, int nBalExp,
910 const output_env_t oenv)
912 double **ctd = NULL, *td = NULL;
913 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
914 static char *leg[] = {"Ac'(t)"};
918 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
923 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
924 xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
926 for (set = 0; set < nSet; set++)
928 snew(ctd[set], nData);
929 for (i = 0; i < nData; i++)
931 ctd[set][i] = (double)val[set][i];
934 td[i] = (double)t[i];
938 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
941 for (i = 0; i < nData; i++)
943 fprintf(fp, " %g", t[i]);
944 for (set = 0; set < nSet; set++)
946 fprintf(fp, " %g", ctd[set][i]);
952 for (set = 0; set < nSet; set++)
962 printf("Number of data points is less than the number of parameters to fit\n."
963 "The system is underdetermined, hence no ballistic term can be found.\n\n");
967 static void do_geminate(const char *gemFile, int nData,
968 real *t, real **val, int nSet,
969 const real D, const real rcut, const real balTime,
970 const int nFitPoints, const real begFit, const real endFit,
971 const output_env_t oenv)
973 double **ctd = NULL, **ctdGem = NULL, *td = NULL;
974 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
975 begFit, endFit, balTime, 1);
976 const char *leg[] = {"Ac\\sgem\\N(t)"};
984 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
985 xvgr_legend(fp, asize(leg), leg, oenv);
987 for (set = 0; set < nSet; set++)
989 snew(ctd[set], nData);
990 snew(ctdGem[set], nData);
991 for (i = 0; i < nData; i++)
993 ctd[set][i] = (double)val[set][i];
996 td[i] = (double)t[i];
999 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
1002 for (i = 0; i < nData; i++)
1004 fprintf(fp, " %g", t[i]);
1005 for (set = 0; set < nSet; set++)
1007 fprintf(fp, " %g", ctdGem[set][i]);
1012 for (set = 0; set < nSet; set++)
1023 static void print_fitted_function(const char *fitfile,
1024 const char *fn_fitted,
1034 FILE *out_fit = gmx_ffopen(fitfile, "w");
1035 if (bXYdy && nset >= 2)
1037 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv,
1044 if (NULL != fn_fitted)
1046 buflen = strlen(fn_fitted)+32;
1048 strncpy(buf2, fn_fitted, buflen);
1049 buf2[strlen(buf2)-4] = '\0';
1051 for (s = 0; s < nset; s++)
1054 if (NULL != fn_fitted)
1057 snprintf(buf, n, "%s_%d.xvg", buf2, s);
1059 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
1064 gmx_ffclose(out_fit);
1067 int gmx_analyze(int argc, char *argv[])
1069 static const char *desc[] = {
1070 "[THISMODULE] reads an ASCII file and analyzes data sets.",
1071 "A line in the input file may start with a time",
1072 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1073 "Multiple sets can also be",
1074 "read when they are separated by & (option [TT]-n[tt]);",
1075 "in this case only one [IT]y[it]-value is read from each line.",
1076 "All lines starting with # and @ are skipped.",
1077 "All analyses can also be done for the derivative of a set",
1078 "(option [TT]-d[tt]).[PAR]",
1080 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1081 "points are equidistant in time.[PAR]",
1083 "[THISMODULE] always shows the average and standard deviation of each",
1084 "set, as well as the relative deviation of the third",
1085 "and fourth cumulant from those of a Gaussian distribution with the same",
1086 "standard deviation.[PAR]",
1088 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1089 "Be sure that the time interval between data points is",
1090 "much shorter than the time scale of the autocorrelation.[PAR]",
1092 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1093 "i/2 periods. The formula is::",
1095 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
1097 "This is useful for principal components obtained from covariance",
1098 "analysis, since the principal components of random diffusion are",
1099 "pure cosines.[PAR]",
1101 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1103 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1105 "Option [TT]-av[tt] produces the average over the sets.",
1106 "Error bars can be added with the option [TT]-errbar[tt].",
1107 "The errorbars can represent the standard deviation, the error",
1108 "(assuming the points are independent) or the interval containing",
1109 "90% of the points, by discarding 5% of the points at the top and",
1112 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1113 "A set is divided in a number of blocks and averages are calculated for",
1114 "each block. The error for the total average is calculated from",
1115 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1116 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1117 "These errors are plotted as a function of the block size.",
1118 "Also an analytical block average curve is plotted, assuming",
1119 "that the autocorrelation is a sum of two exponentials.",
1120 "The analytical curve for the block average is::",
1122 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1123 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],",
1125 "where T is the total time.",
1126 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1127 "When the actual block average is very close to the analytical curve,",
1128 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1129 "The complete derivation is given in",
1130 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1132 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1133 "component from a hydrogen bond autocorrelation function by the fitting",
1134 "of a sum of exponentials, as described in e.g.",
1135 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1136 "is the one with the most negative coefficient in the exponential,",
1137 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1138 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1140 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1141 "(and optionally kD) to the hydrogen bond autocorrelation function",
1142 "according to the reversible geminate recombination model. Removal of",
1143 "the ballistic component first is strongly advised. The model is presented in",
1144 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1146 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1147 "of each set and over all sets with respect to a filtered average.",
1148 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1149 "to len/2. len is supplied with the option [TT]-filter[tt].",
1150 "This filter reduces oscillations with period len/2 and len by a factor",
1151 "of 0.79 and 0.33 respectively.[PAR]",
1153 "Option [TT]-g[tt] fits the data to the function given with option",
1154 "[TT]-fitfn[tt].[PAR]",
1156 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1157 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1158 "zero or with a negative value are ignored.[PAR]"
1160 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1161 "on output from [gmx-hbond]. The input file can be taken directly",
1162 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1163 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1164 "curves that make sense in the context of molecular dynamics, mainly",
1165 "exponential curves. More information is in the manual. To check the output",
1166 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1167 "original data and the fitted function to a new data file. The fitting",
1168 "parameters are stored as comment in the output file."
1170 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1171 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1172 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1173 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1174 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1175 static real temp = 298.15, fit_start = 1, fit_end = 60, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1177 /* must correspond to enum avbar* declared at beginning of file */
1178 static const char *avbar_opt[avbarNR+1] = {
1179 NULL, "none", "stddev", "error", "90", NULL
1183 { "-time", FALSE, etBOOL, {&bHaveT},
1184 "Expect a time in the input" },
1185 { "-b", FALSE, etREAL, {&tb},
1186 "First time to read from set" },
1187 { "-e", FALSE, etREAL, {&te},
1188 "Last time to read from set" },
1189 { "-n", FALSE, etINT, {&nsets_in},
1190 "Read this number of sets separated by &" },
1191 { "-d", FALSE, etBOOL, {&bDer},
1192 "Use the derivative" },
1193 { "-dp", FALSE, etINT, {&d},
1194 "HIDDENThe derivative is the difference over this number of points" },
1195 { "-bw", FALSE, etREAL, {&binwidth},
1196 "Binwidth for the distribution" },
1197 { "-errbar", FALSE, etENUM, {avbar_opt},
1198 "Error bars for [TT]-av[tt]" },
1199 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1200 "Integrate data function(s) numerically using trapezium rule" },
1201 { "-aver_start", FALSE, etREAL, {&aver_start},
1202 "Start averaging the integral from here" },
1203 { "-xydy", FALSE, etBOOL, {&bXYdy},
1204 "Interpret second data set as error in the y values for integrating" },
1205 { "-regression", FALSE, etBOOL, {&bRegression},
1206 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1207 { "-luzar", FALSE, etBOOL, {&bLuzar},
1208 "Do a Luzar and Chandler analysis on a correlation function and "
1209 "related as produced by [gmx-hbond]. When in addition the "
1210 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1211 "interpreted as errors in c(t) and n(t)." },
1212 { "-temp", FALSE, etREAL, {&temp},
1213 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1214 { "-fitstart", FALSE, etREAL, {&fit_start},
1215 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1216 { "-fitend", FALSE, etREAL, {&fit_end},
1217 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1218 { "-nbmin", FALSE, etINT, {&nb_min},
1219 "HIDDENMinimum number of blocks for block averaging" },
1220 { "-resol", FALSE, etINT, {&resol},
1221 "HIDDENResolution for the block averaging, block size increases with"
1222 " a factor 2^(1/resol)" },
1223 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1224 "HIDDENAlways use a single exponential fit for the error estimate" },
1225 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1226 "HIDDENAllow a negative long-time correlation" },
1227 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1228 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1229 { "-filter", FALSE, etREAL, {&filtlen},
1230 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1231 { "-power", FALSE, etBOOL, {&bPower},
1232 "Fit data to: b t^a" },
1233 { "-subav", FALSE, etBOOL, {&bSubAv},
1234 "Subtract the average before autocorrelating" },
1235 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1236 "Calculate one ACF over all sets" },
1237 { "-nbalexp", FALSE, etINT, {&nBalExp},
1238 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1239 { "-baltime", FALSE, etREAL, {&balTime},
1240 "HIDDENTime up to which the ballistic component will be fitted" },
1241 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1242 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1243 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1244 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1245 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1246 /* "What type of gminate recombination to use"}, */
1247 /* { "-D", FALSE, etREAL, {&diffusion}, */
1248 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1250 #define NPA asize(pa)
1252 FILE *out, *out_fit;
1253 int n, nlast, s, nset, i, j = 0;
1254 real **val, *t, dt, tot, error;
1255 double *av, *sig, cum1, cum2, cum3, cum4, db;
1256 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1260 { efXVG, "-f", "graph", ffREAD },
1261 { efXVG, "-ac", "autocorr", ffOPTWR },
1262 { efXVG, "-msd", "msd", ffOPTWR },
1263 { efXVG, "-cc", "coscont", ffOPTWR },
1264 { efXVG, "-dist", "distr", ffOPTWR },
1265 { efXVG, "-av", "average", ffOPTWR },
1266 { efXVG, "-ee", "errest", ffOPTWR },
1267 { efXVG, "-bal", "ballisitc", ffOPTWR },
1268 { efXVG, "-fitted", "fitted", ffOPTWR },
1269 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1270 { efLOG, "-g", "fitlog", ffOPTWR }
1272 #define NFILE asize(fnm)
1278 ppa = add_acf_pargs(&npargs, pa);
1280 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1281 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1286 acfile = opt2fn_null("-ac", NFILE, fnm);
1287 msdfile = opt2fn_null("-msd", NFILE, fnm);
1288 ccfile = opt2fn_null("-cc", NFILE, fnm);
1289 distfile = opt2fn_null("-dist", NFILE, fnm);
1290 avfile = opt2fn_null("-av", NFILE, fnm);
1291 eefile = opt2fn_null("-ee", NFILE, fnm);
1292 balfile = opt2fn_null("-bal", NFILE, fnm);
1293 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1294 /* When doing autocorrelation we don't want a fitlog for fitting
1295 * the function itself (not the acf) when the user did not ask for it.
1297 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1299 fitfile = opt2fn("-g", NFILE, fnm);
1303 fitfile = opt2fn_null("-g", NFILE, fnm);
1306 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1307 opt2parg_bSet("-b", npargs, ppa), tb,
1308 opt2parg_bSet("-e", npargs, ppa), te,
1309 nsets_in, &nset, &n, &dt, &t);
1310 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1314 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1317 for (s = 0; s < nset; s++)
1319 for (i = 0; (i < n); i++)
1321 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1330 printf("Calculating the integral using the trapezium rule\n");
1334 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1335 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1339 for (s = 0; s < nset; s++)
1341 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1342 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1347 if (fitfile != NULL)
1349 print_fitted_function(fitfile,
1350 opt2fn_null("-fitted", NFILE, fnm),
1357 printf(" std. dev. relative deviation of\n");
1358 printf(" standard --------- cumulants from those of\n");
1359 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1360 printf(" cum. 3 cum. 4\n");
1363 for (s = 0; (s < nset); s++)
1369 for (i = 0; (i < n); i++)
1374 for (i = 0; (i < n); i++)
1376 db = val[s][i]-cum1;
1379 cum4 += db*db*db*db;
1385 sig[s] = sqrt(cum2);
1388 error = sqrt(cum2/(n-1));
1394 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1395 s+1, av[s], sig[s], error,
1396 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1397 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1403 filter(filtlen, n, nset, val, dt);
1408 out = xvgropen(msdfile, "Mean square displacement",
1409 "time", "MSD (nm\\S2\\N)", oenv);
1410 nlast = (int)(n*frac);
1411 for (s = 0; s < nset; s++)
1413 for (j = 0; j <= nlast; j++)
1417 fprintf(stderr, "\r%d", j);
1420 for (i = 0; i < n-j; i++)
1422 tot += sqr(val[s][i]-val[s][i+j]);
1425 fprintf(out, " %g %8g\n", dt*j, tot);
1429 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1433 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1437 plot_coscont(ccfile, n, nset, val, oenv);
1442 histogram(distfile, binwidth, n, nset, val, oenv);
1446 average(avfile, nenum(avbar_opt), n, nset, val, t);
1450 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1451 bEeFitAc, bEESEF, bEENLC, oenv);
1455 do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1458 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1459 /* nFitPoints, fit_start, fit_end, oenv); */
1462 power_fit(n, nset, val, t);
1469 for (s = 0; s < nset; s++)
1471 for (i = 0; i < n; i++)
1477 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1478 eacNormal, bAverCorr);
1483 regression_analysis(n, bXYdy, t, nset, val);
1488 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1491 view_all(oenv, NFILE, fnm);