1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
46 #include "gmx_fatal.h"
54 #include "gmx_matrix.h"
55 #include "gmx_statistics.h"
60 /* must correspond to char *avbar_opt[] declared in main() */
61 enum { avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR };
63 static void power_fit(int n,int nset,real **val,real *t)
65 real *x,*y,quality,a,b,r;
76 fprintf(stdout,"First time is not larger than 0, using index number as time for power fit\n");
81 for(s=0; s<nset; s++) {
83 for(i=0; i<n && val[s][i]>=0; i++)
84 y[i] = log(val[s][i]);
86 fprintf(stdout,"Will power fit up to point %d, since it is not larger than 0\n",i);
87 lsq_y_ax_b(i,x,y,&a,&b,&r,&quality);
88 fprintf(stdout,"Power fit set %3d: error %.3f a %g b %g\n",
89 s+1,quality,a,exp(b));
96 static real cosine_content(int nhp,int n,real *y)
97 /* Assumes n equidistant points */
99 double fac,cosyint,yyint;
105 fac = M_PI*nhp/(n-1);
110 cosyint += cos(fac*i)*y[i];
114 return 2*cosyint*cosyint/(n*yyint);
117 static void plot_coscont(const char *ccfile,int n,int nset,real **val,
118 const output_env_t oenv)
124 fp = xvgropen(ccfile,"Cosine content","set / half periods","cosine content",
127 for(s=0; s<nset; s++) {
128 cc = cosine_content(s+1,n,val[s]);
129 fprintf(fp," %d %g\n",s+1,cc);
130 fprintf(stdout,"Cosine content of set %d with %.1f periods: %g\n",
133 fprintf(stdout,"\n");
138 static void regression_analysis(int n,gmx_bool bXYdy,
139 real *x,int nset,real **val)
141 real S,chi2,a,b,da,db,r=0;
144 if (bXYdy || (nset == 1))
146 printf("Fitting data to a function f(x) = ax + b\n");
147 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
148 printf("Error estimates will be given if w_i (sigma) values are given\n");
149 printf("(use option -xydy).\n\n");
152 if ((ok = lsq_y_ax_b_error(n,x,val[0],val[1],&a,&b,&da,&db,&r,&S)) != estatsOK)
153 gmx_fatal(FARGS,"Error fitting the data: %s",
154 gmx_stats_message(ok));
158 if ((ok = lsq_y_ax_b(n,x,val[0],&a,&b,&r,&S)) != estatsOK)
159 gmx_fatal(FARGS,"Error fitting the data: %s",
160 gmx_stats_message(ok));
163 printf("Chi2 = %g\n",chi2);
164 printf("S (Sqrt(Chi2/(n-2)) = %g\n",S);
165 printf("Correlation coefficient = %.1f%%\n",100*r);
168 printf("a = %g +/- %g\n",a,da);
169 printf("b = %g +/- %g\n",b,db);
172 printf("a = %g\n",a);
173 printf("b = %g\n",b);
178 double chi2,*a,**xx,*y;
183 for(j=0; (j<nset-1); j++)
188 for(j=1; (j<nset); j++)
189 xx[j-1][i] = val[j][i];
192 chi2 = multi_regression(NULL,n,y,nset-1,xx,a);
193 printf("Fitting %d data points in %d sets\n",n,nset-1);
194 printf("chi2 = %g\n",chi2);
196 for(i=0; (i<nset-1); i++)
208 void histogram(const char *distfile,real binwidth,int n, int nset, real **val,
209 const output_env_t oenv)
215 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
216 long long int *histo;
223 for(s=0; s<nset; s++)
227 else if (val[s][i] > max)
230 min = binwidth*floor(min/binwidth);
231 max = binwidth*ceil(max/binwidth);
236 nbin = (int)((max - min)/binwidth + 0.5) + 1;
237 fprintf(stderr,"Making distributions with %d bins\n",nbin);
239 fp = xvgropen(distfile,"Distribution","","",oenv);
240 for(s=0; s<nset; s++) {
241 for(i=0; i<nbin; i++)
244 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
245 for(i=0; i<nbin; i++)
246 fprintf(fp," %g %g\n",min+i*binwidth,(double)histo[i]/(n*binwidth));
253 static int real_comp(const void *a,const void *b)
255 real dif = *(real *)a - *(real *)b;
265 static void average(const char *avfile,int avbar_opt,
266 int n, int nset,real **val,real *t)
273 fp = ffopen(avfile,"w");
274 if ((avbar_opt == avbarERROR) && (nset == 1))
275 avbar_opt = avbarNONE;
276 if (avbar_opt != avbarNONE) {
277 if (avbar_opt == avbar90) {
279 fprintf(fp,"@TYPE xydydy\n");
280 edge = (int)(nset*0.05+0.5);
281 fprintf(stdout,"Errorbars: discarding %d points on both sides: %d%%"
282 " interval\n",edge,(int)(100*(nset-2*edge)/nset+0.5));
284 fprintf(fp,"@TYPE xydy\n");
289 for(s=0; s<nset; s++)
292 fprintf(fp," %g %g",t[i],av);
294 if (avbar_opt != avbarNONE) {
295 if (avbar_opt == avbar90) {
296 for(s=0; s<nset; s++)
298 qsort(tmp,nset,sizeof(tmp[0]),real_comp);
299 fprintf(fp," %g %g",tmp[nset-1-edge]-av,av-tmp[edge]);
301 for(s=0; s<nset; s++)
302 var += sqr(val[s][i]-av);
303 if (avbar_opt == avbarSTDDEV)
304 err = sqrt(var/nset);
306 err = sqrt(var/(nset*(nset-1)));
307 fprintf(fp," %g",err);
314 if (avbar_opt == avbar90)
318 static real anal_ee_inf(real *parm,real T)
320 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
323 static real anal_ee(real *parm,real T,real t)
328 e1 = exp(-t/parm[0]);
332 e2 = exp(-t/parm[2]);
336 return sqrt(parm[1]*2*parm[0]/T*((e1 - 1)*parm[0]/t + 1) +
337 parm[3]*2*parm[2]/T*((e2 - 1)*parm[2]/t + 1));
340 static void estimate_error(const char *eefile,int nb_min,int resol,int n,
341 int nset, double *av,double *sig,real **val,real dt,
342 gmx_bool bFitAc,gmx_bool bSingleExpFit,gmx_bool bAllowNegLTCorr,
343 const output_env_t oenv)
346 int bs,prev_bs,nbs,nb;
351 real *tbs,*ybs,rtmp,dens,*fitsig,twooe,tau1_est,tau_sig;
357 fprintf(stdout,"The number of points is smaller than 4, can not make an error estimate\n");
362 fp = xvgropen(eefile,"Error estimates",
363 "Block size (time)","Error estimate", oenv);
364 if (output_env_get_print_xvgr_codes(oenv))
367 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
371 xvgr_legend(fp,2*nset,(const char**)leg,oenv);
374 spacing = pow(2,1.0/resol);
378 for(s=0; s<nset; s++)
395 blav += val[s][bs*i+j];
397 var += sqr(av[s] - blav/bs);
406 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
423 for(i=0; i<nbs/2; i++)
426 tbs[i] = tbs[nbs-1-i];
429 ybs[i] = ybs[nbs-1-i];
432 /* The initial slope of the normalized ybs^2 is 1.
433 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
434 * From this we take our initial guess for tau1.
442 } while (i < nbs - 1 &&
443 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
447 fprintf(stdout,"Data set %d has strange time correlations:\n"
448 "the std. error using single points is larger than that of blocks of 2 points\n"
449 "The error estimate might be inaccurate, check the fit\n",
451 /* Use the total time as tau for the fitting weights */
452 tau_sig = (n - 1)*dt;
461 fprintf(debug,"set %d tau1 estimate %f\n",s+1,tau1_est);
464 /* Generate more or less appropriate sigma's,
465 * also taking the density of points into account.
471 dens = tbs[1]/tbs[0] - 1;
475 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
479 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
481 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
486 fitparm[0] = tau1_est;
488 /* We set the initial guess for tau2
489 * to halfway between tau1_est and the total time (on log scale).
491 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
492 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,
493 bDebugMode(),effnERREST,fitparm,0);
494 fitparm[3] = 1-fitparm[1];
496 if (bSingleExpFit || fitparm[0]<0 || fitparm[2]<0 || fitparm[1]<0
497 || (fitparm[1]>1 && !bAllowNegLTCorr) || fitparm[2]>(n-1)*dt)
501 if (fitparm[2] > (n-1)*dt)
504 "Warning: tau2 is longer than the length of the data (%g)\n"
505 " the statistics might be bad\n",
510 fprintf(stdout,"a fitted parameter is negative\n");
512 fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
513 sig[s]*anal_ee_inf(fitparm,n*dt),
514 fitparm[1],fitparm[0],fitparm[2]);
515 /* Do a fit with tau2 fixed at the total time.
516 * One could also choose any other large value for tau2.
518 fitparm[0] = tau1_est;
520 fitparm[2] = (n-1)*dt;
521 fprintf(stderr,"Will fix tau2 at the total time: %g\n",fitparm[2]);
522 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
523 effnERREST,fitparm,4);
524 fitparm[3] = 1-fitparm[1];
526 if (bSingleExpFit || fitparm[0]<0 || fitparm[1]<0
527 || (fitparm[1]>1 && !bAllowNegLTCorr))
529 if (!bSingleExpFit) {
530 fprintf(stdout,"a fitted parameter is negative\n");
531 fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
532 sig[s]*anal_ee_inf(fitparm,n*dt),
533 fitparm[1],fitparm[0],fitparm[2]);
535 /* Do a single exponential fit */
536 fprintf(stderr,"Will use a single exponential fit for set %d\n",s+1);
537 fitparm[0] = tau1_est;
540 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
541 effnERREST,fitparm,6);
542 fitparm[3] = 1-fitparm[1];
545 ee = sig[s]*anal_ee_inf(fitparm,n*dt);
550 fprintf(stdout,"Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
552 fprintf(fp,"@ legend string %d \"av %f\"\n",2*s,av[s]);
553 fprintf(fp,"@ legend string %d \"ee %6g\"\n",
554 2*s+1,sig[s]*anal_ee_inf(fitparm,n*dt));
557 fprintf(fp,"%g %g %g\n",tbs[i],sig[s]*sqrt(ybs[i]/(n*dt)),
558 sig[s]*sqrt(fit_function(effnERREST,fitparm,tbs[i])/(n*dt)));
564 real *ac,acint,ac_fit[4];
568 ac[i] = val[s][i] - av[s];
574 low_do_autocorr(NULL,oenv,NULL,n,1,-1,&ac,
575 dt,eacNormal,1,FALSE,TRUE,
576 FALSE,0,0,effnNONE,0);
580 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
582 for(i=1; i<=fitlen/2; i++)
588 /* Generate more or less appropriate sigma's */
589 for(i=0; i<=fitlen; i++)
591 fitsig[i] = sqrt(acint + dt*i);
594 ac_fit[0] = 0.5*acint;
596 ac_fit[2] = 10*acint;
597 do_lmfit(n/nb_min,ac,fitsig,dt,0,0,fitlen*dt,oenv,
598 bDebugMode(),effnEXP3,ac_fit,0);
599 ac_fit[3] = 1 - ac_fit[1];
601 fprintf(stdout,"Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
602 s+1,sig[s]*anal_ee_inf(ac_fit,n*dt),
603 ac_fit[1],ac_fit[0],ac_fit[2]);
608 fprintf(fp,"%g %g\n",tbs[i],
609 sig[s]*sqrt(fit_function(effnERREST,ac_fit,tbs[i]))/(n*dt));
625 static void luzar_correl(int nn,real *time,int nset,real **val,real temp,
626 gmx_bool bError,real fit_start,real smooth_tail_start,
627 const output_env_t oenv)
629 const real tol = 1e-8;
634 please_cite(stdout,"Spoel2006b");
636 /* Compute negative derivative k(t) = -dc(t)/dt */
639 compute_derivative(nn,time,val[0],kt);
640 for(j=0; (j<nn); j++)
644 for(j=0; (j<nn); j++)
645 d2 += sqr(kt[j] - val[3][j]);
646 fprintf(debug,"RMS difference in derivatives is %g\n",sqrt(d2/nn));
648 analyse_corr(nn,time,val[0],val[2],kt,NULL,NULL,NULL,fit_start,
649 temp,smooth_tail_start,oenv);
652 else if (nset == 6) {
653 analyse_corr(nn,time,val[0],val[2],val[4],
654 val[1],val[3],val[5],fit_start,temp,smooth_tail_start,oenv);
657 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
658 printf("Not doing anything. Sorry.\n");
662 static void filter(real flen,int n,int nset,real **val,real dt,
663 const output_env_t oenv)
666 double *filt,sum,vf,fluc,fluctot;
668 f = (int)(flen/(2*dt));
672 for(i=1; i<=f; i++) {
673 filt[i] = cos(M_PI*dt*i/flen);
678 fprintf(stdout,"Will calculate the fluctuation over %d points\n",n-2*f);
679 fprintf(stdout," using a filter of length %g of %d points\n",flen,2*f+1);
681 for(s=0; s<nset; s++) {
683 for(i=f; i<n-f; i++) {
684 vf = filt[0]*val[s][i];
686 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
687 fluc += sqr(val[s][i] - vf);
691 fprintf(stdout,"Set %3d filtered fluctuation: %12.6e\n",s+1,sqrt(fluc));
693 fprintf(stdout,"Overall filtered fluctuation: %12.6e\n",sqrt(fluctot/nset));
694 fprintf(stdout,"\n");
699 static void do_fit(FILE *out,int n,gmx_bool bYdy,
700 int ny,real *x0,real **val,
701 int npargs,t_pargs *ppa,const output_env_t oenv)
703 real *c1=NULL,*sig=NULL,*fitparm;
704 real tendfit,tbeginfit;
707 efitfn = get_acffitfn();
708 nparm = nfp_ffn[efitfn];
709 fprintf(out,"Will fit to the following function:\n");
710 fprintf(out,"%s\n",longs_ffn[efitfn]);
715 fprintf(out,"Using two columns as y and sigma values\n");
719 if (opt2parg_bSet("-beginfit",npargs,ppa)) {
720 tbeginfit = opt2parg_real("-beginfit",npargs,ppa);
724 if (opt2parg_bSet("-endfit",npargs,ppa)) {
725 tendfit = opt2parg_real("-endfit",npargs,ppa);
741 fitparm[1] = 0.5*c1[0];
745 fitparm[0] = fitparm[2] = 0.5*c1[0];
751 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
758 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
766 fprintf(out,"Warning: don't know how to initialize the parameters\n");
767 for(i=0; (i<nparm); i++)
770 fprintf(out,"Starting parameters:\n");
771 for(i=0; (i<nparm); i++)
772 fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
773 if (do_lmfit(ny,c1,sig,0,x0,tbeginfit,tendfit,
774 oenv,bDebugMode(),efitfn,fitparm,0)) {
775 for(i=0; (i<nparm); i++)
776 fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
779 fprintf(out,"No solution was found\n");
783 static void do_ballistic(const char *balFile, int nData,
784 real *t, real **val, int nSet,
785 real balTime, int nBalExp,
786 gmx_bool bDerivative,
787 const output_env_t oenv)
789 double **ctd=NULL, *td=NULL;
790 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp, bDerivative);
791 static char *leg[] = {"Ac'(t)"};
795 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
800 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv);
801 xvgr_legend(fp,asize(leg),(const char**)leg,oenv);
803 for (set=0; set<nSet; set++)
805 snew(ctd[set], nData);
806 for (i=0; i<nData; i++) {
807 ctd[set][i] = (double)val[set][i];
809 td[i] = (double)t[i];
812 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
815 for (i=0; i<nData; i++)
817 fprintf(fp, " %g",t[i]);
818 for (set=0; set<nSet; set++)
820 fprintf(fp, " %g", ctd[set][i]);
826 for (set=0; set<nSet; set++)
832 printf("Number of data points is less than the number of parameters to fit\n."
833 "The system is underdetermined, hence no ballistic term can be found.\n\n");
836 static void do_geminate(const char *gemFile, int nData,
837 real *t, real **val, int nSet,
838 const real D, const real rcut, const real balTime,
839 const int nFitPoints, const real begFit, const real endFit,
840 const output_env_t oenv)
842 double **ctd=NULL, **ctdGem=NULL, *td=NULL;
843 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
844 begFit, endFit, balTime, 1, FALSE);
845 const char *leg[] = {"Ac\\sgem\\N(t)"};
853 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv);
854 xvgr_legend(fp,asize(leg),leg,oenv);
856 for (set=0; set<nSet; set++)
858 snew(ctd[set], nData);
859 snew(ctdGem[set], nData);
860 for (i=0; i<nData; i++) {
861 ctd[set][i] = (double)val[set][i];
863 td[i] = (double)t[i];
865 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
868 for (i=0; i<nData; i++)
870 fprintf(fp, " %g",t[i]);
871 for (set=0; set<nSet; set++)
873 fprintf(fp, " %g", ctdGem[set][i]);
878 for (set=0; set<nSet; set++)
888 int gmx_analyze(int argc,char *argv[])
890 static const char *desc[] = {
891 "[TT]g_analyze[tt] reads an ascii file and analyzes data sets.",
892 "A line in the input file may start with a time",
893 "(see option [TT]-time[tt]) and any number of y values may follow.",
894 "Multiple sets can also be",
895 "read when they are separated by & (option [TT]-n[tt]),",
896 "in this case only one y value is read from each line.",
897 "All lines starting with # and @ are skipped.",
898 "All analyses can also be done for the derivative of a set",
899 "(option [TT]-d[tt]).[PAR]",
901 "All options, except for [TT]-av[tt] and [TT]-power[tt] assume that the",
902 "points are equidistant in time.[PAR]",
904 "[TT]g_analyze[tt] always shows the average and standard deviation of each",
905 "set. For each set it also shows the relative deviation of the third",
906 "and fourth cumulant from those of a Gaussian distribution with the same",
907 "standard deviation.[PAR]",
909 "Option [TT]-ac[tt] produces the autocorrelation function(s).[PAR]",
911 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
912 "i/2 periods. The formula is:[BR]"
913 "2 (int0-T y(t) cos(i pi t) dt)^2 / int0-T y(t) y(t) dt[BR]",
914 "This is useful for principal components obtained from covariance",
915 "analysis, since the principal components of random diffusion are",
916 "pure cosines.[PAR]",
918 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
920 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
922 "Option [TT]-av[tt] produces the average over the sets.",
923 "Error bars can be added with the option [TT]-errbar[tt].",
924 "The errorbars can represent the standard deviation, the error",
925 "(assuming the points are independent) or the interval containing",
926 "90% of the points, by discarding 5% of the points at the top and",
929 "Option [TT]-ee[tt] produces error estimates using block averaging.",
930 "A set is divided in a number of blocks and averages are calculated for",
931 "each block. The error for the total average is calculated from",
932 "the variance between averages of the m blocks B_i as follows:",
933 "error^2 = Sum (B_i - <B>)^2 / (m*(m-1)).",
934 "These errors are plotted as a function of the block size.",
935 "Also an analytical block average curve is plotted, assuming",
936 "that the autocorrelation is a sum of two exponentials.",
937 "The analytical curve for the block average is:[BR]",
938 "f(t) = sigma sqrt(2/T ( a (tau1 ((exp(-t/tau1) - 1) tau1/t + 1)) +[BR]",
939 " (1-a) (tau2 ((exp(-t/tau2) - 1) tau2/t + 1)))),[BR]"
940 "where T is the total time.",
941 "a, tau1 and tau2 are obtained by fitting f^2(t) to error^2.",
942 "When the actual block average is very close to the analytical curve,",
943 "the error is sigma*sqrt(2/T (a tau1 + (1-a) tau2)).",
944 "The complete derivation is given in",
945 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
947 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
948 "component from a hydrogen bond autocorrelation function by the fitting",
949 "of a sum of exponentials, as described in e.g.",
950 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
951 "is the one with the most negative coefficient in the exponential,",
952 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
953 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
955 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
956 "(and optionally kD) to the hydrogen bond autocorrelation function",
957 "according to the reversible geminate recombination model. Removal of",
958 "the ballistic component first is strongly adviced. The model is presented in",
959 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
961 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
962 "of each set and over all sets with respect to a filtered average.",
963 "The filter is proportional to cos(pi t/len) where t goes from -len/2",
964 "to len/2. len is supplied with the option [TT]-filter[tt].",
965 "This filter reduces oscillations with period len/2 and len by a factor",
966 "of 0.79 and 0.33 respectively.[PAR]",
968 "Option [TT]-g[tt] fits the data to the function given with option",
969 "[TT]-fitfn[tt].[PAR]",
971 "Option [TT]-power[tt] fits the data to b t^a, which is accomplished",
972 "by fitting to a t + b on log-log scale. All points after the first",
973 "zero or negative value are ignored.[PAR]"
975 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
976 "on output from [TT]g_hbond[tt]. The input file can be taken directly",
977 "from [TT]g_hbond -ac[tt], and then the same result should be produced."
979 static real tb=-1,te=-1,frac=0.5,filtlen=0,binwidth=0.1,aver_start=0;
980 static gmx_bool bHaveT=TRUE,bDer=FALSE,bSubAv=TRUE,bAverCorr=FALSE,bXYdy=FALSE;
981 static gmx_bool bEESEF=FALSE,bEENLC=FALSE,bEeFitAc=FALSE,bPower=FALSE;
982 static gmx_bool bIntegrate=FALSE,bRegression=FALSE,bLuzar=FALSE,bLuzarError=FALSE;
983 static int nsets_in=1,d=1,nb_min=4,resol=10, nBalExp=4, nFitPoints=100;
984 static real temp=298.15,fit_start=1, fit_end=60, smooth_tail_start=-1, balTime=0.2, diffusion=5e-5,rcut=0.35;
986 /* must correspond to enum avbar* declared at beginning of file */
987 static const char *avbar_opt[avbarNR+1] = {
988 NULL, "none", "stddev", "error", "90", NULL
992 { "-time", FALSE, etBOOL, {&bHaveT},
993 "Expect a time in the input" },
994 { "-b", FALSE, etREAL, {&tb},
995 "First time to read from set" },
996 { "-e", FALSE, etREAL, {&te},
997 "Last time to read from set" },
998 { "-n", FALSE, etINT, {&nsets_in},
999 "Read # sets separated by &" },
1000 { "-d", FALSE, etBOOL, {&bDer},
1001 "Use the derivative" },
1002 { "-dp", FALSE, etINT, {&d},
1003 "HIDDENThe derivative is the difference over # points" },
1004 { "-bw", FALSE, etREAL, {&binwidth},
1005 "Binwidth for the distribution" },
1006 { "-errbar", FALSE, etENUM, {avbar_opt},
1007 "Error bars for [TT]-av[tt]" },
1008 { "-integrate",FALSE,etBOOL, {&bIntegrate},
1009 "Integrate data function(s) numerically using trapezium rule" },
1010 { "-aver_start",FALSE, etREAL, {&aver_start},
1011 "Start averaging the integral from here" },
1012 { "-xydy", FALSE, etBOOL, {&bXYdy},
1013 "Interpret second data set as error in the y values for integrating" },
1014 { "-regression",FALSE,etBOOL,{&bRegression},
1015 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize chi^2 = (y - A0 x0 - A1 x1 - ... - AN xN)^2 where now Y is the first data set in the input file and xi the others. Do read the information at the option [TT]-time[tt]." },
1016 { "-luzar", FALSE, etBOOL, {&bLuzar},
1017 "Do a Luzar and Chandler analysis on a correlation function and related as produced by [TT]g_hbond[tt]. When in addition the [TT]-xydy[tt] flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1018 { "-temp", FALSE, etREAL, {&temp},
1019 "Temperature for the Luzar hydrogen bonding kinetics analysis" },
1020 { "-fitstart", FALSE, etREAL, {&fit_start},
1021 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1022 { "-fitend", FALSE, etREAL, {&fit_end},
1023 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1024 { "-smooth",FALSE, etREAL, {&smooth_tail_start},
1025 "If >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: y = A exp(-x/tau)" },
1026 { "-nbmin", FALSE, etINT, {&nb_min},
1027 "HIDDENMinimum number of blocks for block averaging" },
1028 { "-resol", FALSE, etINT, {&resol},
1029 "HIDDENResolution for the block averaging, block size increases with"
1030 " a factor 2^(1/#)" },
1031 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1032 "HIDDENAlways use a single exponential fit for the error estimate" },
1033 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1034 "HIDDENAllow a negative long-time correlation" },
1035 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1036 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1037 { "-filter", FALSE, etREAL, {&filtlen},
1038 "Print the high-frequency fluctuation after filtering with a cosine filter of length #" },
1039 { "-power", FALSE, etBOOL, {&bPower},
1040 "Fit data to: b t^a" },
1041 { "-subav", FALSE, etBOOL, {&bSubAv},
1042 "Subtract the average before autocorrelating" },
1043 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1044 "Calculate one ACF over all sets" },
1045 { "-nbalexp", FALSE, etINT, {&nBalExp},
1046 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1047 { "-baltime", FALSE, etREAL, {&balTime},
1048 "HIDDENTime up to which the ballistic component will be fitted" },
1049 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1050 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1051 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1052 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1053 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1054 /* "What type of gminate recombination to use"}, */
1055 /* { "-D", FALSE, etREAL, {&diffusion}, */
1056 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1058 #define NPA asize(pa)
1061 int n,nlast,s,nset,i,j=0;
1062 real **val,*t,dt,tot,error;
1063 double *av,*sig,cum1,cum2,cum3,cum4,db;
1064 const char *acfile,*msdfile,*ccfile,*distfile,*avfile,*eefile,*balfile,*gemfile,*fitfile;
1068 { efXVG, "-f", "graph", ffREAD },
1069 { efXVG, "-ac", "autocorr", ffOPTWR },
1070 { efXVG, "-msd", "msd", ffOPTWR },
1071 { efXVG, "-cc", "coscont", ffOPTWR },
1072 { efXVG, "-dist", "distr", ffOPTWR },
1073 { efXVG, "-av", "average", ffOPTWR },
1074 { efXVG, "-ee", "errest", ffOPTWR },
1075 { efXVG, "-bal", "ballisitc",ffOPTWR },
1076 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1077 { efLOG, "-g", "fitlog", ffOPTWR }
1079 #define NFILE asize(fnm)
1085 ppa = add_acf_pargs(&npargs,pa);
1087 CopyRight(stderr,argv[0]);
1088 parse_common_args(&argc,argv,PCA_CAN_VIEW,
1089 NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL,&oenv);
1091 acfile = opt2fn_null("-ac",NFILE,fnm);
1092 msdfile = opt2fn_null("-msd",NFILE,fnm);
1093 ccfile = opt2fn_null("-cc",NFILE,fnm);
1094 distfile = opt2fn_null("-dist",NFILE,fnm);
1095 avfile = opt2fn_null("-av",NFILE,fnm);
1096 eefile = opt2fn_null("-ee",NFILE,fnm);
1097 balfile = opt2fn_null("-bal",NFILE,fnm);
1098 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1099 /* When doing autocorrelation we don't want a fitlog for fitting
1100 * the function itself (not the acf) when the user did not ask for it.
1102 if (opt2parg_bSet("-fitfn",npargs,ppa) && acfile == NULL)
1104 fitfile = opt2fn("-g",NFILE,fnm);
1108 fitfile = opt2fn_null("-g",NFILE,fnm);
1111 val = read_xvg_time(opt2fn("-f",NFILE,fnm),bHaveT,
1112 opt2parg_bSet("-b",npargs,ppa),tb,
1113 opt2parg_bSet("-e",npargs,ppa),te,
1114 nsets_in,&nset,&n,&dt,&t);
1115 printf("Read %d sets of %d points, dt = %g\n\n",nset,n,dt);
1119 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1122 for(s=0; s<nset; s++)
1124 for(i=0; (i<n); i++)
1126 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1135 printf("Calculating the integral using the trapezium rule\n");
1139 sum = evaluate_integral(n,t,val[0],val[1],aver_start,&stddev);
1140 printf("Integral %10.3f +/- %10.5f\n",sum,stddev);
1144 for(s=0; s<nset; s++)
1146 sum = evaluate_integral(n,t,val[s],NULL,aver_start,&stddev);
1147 printf("Integral %d %10.5f +/- %10.5f\n",s+1,sum,stddev);
1152 if (fitfile != NULL)
1154 out_fit = ffopen(fitfile,"w");
1155 if (bXYdy && nset >= 2)
1157 do_fit(out_fit,0,TRUE,n,t,val,npargs,ppa,oenv);
1161 for(s=0; s<nset; s++)
1163 do_fit(out_fit,s,FALSE,n,t,val,npargs,ppa,oenv);
1169 printf(" std. dev. relative deviation of\n");
1170 printf(" standard --------- cumulants from those of\n");
1171 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1172 printf(" cum. 3 cum. 4\n");
1175 for(s=0; (s<nset); s++) {
1180 for(i=0; (i<n); i++)
1183 for(i=0; (i<n); i++) {
1184 db = val[s][i]-cum1;
1187 cum4 += db*db*db*db;
1193 sig[s] = sqrt(cum2);
1195 error = sqrt(cum2/(n-1));
1198 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1199 s+1,av[s],sig[s],error,
1200 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1201 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1206 filter(filtlen,n,nset,val,dt,oenv);
1209 out=xvgropen(msdfile,"Mean square displacement",
1210 "time","MSD (nm\\S2\\N)",oenv);
1211 nlast = (int)(n*frac);
1212 for(s=0; s<nset; s++) {
1213 for(j=0; j<=nlast; j++) {
1215 fprintf(stderr,"\r%d",j);
1217 for(i=0; i<n-j; i++)
1218 tot += sqr(val[s][i]-val[s][i+j]);
1220 fprintf(out," %g %8g\n",dt*j,tot);
1226 fprintf(stderr,"\r%d, time=%g\n",j-1,(j-1)*dt);
1229 plot_coscont(ccfile,n,nset,val,oenv);
1232 histogram(distfile,binwidth,n,nset,val,oenv);
1234 average(avfile,nenum(avbar_opt),n,nset,val,t);
1236 estimate_error(eefile,nb_min,resol,n,nset,av,sig,val,dt,
1237 bEeFitAc,bEESEF,bEENLC,oenv);
1239 do_ballistic(balfile,n,t,val,nset,balTime,nBalExp,bDer,oenv);
1241 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1242 /* nFitPoints, fit_start, fit_end, oenv); */
1244 power_fit(n,nset,val,t);
1250 for(s=0; s<nset; s++)
1258 do_autocorr(acfile,oenv,"Autocorrelation",n,nset,val,dt,
1259 eacNormal,bAverCorr);
1263 regression_analysis(n,bXYdy,t,nset,val);
1266 luzar_correl(n,t,nset,val,temp,bXYdy,fit_start,smooth_tail_start,oenv);
1268 view_all(oenv,NFILE, fnm);