src/gromacs/gmxana/gmx_analyze.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include <math.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42
  43 #include "gromacs/commandline/pargs.h"
  44 #include "gromacs/correlationfunctions/autocorr.h"
  45 #include "gromacs/correlationfunctions/expfit.h"
  46 #include "gromacs/correlationfunctions/integrate.h"
  47 #include "gromacs/fileio/xvgr.h"
  48 #include "gromacs/gmxana/geminate.h"
  49 #include "gromacs/gmxana/gmx_ana.h"
  50 #include "gromacs/gmxana/gstat.h"
  51 #include "gromacs/legacyheaders/copyrite.h"
  52 #include "gromacs/legacyheaders/macros.h"
  53 #include "gromacs/legacyheaders/readinp.h"
  54 #include "gromacs/legacyheaders/txtdump.h"
  55 #include "gromacs/legacyheaders/typedefs.h"
  56 #include "gromacs/legacyheaders/viewit.h"
  57 #include "gromacs/linearalgebra/matrix.h"
  58 #include "gromacs/math/vec.h"
  59 #include "gromacs/statistics/statistics.h"
  60 #include "gromacs/utility/fatalerror.h"
  61 #include "gromacs/utility/futil.h"
  62 #include "gromacs/utility/smalloc.h"
  63 #include "gromacs/utility/snprintf.h"
  64
  65 /* must correspond to char *avbar_opt[] declared in main() */
  66 enum {
  67     avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
  68 };
  69
  70 static void power_fit(int n, int nset, real **val, real *t)
  71 {
  72     real *x, *y, quality, a, b, r;
  73     int   s, i;
  74
  75     snew(x, n);
  76     snew(y, n);
  77
  78     if (t[0] > 0)
  79     {
  80         for (i = 0; i < n; i++)
  81         {
  82             if (t[0] > 0)
  83             {
  84                 x[i] = log(t[i]);
  85             }
  86         }
  87     }
  88     else
  89     {
  90         fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
  91         for (i = 0; i < n; i++)
  92         {
  93             x[i] = gmx_log1p(i);
  94         }
  95     }
  96
  97     for (s = 0; s < nset; s++)
  98     {
  99         i = 0;
 100         for (i = 0; i < n && val[s][i] >= 0; i++)
 101         {
 102             y[i] = log(val[s][i]);
 103         }
 104         if (i < n)
 105         {
 106             fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
 107         }
 108         lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
 109         fprintf(stdout, "Power fit set %3d:  error %.3f  a %g  b %g\n",
 110                 s+1, quality, a, exp(b));
 111     }
 112
 113     sfree(y);
 114     sfree(x);
 115 }
 116
 117 static real cosine_content(int nhp, int n, real *y)
 118 /* Assumes n equidistant points */
 119 {
 120     double fac, cosyint, yyint;
 121     int    i;
 122
 123     if (n < 2)
 124     {
 125         return 0;
 126     }
 127
 128     fac = M_PI*nhp/(n-1);
 129
 130     cosyint = 0;
 131     yyint   = 0;
 132     for (i = 0; i < n; i++)
 133     {
 134         cosyint += cos(fac*i)*y[i];
 135         yyint   += y[i]*y[i];
 136     }
 137
 138     return 2*cosyint*cosyint/(n*yyint);
 139 }
 140
 141 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
 142                          const output_env_t oenv)
 143 {
 144     FILE *fp;
 145     int   s;
 146     real  cc;
 147
 148     fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
 149                   oenv);
 150
 151     for (s = 0; s < nset; s++)
 152     {
 153         cc = cosine_content(s+1, n, val[s]);
 154         fprintf(fp, " %d %g\n", s+1, cc);
 155         fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
 156                 s+1, 0.5*(s+1), cc);
 157     }
 158     fprintf(stdout, "\n");
 159
 160     xvgrclose(fp);
 161 }
 162
 163 static void regression_analysis(int n, gmx_bool bXYdy,
 164                                 real *x, int nset, real **val)
 165 {
 166     real S, chi2, a, b, da, db, r = 0;
 167     int  ok;
 168
 169     if (bXYdy || (nset == 1))
 170     {
 171         printf("Fitting data to a function f(x) = ax + b\n");
 172         printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
 173         printf("Error estimates will be given if w_i (sigma) values are given\n");
 174         printf("(use option -xydy).\n\n");
 175         if (bXYdy)
 176         {
 177             if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
 178             {
 179                 gmx_fatal(FARGS, "Error fitting the data: %s",
 180                           gmx_stats_message(ok));
 181             }
 182         }
 183         else
 184         {
 185             if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
 186             {
 187                 gmx_fatal(FARGS, "Error fitting the data: %s",
 188                           gmx_stats_message(ok));
 189             }
 190         }
 191         chi2 = sqr((n-2)*S);
 192         printf("Chi2                    = %g\n", chi2);
 193         printf("S (Sqrt(Chi2/(n-2))     = %g\n", S);
 194         printf("Correlation coefficient = %.1f%%\n", 100*r);
 195         printf("\n");
 196         if (bXYdy)
 197         {
 198             printf("a    = %g +/- %g\n", a, da);
 199             printf("b    = %g +/- %g\n", b, db);
 200         }
 201         else
 202         {
 203             printf("a    = %g\n", a);
 204             printf("b    = %g\n", b);
 205         }
 206     }
 207     else
 208     {
 209         double chi2, *a, **xx, *y;
 210         int    i, j;
 211
 212         snew(y, n);
 213         snew(xx, nset-1);
 214         for (j = 0; (j < nset-1); j++)
 215         {
 216             snew(xx[j], n);
 217         }
 218         for (i = 0; (i < n); i++)
 219         {
 220             y[i] = val[0][i];
 221             for (j = 1; (j < nset); j++)
 222             {
 223                 xx[j-1][i] = val[j][i];
 224             }
 225         }
 226         snew(a, nset-1);
 227         chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
 228         printf("Fitting %d data points in %d sets\n", n, nset-1);
 229         printf("chi2 = %g\n", chi2);
 230         printf("A =");
 231         for (i = 0; (i < nset-1); i++)
 232         {
 233             printf("  %g", a[i]);
 234             sfree(xx[i]);
 235         }
 236         printf("\n");
 237         sfree(xx);
 238         sfree(y);
 239         sfree(a);
 240     }
 241 }
 242
 243 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
 244                const output_env_t oenv)
 245 {
 246     FILE          *fp;
 247     int            i, s;
 248     double         min, max;
 249     int            nbin;
 250     gmx_int64_t   *histo;
 251
 252     min = val[0][0];
 253     max = val[0][0];
 254     for (s = 0; s < nset; s++)
 255     {
 256         for (i = 0; i < n; i++)
 257         {
 258             if (val[s][i] < min)
 259             {
 260                 min = val[s][i];
 261             }
 262             else if (val[s][i] > max)
 263             {
 264                 max = val[s][i];
 265             }
 266         }
 267     }
 268
 269     min = binwidth*floor(min/binwidth);
 270     max = binwidth*ceil(max/binwidth);
 271     if (min != 0)
 272     {
 273         min -= binwidth;
 274     }
 275     max += binwidth;
 276
 277     nbin = (int)((max - min)/binwidth + 0.5) + 1;
 278     fprintf(stderr, "Making distributions with %d bins\n", nbin);
 279     snew(histo, nbin);
 280     fp = xvgropen(distfile, "Distribution", "", "", oenv);
 281     for (s = 0; s < nset; s++)
 282     {
 283         for (i = 0; i < nbin; i++)
 284         {
 285             histo[i] = 0;
 286         }
 287         for (i = 0; i < n; i++)
 288         {
 289             histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
 290         }
 291         for (i = 0; i < nbin; i++)
 292         {
 293             fprintf(fp, " %g  %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
 294         }
 295         if (s < nset-1)
 296         {
 297             fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
 298         }
 299     }
 300     xvgrclose(fp);
 301 }
 302
 303 static int real_comp(const void *a, const void *b)
 304 {
 305     real dif = *(real *)a - *(real *)b;
 306
 307     if (dif < 0)
 308     {
 309         return -1;
 310     }
 311     else if (dif > 0)
 312     {
 313         return 1;
 314     }
 315     else
 316     {
 317         return 0;
 318     }
 319 }
 320
 321 static void average(const char *avfile, int avbar_opt,
 322                     int n, int nset, real **val, real *t)
 323 {
 324     FILE   *fp;
 325     int     i, s, edge = 0;
 326     double  av, var, err;
 327     real   *tmp = NULL;
 328
 329     fp = gmx_ffopen(avfile, "w");
 330     if ((avbar_opt == avbarERROR) && (nset == 1))
 331     {
 332         avbar_opt = avbarNONE;
 333     }
 334     if (avbar_opt != avbarNONE)
 335     {
 336         if (avbar_opt == avbar90)
 337         {
 338             snew(tmp, nset);
 339             fprintf(fp, "@TYPE xydydy\n");
 340             edge = (int)(nset*0.05+0.5);
 341             fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
 342                     " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
 343         }
 344         else
 345         {
 346             fprintf(fp, "@TYPE xydy\n");
 347         }
 348     }
 349
 350     for (i = 0; i < n; i++)
 351     {
 352         av = 0;
 353         for (s = 0; s < nset; s++)
 354         {
 355             av += val[s][i];
 356         }
 357         av /= nset;
 358         fprintf(fp, " %g %g", t[i], av);
 359         var = 0;
 360         if (avbar_opt != avbarNONE)
 361         {
 362             if (avbar_opt == avbar90)
 363             {
 364                 for (s = 0; s < nset; s++)
 365                 {
 366                     tmp[s] = val[s][i];
 367                 }
 368                 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
 369                 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
 370             }
 371             else
 372             {
 373                 for (s = 0; s < nset; s++)
 374                 {
 375                     var += sqr(val[s][i]-av);
 376                 }
 377                 if (avbar_opt == avbarSTDDEV)
 378                 {
 379                     err = sqrt(var/nset);
 380                 }
 381                 else
 382                 {
 383                     err = sqrt(var/(nset*(nset-1)));
 384                 }
 385                 fprintf(fp, " %g", err);
 386             }
 387         }
 388         fprintf(fp, "\n");
 389     }
 390     gmx_ffclose(fp);
 391
 392     if (avbar_opt == avbar90)
 393     {
 394         sfree(tmp);
 395     }
 396 }
 397
 398 /*! \brief Compute final error estimate.
 399  *
 400  * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
 401  */
 402 static real optimal_error_estimate(double sigma, double fitparm[], real tTotal)
 403 {
 404     double ss = fitparm[1]*fitparm[0]+(1-fitparm[1])*fitparm[2];
 405     if ((tTotal <= 0) || (ss <= 0))
 406     {
 407         fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n",
 408                 tTotal, ss);
 409         return 0;
 410     }
 411
 412     return sigma*sqrt(2*ss/tTotal);
 413 }
 414
 415 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
 416                            int nset, double *av, double *sig, real **val, real dt,
 417                            gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
 418                            const output_env_t oenv)
 419 {
 420     FILE    *fp;
 421     int      bs, prev_bs, nbs, nb;
 422     real     spacing, nbr;
 423     int      s, i, j;
 424     double   blav, var;
 425     char   **leg;
 426     real    *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
 427     double   fitparm[3];
 428     real     ee, a, tau1, tau2;
 429
 430     if (n < 4)
 431     {
 432         fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
 433
 434         return;
 435     }
 436
 437     fp = xvgropen(eefile, "Error estimates",
 438                   "Block size (time)", "Error estimate", oenv);
 439     if (output_env_get_print_xvgr_codes(oenv))
 440     {
 441         fprintf(fp,
 442                 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
 443                 (n-1)*dt, n);
 444     }
 445     snew(leg, 2*nset);
 446     xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
 447     sfree(leg);
 448
 449     spacing = pow(2, 1.0/resol);
 450     snew(tbs, n);
 451     snew(ybs, n);
 452     snew(fitsig, n);
 453     for (s = 0; s < nset; s++)
 454     {
 455         nbs     = 0;
 456         prev_bs = 0;
 457         nbr     = nb_min;
 458         while (nbr <= n)
 459         {
 460             bs = n/(int)nbr;
 461             if (bs != prev_bs)
 462             {
 463                 nb  = n/bs;
 464                 var = 0;
 465                 for (i = 0; i < nb; i++)
 466                 {
 467                     blav = 0;
 468                     for (j = 0; j < bs; j++)
 469                     {
 470                         blav += val[s][bs*i+j];
 471                     }
 472                     var += sqr(av[s] - blav/bs);
 473                 }
 474                 tbs[nbs] = bs*dt;
 475                 if (sig[s] == 0)
 476                 {
 477                     ybs[nbs] = 0;
 478                 }
 479                 else
 480                 {
 481                     ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
 482                 }
 483                 nbs++;
 484             }
 485             nbr    *= spacing;
 486             nb      = (int)(nbr+0.5);
 487             prev_bs = bs;
 488         }
 489         if (sig[s] == 0)
 490         {
 491             ee   = 0;
 492             a    = 1;
 493             tau1 = 0;
 494             tau2 = 0;
 495         }
 496         else
 497         {
 498             for (i = 0; i < nbs/2; i++)
 499             {
 500                 rtmp         = tbs[i];
 501                 tbs[i]       = tbs[nbs-1-i];
 502                 tbs[nbs-1-i] = rtmp;
 503                 rtmp         = ybs[i];
 504                 ybs[i]       = ybs[nbs-1-i];
 505                 ybs[nbs-1-i] = rtmp;
 506             }
 507             /* The initial slope of the normalized ybs^2 is 1.
 508              * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
 509              * From this we take our initial guess for tau1.
 510              */
 511             twooe = 2/exp(1);
 512             i     = -1;
 513             do
 514             {
 515                 i++;
 516                 tau1_est = tbs[i];
 517             }
 518             while (i < nbs - 1 &&
 519                    (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
 520
 521             if (ybs[0] > ybs[1])
 522             {
 523                 fprintf(stdout, "Data set %d has strange time correlations:\n"
 524                         "the std. error using single points is larger than that of blocks of 2 points\n"
 525                         "The error estimate might be inaccurate, check the fit\n",
 526                         s+1);
 527                 /* Use the total time as tau for the fitting weights */
 528                 tau_sig = (n - 1)*dt;
 529             }
 530             else
 531             {
 532                 tau_sig = tau1_est;
 533             }
 534
 535             if (debug)
 536             {
 537                 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
 538             }
 539
 540             /* Generate more or less appropriate sigma's,
 541              * also taking the density of points into account.
 542              */
 543             for (i = 0; i < nbs; i++)
 544             {
 545                 if (i == 0)
 546                 {
 547                     dens = tbs[1]/tbs[0] - 1;
 548                 }
 549                 else if (i == nbs-1)
 550                 {
 551                     dens = tbs[nbs-1]/tbs[nbs-2] - 1;
 552                 }
 553                 else
 554                 {
 555                     dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
 556                 }
 557                 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
 558             }
 559
 560             if (!bSingleExpFit)
 561             {
 562                 fitparm[0] = tau1_est;
 563                 fitparm[1] = 0.95;
 564                 /* We set the initial guess for tau2
 565                  * to halfway between tau1_est and the total time (on log scale).
 566                  */
 567                 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
 568                 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
 569                          bDebugMode(), effnERREST, fitparm, 0,
 570                          NULL);
 571             }
 572             if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
 573                 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
 574             {
 575                 if (!bSingleExpFit)
 576                 {
 577                     if (fitparm[2] > (n-1)*dt)
 578                     {
 579                         fprintf(stdout,
 580                                 "Warning: tau2 is longer than the length of the data (%g)\n"
 581                                 "         the statistics might be bad\n",
 582                                 (n-1)*dt);
 583                     }
 584                     else
 585                     {
 586                         fprintf(stdout, "a fitted parameter is negative\n");
 587                     }
 588                     fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 589                             optimal_error_estimate(sig[s], fitparm, n*dt),
 590                             fitparm[1], fitparm[0], fitparm[2]);
 591                     /* Do a fit with tau2 fixed at the total time.
 592                      * One could also choose any other large value for tau2.
 593                      */
 594                     fitparm[0] = tau1_est;
 595                     fitparm[1] = 0.95;
 596                     fitparm[2] = (n-1)*dt;
 597                     fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
 598                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 599                              effnERREST, fitparm, 4, NULL);
 600                 }
 601                 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
 602                     || (fitparm[1] > 1 && !bAllowNegLTCorr))
 603                 {
 604                     if (!bSingleExpFit)
 605                     {
 606                         fprintf(stdout, "a fitted parameter is negative\n");
 607                         fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 608                                 optimal_error_estimate(sig[s], fitparm, n*dt),
 609                                 fitparm[1], fitparm[0], fitparm[2]);
 610                     }
 611                     /* Do a single exponential fit */
 612                     fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
 613                     fitparm[0] = tau1_est;
 614                     fitparm[1] = 1.0;
 615                     fitparm[2] = 0.0;
 616                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 617                              effnERREST, fitparm, 6, NULL);
 618                 }
 619             }
 620             ee   = optimal_error_estimate(sig[s], fitparm, n*dt);
 621             a    = fitparm[1];
 622             tau1 = fitparm[0];
 623             tau2 = fitparm[2];
 624         }
 625         fprintf(stdout, "Set %3d:  err.est. %g  a %g  tau1 %g  tau2 %g\n",
 626                 s+1, ee, a, tau1, tau2);
 627         if (output_env_get_xvg_format(oenv) == exvgXMGR)
 628         {
 629             fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
 630             fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1,
 631                     optimal_error_estimate(sig[s], fitparm, n*dt));
 632         }
 633         else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
 634         {
 635             fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
 636             fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1,
 637                     optimal_error_estimate(sig[s], fitparm, n*dt));
 638         }
 639         for (i = 0; i < nbs; i++)
 640         {
 641             fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
 642                     sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
 643         }
 644
 645         if (bFitAc)
 646         {
 647             int    fitlen;
 648             real  *ac, acint;
 649             double ac_fit[4];
 650
 651             snew(ac, n);
 652             for (i = 0; i < n; i++)
 653             {
 654                 ac[i] = val[s][i] - av[s];
 655                 if (i > 0)
 656                 {
 657                     fitsig[i] = sqrt(i);
 658                 }
 659                 else
 660                 {
 661                     fitsig[i] = 1;
 662                 }
 663             }
 664             low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
 665                             dt, eacNormal, 1, FALSE, TRUE,
 666                             FALSE, 0, 0, effnNONE);
 667
 668             fitlen = n/nb_min;
 669
 670             /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
 671             acint = 0.5*ac[0];
 672             for (i = 1; i <= fitlen/2; i++)
 673             {
 674                 acint += ac[i];
 675             }
 676             acint *= dt;
 677
 678             /* Generate more or less appropriate sigma's */
 679             for (i = 0; i <= fitlen; i++)
 680             {
 681                 fitsig[i] = sqrt(acint + dt*i);
 682             }
 683
 684             ac_fit[0] = 0.5*acint;
 685             ac_fit[1] = 0.95;
 686             ac_fit[2] = 10*acint;
 687             do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
 688                      bDebugMode(), effnEXPEXP, ac_fit, 0, NULL);
 689             ac_fit[3] = 1 - ac_fit[1];
 690
 691             fprintf(stdout, "Set %3d:  ac erest %g  a %g  tau1 %g  tau2 %g\n",
 692                     s+1, optimal_error_estimate(sig[s], ac_fit, n*dt),
 693                     ac_fit[1], ac_fit[0], ac_fit[2]);
 694
 695             fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
 696             for (i = 0; i < nbs; i++)
 697             {
 698                 fprintf(fp, "%g %g\n", tbs[i],
 699                         sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
 700             }
 701
 702             sfree(ac);
 703         }
 704         if (s < nset-1)
 705         {
 706             fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
 707         }
 708     }
 709     sfree(fitsig);
 710     sfree(ybs);
 711     sfree(tbs);
 712     xvgrclose(fp);
 713 }
 714
 715 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
 716                          gmx_bool bError, real fit_start)
 717 {
 718     const real tol = 1e-8;
 719     real      *kt;
 720     real       weight, d2;
 721     int        j;
 722
 723     please_cite(stdout, "Spoel2006b");
 724
 725     /* Compute negative derivative k(t) = -dc(t)/dt */
 726     if (!bError)
 727     {
 728         snew(kt, nn);
 729         compute_derivative(nn, time, val[0], kt);
 730         for (j = 0; (j < nn); j++)
 731         {
 732             kt[j] = -kt[j];
 733         }
 734         if (debug)
 735         {
 736             d2 = 0;
 737             for (j = 0; (j < nn); j++)
 738             {
 739                 d2 += sqr(kt[j] - val[3][j]);
 740             }
 741             fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
 742         }
 743         analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
 744                      temp);
 745         sfree(kt);
 746     }
 747     else if (nset == 6)
 748     {
 749         analyse_corr(nn, time, val[0], val[2], val[4],
 750                      val[1], val[3], val[5], fit_start, temp);
 751     }
 752     else
 753     {
 754         printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
 755         printf("Not doing anything. Sorry.\n");
 756     }
 757 }
 758
 759 static void filter(real flen, int n, int nset, real **val, real dt)
 760 {
 761     int     f, s, i, j;
 762     double *filt, sum, vf, fluc, fluctot;
 763
 764     f = (int)(flen/(2*dt));
 765     snew(filt, f+1);
 766     filt[0] = 1;
 767     sum     = 1;
 768     for (i = 1; i <= f; i++)
 769     {
 770         filt[i] = cos(M_PI*dt*i/flen);
 771         sum    += 2*filt[i];
 772     }
 773     for (i = 0; i <= f; i++)
 774     {
 775         filt[i] /= sum;
 776     }
 777     fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
 778     fprintf(stdout, "  using a filter of length %g of %d points\n", flen, 2*f+1);
 779     fluctot = 0;
 780     for (s = 0; s < nset; s++)
 781     {
 782         fluc = 0;
 783         for (i = f; i < n-f; i++)
 784         {
 785             vf = filt[0]*val[s][i];
 786             for (j = 1; j <= f; j++)
 787             {
 788                 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
 789             }
 790             fluc += sqr(val[s][i] - vf);
 791         }
 792         fluc    /= n - 2*f;
 793         fluctot += fluc;
 794         fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
 795     }
 796     fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
 797     fprintf(stdout, "\n");
 798
 799     sfree(filt);
 800 }
 801
 802 static void do_fit(FILE *out, int n, gmx_bool bYdy,
 803                    int ny, real *x0, real **val,
 804                    int npargs, t_pargs *ppa, const output_env_t oenv,
 805                    const char *fn_fitted)
 806 {
 807     real   *c1 = NULL, *sig = NULL;
 808     double *fitparm;
 809     real    tendfit, tbeginfit;
 810     int     i, efitfn, nparm;
 811
 812     efitfn = get_acffitfn();
 813     nparm  = effnNparams(efitfn);
 814     fprintf(out, "Will fit to the following function:\n");
 815     fprintf(out, "%s\n", effnDescription(efitfn));
 816     c1 = val[n];
 817     if (bYdy)
 818     {
 819         c1  = val[n];
 820         sig = val[n+1];
 821         fprintf(out, "Using two columns as y and sigma values\n");
 822     }
 823     else
 824     {
 825         snew(sig, ny);
 826     }
 827     if (opt2parg_bSet("-beginfit", npargs, ppa))
 828     {
 829         tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
 830     }
 831     else
 832     {
 833         tbeginfit = x0[0];
 834     }
 835     if (opt2parg_bSet("-endfit", npargs, ppa))
 836     {
 837         tendfit   = opt2parg_real("-endfit", npargs, ppa);
 838     }
 839     else
 840     {
 841         tendfit   = x0[ny-1];
 842     }
 843
 844     snew(fitparm, nparm);
 845     switch (efitfn)
 846     {
 847         case effnEXP1:
 848             fitparm[0] = 0.5;
 849             break;
 850         case effnEXP2:
 851             fitparm[0] = 0.5;
 852             fitparm[1] = c1[0];
 853             break;
 854         case effnEXPEXP:
 855             fitparm[0] = 1.0;
 856             fitparm[1] = 0.5*c1[0];
 857             fitparm[2] = 10.0;
 858             break;
 859         case effnEXP5:
 860             fitparm[0] = fitparm[2] = 0.5*c1[0];
 861             fitparm[1] = 10;
 862             fitparm[3] = 40;
 863             fitparm[4] = 0;
 864             break;
 865         case effnEXP7:
 866             fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
 867             fitparm[1] = 1;
 868             fitparm[3] = 10;
 869             fitparm[5] = 100;
 870             fitparm[6] = 0;
 871             break;
 872         case effnEXP9:
 873             fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
 874             fitparm[1] = 0.1;
 875             fitparm[3] = 1;
 876             fitparm[5] = 10;
 877             fitparm[7] = 100;
 878             fitparm[8] = 0;
 879             break;
 880         default:
 881             fprintf(out, "Warning: don't know how to initialize the parameters\n");
 882             for (i = 0; (i < nparm); i++)
 883             {
 884                 fitparm[i] = 1;
 885             }
 886     }
 887     fprintf(out, "Starting parameters:\n");
 888     for (i = 0; (i < nparm); i++)
 889     {
 890         fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 891     }
 892     if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
 893                  oenv, bDebugMode(), efitfn, fitparm, 0,
 894                  fn_fitted) > 0)
 895     {
 896         for (i = 0; (i < nparm); i++)
 897         {
 898             fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 899         }
 900     }
 901     else
 902     {
 903         fprintf(out, "No solution was found\n");
 904     }
 905 }
 906
 907 static void do_ballistic(const char *balFile, int nData,
 908                          real *t, real **val, int nSet,
 909                          real balTime, int nBalExp,
 910                          const output_env_t oenv)
 911 {
 912     double     **ctd   = NULL, *td = NULL;
 913     t_gemParams *GP    = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
 914     static char *leg[] = {"Ac'(t)"};
 915     FILE        *fp;
 916     int          i, set;
 917
 918     if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
 919     {
 920         snew(ctd, nSet);
 921         snew(td,  nData);
 922
 923         fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 924         xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
 925
 926         for (set = 0; set < nSet; set++)
 927         {
 928             snew(ctd[set], nData);
 929             for (i = 0; i < nData; i++)
 930             {
 931                 ctd[set][i] = (double)val[set][i];
 932                 if (set == 0)
 933                 {
 934                     td[i] = (double)t[i];
 935                 }
 936             }
 937
 938             takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
 939         }
 940
 941         for (i = 0; i < nData; i++)
 942         {
 943             fprintf(fp, "  %g", t[i]);
 944             for (set = 0; set < nSet; set++)
 945             {
 946                 fprintf(fp, "  %g", ctd[set][i]);
 947             }
 948             fprintf(fp, "\n");
 949         }
 950
 951
 952         for (set = 0; set < nSet; set++)
 953         {
 954             sfree(ctd[set]);
 955         }
 956         sfree(ctd);
 957         sfree(td);
 958         xvgrclose(fp);
 959     }
 960     else
 961     {
 962         printf("Number of data points is less than the number of parameters to fit\n."
 963                "The system is underdetermined, hence no ballistic term can be found.\n\n");
 964     }
 965 }
 966
 967 static void do_geminate(const char *gemFile, int nData,
 968                         real *t, real **val, int nSet,
 969                         const real D, const real rcut, const real balTime,
 970                         const int nFitPoints, const real begFit, const real endFit,
 971                         const output_env_t oenv)
 972 {
 973     double     **ctd = NULL, **ctdGem = NULL, *td = NULL;
 974     t_gemParams *GP  = init_gemParams(rcut, D, t, nData, nFitPoints,
 975                                       begFit, endFit, balTime, 1);
 976     const char  *leg[] = {"Ac\\sgem\\N(t)"};
 977     FILE        *fp;
 978     int          i, set;
 979
 980     snew(ctd,    nSet);
 981     snew(ctdGem, nSet);
 982     snew(td,  nData);
 983
 984     fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 985     xvgr_legend(fp, asize(leg), leg, oenv);
 986
 987     for (set = 0; set < nSet; set++)
 988     {
 989         snew(ctd[set],    nData);
 990         snew(ctdGem[set], nData);
 991         for (i = 0; i < nData; i++)
 992         {
 993             ctd[set][i] = (double)val[set][i];
 994             if (set == 0)
 995             {
 996                 td[i] = (double)t[i];
 997             }
 998         }
 999         fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
1000     }
1001
1002     for (i = 0; i < nData; i++)
1003     {
1004         fprintf(fp, "  %g", t[i]);
1005         for (set = 0; set < nSet; set++)
1006         {
1007             fprintf(fp, "  %g", ctdGem[set][i]);
1008         }
1009         fprintf(fp, "\n");
1010     }
1011
1012     for (set = 0; set < nSet; set++)
1013     {
1014         sfree(ctd[set]);
1015         sfree(ctdGem[set]);
1016     }
1017     sfree(ctd);
1018     sfree(ctdGem);
1019     sfree(td);
1020     xvgrclose(fp);
1021 }
1022
1023 static void print_fitted_function(const char   *fitfile,
1024                                   const char   *fn_fitted,
1025                                   gmx_bool      bXYdy,
1026                                   int           nset,
1027                                   int           n,
1028                                   real         *t,
1029                                   real        **val,
1030                                   int           npargs,
1031                                   t_pargs      *ppa,
1032                                   output_env_t  oenv)
1033 {
1034     FILE *out_fit = gmx_ffopen(fitfile, "w");
1035     if (bXYdy && nset >= 2)
1036     {
1037         do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv,
1038                fn_fitted);
1039     }
1040     else
1041     {
1042         char *buf2 = NULL;
1043         int   s, buflen = 0;
1044         if (NULL != fn_fitted)
1045         {
1046             buflen = strlen(fn_fitted)+32;
1047             snew(buf2, buflen);
1048             strncpy(buf2, fn_fitted, buflen);
1049             buf2[strlen(buf2)-4] = '\0';
1050         }
1051         for (s = 0; s < nset; s++)
1052         {
1053             char *buf = NULL;
1054             if (NULL != fn_fitted)
1055             {
1056                 snew(buf, buflen);
1057                 snprintf(buf, n, "%s_%d.xvg", buf2, s);
1058             }
1059             do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
1060             sfree(buf);
1061         }
1062         sfree(buf2);
1063     }
1064     gmx_ffclose(out_fit);
1065 }
1066
1067 int gmx_analyze(int argc, char *argv[])
1068 {
1069     static const char *desc[] = {
1070         "[THISMODULE] reads an ASCII file and analyzes data sets.",
1071         "A line in the input file may start with a time",
1072         "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1073         "Multiple sets can also be",
1074         "read when they are separated by & (option [TT]-n[tt]);",
1075         "in this case only one [IT]y[it]-value is read from each line.",
1076         "All lines starting with # and @ are skipped.",
1077         "All analyses can also be done for the derivative of a set",
1078         "(option [TT]-d[tt]).[PAR]",
1079
1080         "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1081         "points are equidistant in time.[PAR]",
1082
1083         "[THISMODULE] always shows the average and standard deviation of each",
1084         "set, as well as the relative deviation of the third",
1085         "and fourth cumulant from those of a Gaussian distribution with the same",
1086         "standard deviation.[PAR]",
1087
1088         "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1089         "Be sure that the time interval between data points is",
1090         "much shorter than the time scale of the autocorrelation.[PAR]",
1091
1092         "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1093         "i/2 periods. The formula is::",
1094         "",
1095         "  [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
1096         "",
1097         "This is useful for principal components obtained from covariance",
1098         "analysis, since the principal components of random diffusion are",
1099         "pure cosines.[PAR]",
1100
1101         "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1102
1103         "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1104
1105         "Option [TT]-av[tt] produces the average over the sets.",
1106         "Error bars can be added with the option [TT]-errbar[tt].",
1107         "The errorbars can represent the standard deviation, the error",
1108         "(assuming the points are independent) or the interval containing",
1109         "90% of the points, by discarding 5% of the points at the top and",
1110         "the bottom.[PAR]",
1111
1112         "Option [TT]-ee[tt] produces error estimates using block averaging.",
1113         "A set is divided in a number of blocks and averages are calculated for",
1114         "each block. The error for the total average is calculated from",
1115         "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1116         "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1117         "These errors are plotted as a function of the block size.",
1118         "Also an analytical block average curve is plotted, assuming",
1119         "that the autocorrelation is a sum of two exponentials.",
1120         "The analytical curve for the block average is::",
1121         "",
1122         "  [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T (  [GRK]alpha[grk]   ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1123         "                         (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],",
1124         "",
1125         "where T is the total time.",
1126         "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1127         "When the actual block average is very close to the analytical curve,",
1128         "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1129         "The complete derivation is given in",
1130         "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1131
1132         "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1133         "component from a hydrogen bond autocorrelation function by the fitting",
1134         "of a sum of exponentials, as described in e.g.",
1135         "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1136         "is the one with the most negative coefficient in the exponential,",
1137         "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1138         "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1139
1140         "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1141         "(and optionally kD) to the hydrogen bond autocorrelation function",
1142         "according to the reversible geminate recombination model. Removal of",
1143         "the ballistic component first is strongly advised. The model is presented in",
1144         "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1145
1146         "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1147         "of each set and over all sets with respect to a filtered average.",
1148         "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1149         "to len/2. len is supplied with the option [TT]-filter[tt].",
1150         "This filter reduces oscillations with period len/2 and len by a factor",
1151         "of 0.79 and 0.33 respectively.[PAR]",
1152
1153         "Option [TT]-g[tt] fits the data to the function given with option",
1154         "[TT]-fitfn[tt].[PAR]",
1155
1156         "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1157         "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1158         "zero or with a negative value are ignored.[PAR]"
1159
1160         "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1161         "on output from [gmx-hbond]. The input file can be taken directly",
1162         "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1163         "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1164         "curves that make sense in the context of molecular dynamics, mainly",
1165         "exponential curves. More information is in the manual. To check the output",
1166         "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1167         "original data and the fitted function to a new data file. The fitting",
1168         "parameters are stored as comment in the output file."
1169     };
1170     static real        tb         = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1171     static gmx_bool    bHaveT     = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1172     static gmx_bool    bEESEF     = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1173     static gmx_bool    bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1174     static int         nsets_in   = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1175     static real        temp       = 298.15, fit_start = 1, fit_end = 60, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1176
1177     /* must correspond to enum avbar* declared at beginning of file */
1178     static const char *avbar_opt[avbarNR+1] = {
1179         NULL, "none", "stddev", "error", "90", NULL
1180     };
1181
1182     t_pargs            pa[] = {
1183         { "-time",    FALSE, etBOOL, {&bHaveT},
1184           "Expect a time in the input" },
1185         { "-b",       FALSE, etREAL, {&tb},
1186           "First time to read from set" },
1187         { "-e",       FALSE, etREAL, {&te},
1188           "Last time to read from set" },
1189         { "-n",       FALSE, etINT, {&nsets_in},
1190           "Read this number of sets separated by &" },
1191         { "-d",       FALSE, etBOOL, {&bDer},
1192           "Use the derivative" },
1193         { "-dp",      FALSE, etINT, {&d},
1194           "HIDDENThe derivative is the difference over this number of points" },
1195         { "-bw",      FALSE, etREAL, {&binwidth},
1196           "Binwidth for the distribution" },
1197         { "-errbar",  FALSE, etENUM, {avbar_opt},
1198           "Error bars for [TT]-av[tt]" },
1199         { "-integrate", FALSE, etBOOL, {&bIntegrate},
1200           "Integrate data function(s) numerically using trapezium rule" },
1201         { "-aver_start", FALSE, etREAL, {&aver_start},
1202           "Start averaging the integral from here" },
1203         { "-xydy",    FALSE, etBOOL, {&bXYdy},
1204           "Interpret second data set as error in the y values for integrating" },
1205         { "-regression", FALSE, etBOOL, {&bRegression},
1206           "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1207         { "-luzar",   FALSE, etBOOL, {&bLuzar},
1208           "Do a Luzar and Chandler analysis on a correlation function and "
1209           "related as produced by [gmx-hbond]. When in addition the "
1210           "[TT]-xydy[tt] flag is given the second and fourth column will be "
1211           "interpreted as errors in c(t) and n(t)." },
1212         { "-temp",    FALSE, etREAL, {&temp},
1213           "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1214         { "-fitstart", FALSE, etREAL, {&fit_start},
1215           "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1216         { "-fitend", FALSE, etREAL, {&fit_end},
1217           "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1218         { "-nbmin",   FALSE, etINT, {&nb_min},
1219           "HIDDENMinimum number of blocks for block averaging" },
1220         { "-resol", FALSE, etINT, {&resol},
1221           "HIDDENResolution for the block averaging, block size increases with"
1222           " a factor 2^(1/resol)" },
1223         { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1224           "HIDDENAlways use a single exponential fit for the error estimate" },
1225         { "-eenlc", FALSE, etBOOL, {&bEENLC},
1226           "HIDDENAllow a negative long-time correlation" },
1227         { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1228           "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1229         { "-filter",  FALSE, etREAL, {&filtlen},
1230           "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1231         { "-power", FALSE, etBOOL, {&bPower},
1232           "Fit data to: b t^a" },
1233         { "-subav", FALSE, etBOOL, {&bSubAv},
1234           "Subtract the average before autocorrelating" },
1235         { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1236           "Calculate one ACF over all sets" },
1237         { "-nbalexp", FALSE, etINT, {&nBalExp},
1238           "HIDDENNumber of exponentials to fit to the ultrafast component" },
1239         { "-baltime", FALSE, etREAL, {&balTime},
1240           "HIDDENTime up to which the ballistic component will be fitted" },
1241 /*     { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1242 /*       "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1243 /*     { "-rcut", FALSE, etREAL, {&rcut}, */
1244 /*       "Cut-off for hydrogen bonds in geminate algorithms" }, */
1245 /*     { "-gemtype", FALSE, etENUM, {gemType}, */
1246 /*       "What type of gminate recombination to use"}, */
1247 /*     { "-D", FALSE, etREAL, {&diffusion}, */
1248 /*       "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1249     };
1250 #define NPA asize(pa)
1251
1252     FILE           *out, *out_fit;
1253     int             n, nlast, s, nset, i, j = 0;
1254     real          **val, *t, dt, tot, error;
1255     double         *av, *sig, cum1, cum2, cum3, cum4, db;
1256     const char     *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1257     output_env_t    oenv;
1258
1259     t_filenm        fnm[] = {
1260         { efXVG, "-f",    "graph",    ffREAD   },
1261         { efXVG, "-ac",   "autocorr", ffOPTWR  },
1262         { efXVG, "-msd",  "msd",      ffOPTWR  },
1263         { efXVG, "-cc",   "coscont",  ffOPTWR  },
1264         { efXVG, "-dist", "distr",    ffOPTWR  },
1265         { efXVG, "-av",   "average",  ffOPTWR  },
1266         { efXVG, "-ee",   "errest",   ffOPTWR  },
1267         { efXVG, "-bal",  "ballisitc", ffOPTWR  },
1268         { efXVG, "-fitted", "fitted", ffOPTWR },
1269 /*     { efXVG, "-gem",  "geminate", ffOPTWR  }, */
1270         { efLOG, "-g",    "fitlog",   ffOPTWR  }
1271     };
1272 #define NFILE asize(fnm)
1273
1274     int      npargs;
1275     t_pargs *ppa;
1276
1277     npargs = asize(pa);
1278     ppa    = add_acf_pargs(&npargs, pa);
1279
1280     if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1281                            NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1282     {
1283         return 0;
1284     }
1285
1286     acfile   = opt2fn_null("-ac", NFILE, fnm);
1287     msdfile  = opt2fn_null("-msd", NFILE, fnm);
1288     ccfile   = opt2fn_null("-cc", NFILE, fnm);
1289     distfile = opt2fn_null("-dist", NFILE, fnm);
1290     avfile   = opt2fn_null("-av", NFILE, fnm);
1291     eefile   = opt2fn_null("-ee", NFILE, fnm);
1292     balfile  = opt2fn_null("-bal", NFILE, fnm);
1293 /*   gemfile  = opt2fn_null("-gem",NFILE,fnm); */
1294     /* When doing autocorrelation we don't want a fitlog for fitting
1295      * the function itself (not the acf) when the user did not ask for it.
1296      */
1297     if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1298     {
1299         fitfile  = opt2fn("-g", NFILE, fnm);
1300     }
1301     else
1302     {
1303         fitfile  = opt2fn_null("-g", NFILE, fnm);
1304     }
1305
1306     val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1307                         opt2parg_bSet("-b", npargs, ppa), tb,
1308                         opt2parg_bSet("-e", npargs, ppa), te,
1309                         nsets_in, &nset, &n, &dt, &t);
1310     printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1311
1312     if (bDer)
1313     {
1314         printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1315                d, d);
1316         n -= d;
1317         for (s = 0; s < nset; s++)
1318         {
1319             for (i = 0; (i < n); i++)
1320             {
1321                 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1322             }
1323         }
1324     }
1325
1326     if (bIntegrate)
1327     {
1328         real sum, stddev;
1329
1330         printf("Calculating the integral using the trapezium rule\n");
1331
1332         if (bXYdy)
1333         {
1334             sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1335             printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1336         }
1337         else
1338         {
1339             for (s = 0; s < nset; s++)
1340             {
1341                 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1342                 printf("Integral %d  %10.5f  +/- %10.5f\n", s+1, sum, stddev);
1343             }
1344         }
1345     }
1346
1347     if (fitfile != NULL)
1348     {
1349         print_fitted_function(fitfile,
1350                               opt2fn_null("-fitted", NFILE, fnm),
1351                               bXYdy, nset,
1352                               n, t, val,
1353                               npargs, ppa,
1354                               oenv);
1355     }
1356
1357     printf("                                      std. dev.    relative deviation of\n");
1358     printf("                       standard       ---------   cumulants from those of\n");
1359     printf("set      average       deviation      sqrt(n-1)   a Gaussian distribition\n");
1360     printf("                                                      cum. 3   cum. 4\n");
1361     snew(av, nset);
1362     snew(sig, nset);
1363     for (s = 0; (s < nset); s++)
1364     {
1365         cum1 = 0;
1366         cum2 = 0;
1367         cum3 = 0;
1368         cum4 = 0;
1369         for (i = 0; (i < n); i++)
1370         {
1371             cum1 += val[s][i];
1372         }
1373         cum1 /= n;
1374         for (i = 0; (i < n); i++)
1375         {
1376             db    = val[s][i]-cum1;
1377             cum2 += db*db;
1378             cum3 += db*db*db;
1379             cum4 += db*db*db*db;
1380         }
1381         cum2  /= n;
1382         cum3  /= n;
1383         cum4  /= n;
1384         av[s]  = cum1;
1385         sig[s] = sqrt(cum2);
1386         if (n > 1)
1387         {
1388             error = sqrt(cum2/(n-1));
1389         }
1390         else
1391         {
1392             error = 0;
1393         }
1394         printf("SS%d  %13.6e   %12.6e   %12.6e      %6.3f   %6.3f\n",
1395                s+1, av[s], sig[s], error,
1396                sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1397                sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1398     }
1399     printf("\n");
1400
1401     if (filtlen)
1402     {
1403         filter(filtlen, n, nset, val, dt);
1404     }
1405
1406     if (msdfile)
1407     {
1408         out = xvgropen(msdfile, "Mean square displacement",
1409                        "time", "MSD (nm\\S2\\N)", oenv);
1410         nlast = (int)(n*frac);
1411         for (s = 0; s < nset; s++)
1412         {
1413             for (j = 0; j <= nlast; j++)
1414             {
1415                 if (j % 100 == 0)
1416                 {
1417                     fprintf(stderr, "\r%d", j);
1418                 }
1419                 tot = 0;
1420                 for (i = 0; i < n-j; i++)
1421                 {
1422                     tot += sqr(val[s][i]-val[s][i+j]);
1423                 }
1424                 tot /= (real)(n-j);
1425                 fprintf(out, " %g %8g\n", dt*j, tot);
1426             }
1427             if (s < nset-1)
1428             {
1429                 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1430             }
1431         }
1432         xvgrclose(out);
1433         fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1434     }
1435     if (ccfile)
1436     {
1437         plot_coscont(ccfile, n, nset, val, oenv);
1438     }
1439
1440     if (distfile)
1441     {
1442         histogram(distfile, binwidth, n, nset, val, oenv);
1443     }
1444     if (avfile)
1445     {
1446         average(avfile, nenum(avbar_opt), n, nset, val, t);
1447     }
1448     if (eefile)
1449     {
1450         estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1451                        bEeFitAc, bEESEF, bEENLC, oenv);
1452     }
1453     if (balfile)
1454     {
1455         do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1456     }
1457 /*   if (gemfile) */
1458 /*       do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1459 /*                   nFitPoints, fit_start, fit_end, oenv); */
1460     if (bPower)
1461     {
1462         power_fit(n, nset, val, t);
1463     }
1464
1465     if (acfile != NULL)
1466     {
1467         if (bSubAv)
1468         {
1469             for (s = 0; s < nset; s++)
1470             {
1471                 for (i = 0; i < n; i++)
1472                 {
1473                     val[s][i] -= av[s];
1474                 }
1475             }
1476         }
1477         do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1478                     eacNormal, bAverCorr);
1479     }
1480
1481     if (bRegression)
1482     {
1483         regression_analysis(n, bXYdy, t, nset, val);
1484     }
1485
1486     if (bLuzar)
1487     {
1488         luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1489     }
1490
1491     view_all(oenv, NFILE, fnm);
1492
1493     return 0;
1494 }