src/tools/gmx_analyze.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41 #include <math.h>
  42 #include <string.h>
  43 #include "statutil.h"
  44 #include "sysstuff.h"
  45 #include "typedefs.h"
  46 #include "smalloc.h"
  47 #include "macros.h"
  48 #include "gmx_fatal.h"
  49 #include "vec.h"
  50 #include "copyrite.h"
  51 #include "futil.h"
  52 #include "readinp.h"
  53 #include "statutil.h"
  54 #include "txtdump.h"
  55 #include "gstat.h"
  56 #include "gmx_matrix.h"
  57 #include "gmx_statistics.h"
  58 #include "xvgr.h"
  59 #include "gmx_ana.h"
  60 #include "geminate.h"
  61
  62 /* must correspond to char *avbar_opt[] declared in main() */
  63 enum {
  64     avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
  65 };
  66
  67 static void power_fit(int n, int nset, real **val, real *t)
  68 {
  69     real *x, *y, quality, a, b, r;
  70     int   s, i;
  71
  72     snew(x, n);
  73     snew(y, n);
  74
  75     if (t[0] > 0)
  76     {
  77         for (i = 0; i < n; i++)
  78         {
  79             if (t[0] > 0)
  80             {
  81                 x[i] = log(t[i]);
  82             }
  83         }
  84     }
  85     else
  86     {
  87         fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
  88         for (i = 0; i < n; i++)
  89         {
  90             x[i] = log(i+1);
  91         }
  92     }
  93
  94     for (s = 0; s < nset; s++)
  95     {
  96         i = 0;
  97         for (i = 0; i < n && val[s][i] >= 0; i++)
  98         {
  99             y[i] = log(val[s][i]);
 100         }
 101         if (i < n)
 102         {
 103             fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
 104         }
 105         lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
 106         fprintf(stdout, "Power fit set %3d:  error %.3f  a %g  b %g\n",
 107                 s+1, quality, a, exp(b));
 108     }
 109
 110     sfree(y);
 111     sfree(x);
 112 }
 113
 114 static real cosine_content(int nhp, int n, real *y)
 115 /* Assumes n equidistant points */
 116 {
 117     double fac, cosyint, yyint;
 118     int    i;
 119
 120     if (n < 2)
 121     {
 122         return 0;
 123     }
 124
 125     fac = M_PI*nhp/(n-1);
 126
 127     cosyint = 0;
 128     yyint   = 0;
 129     for (i = 0; i < n; i++)
 130     {
 131         cosyint += cos(fac*i)*y[i];
 132         yyint   += y[i]*y[i];
 133     }
 134
 135     return 2*cosyint*cosyint/(n*yyint);
 136 }
 137
 138 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
 139                          const output_env_t oenv)
 140 {
 141     FILE *fp;
 142     int   s;
 143     real  cc;
 144
 145     fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
 146                   oenv);
 147
 148     for (s = 0; s < nset; s++)
 149     {
 150         cc = cosine_content(s+1, n, val[s]);
 151         fprintf(fp, " %d %g\n", s+1, cc);
 152         fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
 153                 s+1, 0.5*(s+1), cc);
 154     }
 155     fprintf(stdout, "\n");
 156
 157     ffclose(fp);
 158 }
 159
 160 static void regression_analysis(int n, gmx_bool bXYdy,
 161                                 real *x, int nset, real **val)
 162 {
 163     real S, chi2, a, b, da, db, r = 0;
 164     int  ok;
 165
 166     if (bXYdy || (nset == 1))
 167     {
 168         printf("Fitting data to a function f(x) = ax + b\n");
 169         printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
 170         printf("Error estimates will be given if w_i (sigma) values are given\n");
 171         printf("(use option -xydy).\n\n");
 172         if (bXYdy)
 173         {
 174             if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
 175             {
 176                 gmx_fatal(FARGS, "Error fitting the data: %s",
 177                           gmx_stats_message(ok));
 178             }
 179         }
 180         else
 181         {
 182             if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
 183             {
 184                 gmx_fatal(FARGS, "Error fitting the data: %s",
 185                           gmx_stats_message(ok));
 186             }
 187         }
 188         chi2 = sqr((n-2)*S);
 189         printf("Chi2                    = %g\n", chi2);
 190         printf("S (Sqrt(Chi2/(n-2))     = %g\n", S);
 191         printf("Correlation coefficient = %.1f%%\n", 100*r);
 192         printf("\n");
 193         if (bXYdy)
 194         {
 195             printf("a    = %g +/- %g\n", a, da);
 196             printf("b    = %g +/- %g\n", b, db);
 197         }
 198         else
 199         {
 200             printf("a    = %g\n", a);
 201             printf("b    = %g\n", b);
 202         }
 203     }
 204     else
 205     {
 206         double chi2, *a, **xx, *y;
 207         int    i, j;
 208
 209         snew(y, n);
 210         snew(xx, nset-1);
 211         for (j = 0; (j < nset-1); j++)
 212         {
 213             snew(xx[j], n);
 214         }
 215         for (i = 0; (i < n); i++)
 216         {
 217             y[i] = val[0][i];
 218             for (j = 1; (j < nset); j++)
 219             {
 220                 xx[j-1][i] = val[j][i];
 221             }
 222         }
 223         snew(a, nset-1);
 224         chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
 225         printf("Fitting %d data points in %d sets\n", n, nset-1);
 226         printf("chi2 = %g\n", chi2);
 227         printf("A =");
 228         for (i = 0; (i < nset-1); i++)
 229         {
 230             printf("  %g", a[i]);
 231             sfree(xx[i]);
 232         }
 233         printf("\n");
 234         sfree(xx);
 235         sfree(y);
 236         sfree(a);
 237     }
 238 }
 239
 240 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
 241                const output_env_t oenv)
 242 {
 243     FILE          *fp;
 244     int            i, s;
 245     double         min, max;
 246     int            nbin;
 247 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 248     long long int *histo;
 249 #else
 250     double        *histo;
 251 #endif
 252
 253     min = val[0][0];
 254     max = val[0][0];
 255     for (s = 0; s < nset; s++)
 256     {
 257         for (i = 0; i < n; i++)
 258         {
 259             if (val[s][i] < min)
 260             {
 261                 min = val[s][i];
 262             }
 263             else if (val[s][i] > max)
 264             {
 265                 max = val[s][i];
 266             }
 267         }
 268     }
 269
 270     min = binwidth*floor(min/binwidth);
 271     max = binwidth*ceil(max/binwidth);
 272     if (min != 0)
 273     {
 274         min -= binwidth;
 275     }
 276     max += binwidth;
 277
 278     nbin = (int)((max - min)/binwidth + 0.5) + 1;
 279     fprintf(stderr, "Making distributions with %d bins\n", nbin);
 280     snew(histo, nbin);
 281     fp = xvgropen(distfile, "Distribution", "", "", oenv);
 282     for (s = 0; s < nset; s++)
 283     {
 284         for (i = 0; i < nbin; i++)
 285         {
 286             histo[i] = 0;
 287         }
 288         for (i = 0; i < n; i++)
 289         {
 290             histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
 291         }
 292         for (i = 0; i < nbin; i++)
 293         {
 294             fprintf(fp, " %g  %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
 295         }
 296         if (s < nset-1)
 297         {
 298             fprintf(fp, "&\n");
 299         }
 300     }
 301     ffclose(fp);
 302 }
 303
 304 static int real_comp(const void *a, const void *b)
 305 {
 306     real dif = *(real *)a - *(real *)b;
 307
 308     if (dif < 0)
 309     {
 310         return -1;
 311     }
 312     else if (dif > 0)
 313     {
 314         return 1;
 315     }
 316     else
 317     {
 318         return 0;
 319     }
 320 }
 321
 322 static void average(const char *avfile, int avbar_opt,
 323                     int n, int nset, real **val, real *t)
 324 {
 325     FILE   *fp;
 326     int     i, s, edge = 0;
 327     double  av, var, err;
 328     real   *tmp = NULL;
 329
 330     fp = ffopen(avfile, "w");
 331     if ((avbar_opt == avbarERROR) && (nset == 1))
 332     {
 333         avbar_opt = avbarNONE;
 334     }
 335     if (avbar_opt != avbarNONE)
 336     {
 337         if (avbar_opt == avbar90)
 338         {
 339             snew(tmp, nset);
 340             fprintf(fp, "@TYPE xydydy\n");
 341             edge = (int)(nset*0.05+0.5);
 342             fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
 343                     " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
 344         }
 345         else
 346         {
 347             fprintf(fp, "@TYPE xydy\n");
 348         }
 349     }
 350
 351     for (i = 0; i < n; i++)
 352     {
 353         av = 0;
 354         for (s = 0; s < nset; s++)
 355         {
 356             av += val[s][i];
 357         }
 358         av /= nset;
 359         fprintf(fp, " %g %g", t[i], av);
 360         var = 0;
 361         if (avbar_opt != avbarNONE)
 362         {
 363             if (avbar_opt == avbar90)
 364             {
 365                 for (s = 0; s < nset; s++)
 366                 {
 367                     tmp[s] = val[s][i];
 368                 }
 369                 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
 370                 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
 371             }
 372             else
 373             {
 374                 for (s = 0; s < nset; s++)
 375                 {
 376                     var += sqr(val[s][i]-av);
 377                 }
 378                 if (avbar_opt == avbarSTDDEV)
 379                 {
 380                     err = sqrt(var/nset);
 381                 }
 382                 else
 383                 {
 384                     err = sqrt(var/(nset*(nset-1)));
 385                 }
 386                 fprintf(fp, " %g", err);
 387             }
 388         }
 389         fprintf(fp, "\n");
 390     }
 391     ffclose(fp);
 392
 393     if (avbar_opt == avbar90)
 394     {
 395         sfree(tmp);
 396     }
 397 }
 398
 399 static real anal_ee_inf(real *parm, real T)
 400 {
 401     return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
 402 }
 403
 404 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
 405                            int nset, double *av, double *sig, real **val, real dt,
 406                            gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
 407                            const output_env_t oenv)
 408 {
 409     FILE    *fp;
 410     int      bs, prev_bs, nbs, nb;
 411     real     spacing, nbr;
 412     int      s, i, j;
 413     double   blav, var;
 414     char   **leg;
 415     real    *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
 416     real     fitparm[4];
 417     real     ee, a, tau1, tau2;
 418
 419     if (n < 4)
 420     {
 421         fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
 422
 423         return;
 424     }
 425
 426     fp = xvgropen(eefile, "Error estimates",
 427                   "Block size (time)", "Error estimate", oenv);
 428     if (output_env_get_print_xvgr_codes(oenv))
 429     {
 430         fprintf(fp,
 431                 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
 432                 (n-1)*dt, n);
 433     }
 434     snew(leg, 2*nset);
 435     xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
 436     sfree(leg);
 437
 438     spacing = pow(2, 1.0/resol);
 439     snew(tbs, n);
 440     snew(ybs, n);
 441     snew(fitsig, n);
 442     for (s = 0; s < nset; s++)
 443     {
 444         nbs     = 0;
 445         prev_bs = 0;
 446         nbr     = nb_min;
 447         while (nbr <= n)
 448         {
 449             bs = n/(int)nbr;
 450             if (bs != prev_bs)
 451             {
 452                 nb  = n/bs;
 453                 var = 0;
 454                 for (i = 0; i < nb; i++)
 455                 {
 456                     blav = 0;
 457                     for (j = 0; j < bs; j++)
 458                     {
 459                         blav += val[s][bs*i+j];
 460                     }
 461                     var += sqr(av[s] - blav/bs);
 462                 }
 463                 tbs[nbs] = bs*dt;
 464                 if (sig[s] == 0)
 465                 {
 466                     ybs[nbs] = 0;
 467                 }
 468                 else
 469                 {
 470                     ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
 471                 }
 472                 nbs++;
 473             }
 474             nbr    *= spacing;
 475             nb      = (int)(nbr+0.5);
 476             prev_bs = bs;
 477         }
 478         if (sig[s] == 0)
 479         {
 480             ee   = 0;
 481             a    = 1;
 482             tau1 = 0;
 483             tau2 = 0;
 484         }
 485         else
 486         {
 487             for (i = 0; i < nbs/2; i++)
 488             {
 489                 rtmp         = tbs[i];
 490                 tbs[i]       = tbs[nbs-1-i];
 491                 tbs[nbs-1-i] = rtmp;
 492                 rtmp         = ybs[i];
 493                 ybs[i]       = ybs[nbs-1-i];
 494                 ybs[nbs-1-i] = rtmp;
 495             }
 496             /* The initial slope of the normalized ybs^2 is 1.
 497              * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
 498              * From this we take our initial guess for tau1.
 499              */
 500             twooe = 2/exp(1);
 501             i     = -1;
 502             do
 503             {
 504                 i++;
 505                 tau1_est = tbs[i];
 506             }
 507             while (i < nbs - 1 &&
 508                    (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
 509
 510             if (ybs[0] > ybs[1])
 511             {
 512                 fprintf(stdout, "Data set %d has strange time correlations:\n"
 513                         "the std. error using single points is larger than that of blocks of 2 points\n"
 514                         "The error estimate might be inaccurate, check the fit\n",
 515                         s+1);
 516                 /* Use the total time as tau for the fitting weights */
 517                 tau_sig = (n - 1)*dt;
 518             }
 519             else
 520             {
 521                 tau_sig = tau1_est;
 522             }
 523
 524             if (debug)
 525             {
 526                 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
 527             }
 528
 529             /* Generate more or less appropriate sigma's,
 530              * also taking the density of points into account.
 531              */
 532             for (i = 0; i < nbs; i++)
 533             {
 534                 if (i == 0)
 535                 {
 536                     dens = tbs[1]/tbs[0] - 1;
 537                 }
 538                 else if (i == nbs-1)
 539                 {
 540                     dens = tbs[nbs-1]/tbs[nbs-2] - 1;
 541                 }
 542                 else
 543                 {
 544                     dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
 545                 }
 546                 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
 547             }
 548
 549             if (!bSingleExpFit)
 550             {
 551                 fitparm[0] = tau1_est;
 552                 fitparm[1] = 0.95;
 553                 /* We set the initial guess for tau2
 554                  * to halfway between tau1_est and the total time (on log scale).
 555                  */
 556                 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
 557                 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
 558                          bDebugMode(), effnERREST, fitparm, 0);
 559                 fitparm[3] = 1-fitparm[1];
 560             }
 561             if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
 562                 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
 563             {
 564                 if (!bSingleExpFit)
 565                 {
 566                     if (fitparm[2] > (n-1)*dt)
 567                     {
 568                         fprintf(stdout,
 569                                 "Warning: tau2 is longer than the length of the data (%g)\n"
 570                                 "         the statistics might be bad\n",
 571                                 (n-1)*dt);
 572                     }
 573                     else
 574                     {
 575                         fprintf(stdout, "a fitted parameter is negative\n");
 576                     }
 577                     fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 578                             sig[s]*anal_ee_inf(fitparm, n*dt),
 579                             fitparm[1], fitparm[0], fitparm[2]);
 580                     /* Do a fit with tau2 fixed at the total time.
 581                      * One could also choose any other large value for tau2.
 582                      */
 583                     fitparm[0] = tau1_est;
 584                     fitparm[1] = 0.95;
 585                     fitparm[2] = (n-1)*dt;
 586                     fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
 587                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 588                              effnERREST, fitparm, 4);
 589                     fitparm[3] = 1-fitparm[1];
 590                 }
 591                 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
 592                     || (fitparm[1] > 1 && !bAllowNegLTCorr))
 593                 {
 594                     if (!bSingleExpFit)
 595                     {
 596                         fprintf(stdout, "a fitted parameter is negative\n");
 597                         fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 598                                 sig[s]*anal_ee_inf(fitparm, n*dt),
 599                                 fitparm[1], fitparm[0], fitparm[2]);
 600                     }
 601                     /* Do a single exponential fit */
 602                     fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
 603                     fitparm[0] = tau1_est;
 604                     fitparm[1] = 1.0;
 605                     fitparm[2] = 0.0;
 606                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 607                              effnERREST, fitparm, 6);
 608                     fitparm[3] = 1-fitparm[1];
 609                 }
 610             }
 611             ee   = sig[s]*anal_ee_inf(fitparm, n*dt);
 612             a    = fitparm[1];
 613             tau1 = fitparm[0];
 614             tau2 = fitparm[2];
 615         }
 616         fprintf(stdout, "Set %3d:  err.est. %g  a %g  tau1 %g  tau2 %g\n",
 617                 s+1, ee, a, tau1, tau2);
 618         fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
 619         fprintf(fp, "@ legend string %d \"ee %6g\"\n",
 620                 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
 621         for (i = 0; i < nbs; i++)
 622         {
 623             fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
 624                     sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
 625         }
 626
 627         if (bFitAc)
 628         {
 629             int   fitlen;
 630             real *ac, acint, ac_fit[4];
 631
 632             snew(ac, n);
 633             for (i = 0; i < n; i++)
 634             {
 635                 ac[i] = val[s][i] - av[s];
 636                 if (i > 0)
 637                 {
 638                     fitsig[i] = sqrt(i);
 639                 }
 640                 else
 641                 {
 642                     fitsig[i] = 1;
 643                 }
 644             }
 645             low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
 646                             dt, eacNormal, 1, FALSE, TRUE,
 647                             FALSE, 0, 0, effnNONE, 0);
 648
 649             fitlen = n/nb_min;
 650
 651             /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
 652             acint = 0.5*ac[0];
 653             for (i = 1; i <= fitlen/2; i++)
 654             {
 655                 acint += ac[i];
 656             }
 657             acint *= dt;
 658
 659             /* Generate more or less appropriate sigma's */
 660             for (i = 0; i <= fitlen; i++)
 661             {
 662                 fitsig[i] = sqrt(acint + dt*i);
 663             }
 664
 665             ac_fit[0] = 0.5*acint;
 666             ac_fit[1] = 0.95;
 667             ac_fit[2] = 10*acint;
 668             do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
 669                      bDebugMode(), effnEXP3, ac_fit, 0);
 670             ac_fit[3] = 1 - ac_fit[1];
 671
 672             fprintf(stdout, "Set %3d:  ac erest %g  a %g  tau1 %g  tau2 %g\n",
 673                     s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
 674                     ac_fit[1], ac_fit[0], ac_fit[2]);
 675
 676             fprintf(fp, "&\n");
 677             for (i = 0; i < nbs; i++)
 678             {
 679                 fprintf(fp, "%g %g\n", tbs[i],
 680                         sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
 681             }
 682
 683             sfree(ac);
 684         }
 685         if (s < nset-1)
 686         {
 687             fprintf(fp, "&\n");
 688         }
 689     }
 690     sfree(fitsig);
 691     sfree(ybs);
 692     sfree(tbs);
 693     ffclose(fp);
 694 }
 695
 696 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
 697                          gmx_bool bError, real fit_start, real smooth_tail_start,
 698                          const output_env_t oenv)
 699 {
 700     const real tol = 1e-8;
 701     real      *kt;
 702     real       weight, d2;
 703     int        j;
 704
 705     please_cite(stdout, "Spoel2006b");
 706
 707     /* Compute negative derivative k(t) = -dc(t)/dt */
 708     if (!bError)
 709     {
 710         snew(kt, nn);
 711         compute_derivative(nn, time, val[0], kt);
 712         for (j = 0; (j < nn); j++)
 713         {
 714             kt[j] = -kt[j];
 715         }
 716         if (debug)
 717         {
 718             d2 = 0;
 719             for (j = 0; (j < nn); j++)
 720             {
 721                 d2 += sqr(kt[j] - val[3][j]);
 722             }
 723             fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
 724         }
 725         analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
 726                      temp, smooth_tail_start, oenv);
 727         sfree(kt);
 728     }
 729     else if (nset == 6)
 730     {
 731         analyse_corr(nn, time, val[0], val[2], val[4],
 732                      val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
 733     }
 734     else
 735     {
 736         printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
 737         printf("Not doing anything. Sorry.\n");
 738     }
 739 }
 740
 741 static void filter(real flen, int n, int nset, real **val, real dt,
 742                    const output_env_t oenv)
 743 {
 744     int     f, s, i, j;
 745     double *filt, sum, vf, fluc, fluctot;
 746
 747     f = (int)(flen/(2*dt));
 748     snew(filt, f+1);
 749     filt[0] = 1;
 750     sum     = 1;
 751     for (i = 1; i <= f; i++)
 752     {
 753         filt[i] = cos(M_PI*dt*i/flen);
 754         sum    += 2*filt[i];
 755     }
 756     for (i = 0; i <= f; i++)
 757     {
 758         filt[i] /= sum;
 759     }
 760     fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
 761     fprintf(stdout, "  using a filter of length %g of %d points\n", flen, 2*f+1);
 762     fluctot = 0;
 763     for (s = 0; s < nset; s++)
 764     {
 765         fluc = 0;
 766         for (i = f; i < n-f; i++)
 767         {
 768             vf = filt[0]*val[s][i];
 769             for (j = 1; j <= f; j++)
 770             {
 771                 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
 772             }
 773             fluc += sqr(val[s][i] - vf);
 774         }
 775         fluc    /= n - 2*f;
 776         fluctot += fluc;
 777         fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
 778     }
 779     fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
 780     fprintf(stdout, "\n");
 781
 782     sfree(filt);
 783 }
 784
 785 static void do_fit(FILE *out, int n, gmx_bool bYdy,
 786                    int ny, real *x0, real **val,
 787                    int npargs, t_pargs *ppa, const output_env_t oenv)
 788 {
 789     real *c1 = NULL, *sig = NULL, *fitparm;
 790     real  tendfit, tbeginfit;
 791     int   i, efitfn, nparm;
 792
 793     efitfn = get_acffitfn();
 794     nparm  = nfp_ffn[efitfn];
 795     fprintf(out, "Will fit to the following function:\n");
 796     fprintf(out, "%s\n", longs_ffn[efitfn]);
 797     c1 = val[n];
 798     if (bYdy)
 799     {
 800         c1  = val[n];
 801         sig = val[n+1];
 802         fprintf(out, "Using two columns as y and sigma values\n");
 803     }
 804     else
 805     {
 806         snew(sig, ny);
 807     }
 808     if (opt2parg_bSet("-beginfit", npargs, ppa))
 809     {
 810         tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
 811     }
 812     else
 813     {
 814         tbeginfit = x0[0];
 815     }
 816     if (opt2parg_bSet("-endfit", npargs, ppa))
 817     {
 818         tendfit   = opt2parg_real("-endfit", npargs, ppa);
 819     }
 820     else
 821     {
 822         tendfit   = x0[ny-1];
 823     }
 824
 825     snew(fitparm, nparm);
 826     switch (efitfn)
 827     {
 828         case effnEXP1:
 829             fitparm[0] = 0.5;
 830             break;
 831         case effnEXP2:
 832             fitparm[0] = 0.5;
 833             fitparm[1] = c1[0];
 834             break;
 835         case effnEXP3:
 836             fitparm[0] = 1.0;
 837             fitparm[1] = 0.5*c1[0];
 838             fitparm[2] = 10.0;
 839             break;
 840         case effnEXP5:
 841             fitparm[0] = fitparm[2] = 0.5*c1[0];
 842             fitparm[1] = 10;
 843             fitparm[3] = 40;
 844             fitparm[4] = 0;
 845             break;
 846         case effnEXP7:
 847             fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
 848             fitparm[1] = 1;
 849             fitparm[3] = 10;
 850             fitparm[5] = 100;
 851             fitparm[6] = 0;
 852             break;
 853         case effnEXP9:
 854             fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
 855             fitparm[1] = 0.1;
 856             fitparm[3] = 1;
 857             fitparm[5] = 10;
 858             fitparm[7] = 100;
 859             fitparm[8] = 0;
 860             break;
 861         default:
 862             fprintf(out, "Warning: don't know how to initialize the parameters\n");
 863             for (i = 0; (i < nparm); i++)
 864             {
 865                 fitparm[i] = 1;
 866             }
 867     }
 868     fprintf(out, "Starting parameters:\n");
 869     for (i = 0; (i < nparm); i++)
 870     {
 871         fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 872     }
 873     if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
 874                  oenv, bDebugMode(), efitfn, fitparm, 0))
 875     {
 876         for (i = 0; (i < nparm); i++)
 877         {
 878             fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 879         }
 880     }
 881     else
 882     {
 883         fprintf(out, "No solution was found\n");
 884     }
 885 }
 886
 887 static void do_ballistic(const char *balFile, int nData,
 888                          real *t, real **val, int nSet,
 889                          real balTime, int nBalExp,
 890                          gmx_bool bDerivative,
 891                          const output_env_t oenv)
 892 {
 893     double     **ctd   = NULL, *td = NULL;
 894     t_gemParams *GP    = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp, bDerivative);
 895     static char *leg[] = {"Ac'(t)"};
 896     FILE        *fp;
 897     int          i, set;
 898
 899     if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
 900     {
 901         snew(ctd, nSet);
 902         snew(td,  nData);
 903
 904         fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 905         xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
 906
 907         for (set = 0; set < nSet; set++)
 908         {
 909             snew(ctd[set], nData);
 910             for (i = 0; i < nData; i++)
 911             {
 912                 ctd[set][i] = (double)val[set][i];
 913                 if (set == 0)
 914                 {
 915                     td[i] = (double)t[i];
 916                 }
 917             }
 918
 919             takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
 920         }
 921
 922         for (i = 0; i < nData; i++)
 923         {
 924             fprintf(fp, "  %g", t[i]);
 925             for (set = 0; set < nSet; set++)
 926             {
 927                 fprintf(fp, "  %g", ctd[set][i]);
 928             }
 929             fprintf(fp, "\n");
 930         }
 931
 932
 933         for (set = 0; set < nSet; set++)
 934         {
 935             sfree(ctd[set]);
 936         }
 937         sfree(ctd);
 938         sfree(td);
 939     }
 940     else
 941     {
 942         printf("Number of data points is less than the number of parameters to fit\n."
 943                "The system is underdetermined, hence no ballistic term can be found.\n\n");
 944     }
 945 }
 946
 947 static void do_geminate(const char *gemFile, int nData,
 948                         real *t, real **val, int nSet,
 949                         const real D, const real rcut, const real balTime,
 950                         const int nFitPoints, const real begFit, const real endFit,
 951                         const output_env_t oenv)
 952 {
 953     double     **ctd = NULL, **ctdGem = NULL, *td = NULL;
 954     t_gemParams *GP  = init_gemParams(rcut, D, t, nData, nFitPoints,
 955                                       begFit, endFit, balTime, 1, FALSE);
 956     const char  *leg[] = {"Ac\\sgem\\N(t)"};
 957     FILE        *fp;
 958     int          i, set;
 959
 960     snew(ctd,    nSet);
 961     snew(ctdGem, nSet);
 962     snew(td,  nData);
 963
 964     fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 965     xvgr_legend(fp, asize(leg), leg, oenv);
 966
 967     for (set = 0; set < nSet; set++)
 968     {
 969         snew(ctd[set],    nData);
 970         snew(ctdGem[set], nData);
 971         for (i = 0; i < nData; i++)
 972         {
 973             ctd[set][i] = (double)val[set][i];
 974             if (set == 0)
 975             {
 976                 td[i] = (double)t[i];
 977             }
 978         }
 979         fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
 980     }
 981
 982     for (i = 0; i < nData; i++)
 983     {
 984         fprintf(fp, "  %g", t[i]);
 985         for (set = 0; set < nSet; set++)
 986         {
 987             fprintf(fp, "  %g", ctdGem[set][i]);
 988         }
 989         fprintf(fp, "\n");
 990     }
 991
 992     for (set = 0; set < nSet; set++)
 993     {
 994         sfree(ctd[set]);
 995         sfree(ctdGem[set]);
 996     }
 997     sfree(ctd);
 998     sfree(ctdGem);
 999     sfree(td);
1000 }
1001
1002 int gmx_analyze(int argc, char *argv[])
1003 {
1004     static const char *desc[] = {
1005         "[TT]g_analyze[tt] reads an ASCII file and analyzes data sets.",
1006         "A line in the input file may start with a time",
1007         "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1008         "Multiple sets can also be",
1009         "read when they are separated by & (option [TT]-n[tt]);",
1010         "in this case only one [IT]y[it]-value is read from each line.",
1011         "All lines starting with # and @ are skipped.",
1012         "All analyses can also be done for the derivative of a set",
1013         "(option [TT]-d[tt]).[PAR]",
1014
1015         "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1016         "points are equidistant in time.[PAR]",
1017
1018         "[TT]g_analyze[tt] always shows the average and standard deviation of each",
1019         "set, as well as the relative deviation of the third",
1020         "and fourth cumulant from those of a Gaussian distribution with the same",
1021         "standard deviation.[PAR]",
1022
1023         "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1024         "Be sure that the time interval between data points is",
1025         "much shorter than the time scale of the autocorrelation.[PAR]",
1026
1027         "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1028         "i/2 periods. The formula is:[BR]"
1029         "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1030         "This is useful for principal components obtained from covariance",
1031         "analysis, since the principal components of random diffusion are",
1032         "pure cosines.[PAR]",
1033
1034         "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1035
1036         "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1037
1038         "Option [TT]-av[tt] produces the average over the sets.",
1039         "Error bars can be added with the option [TT]-errbar[tt].",
1040         "The errorbars can represent the standard deviation, the error",
1041         "(assuming the points are independent) or the interval containing",
1042         "90% of the points, by discarding 5% of the points at the top and",
1043         "the bottom.[PAR]",
1044
1045         "Option [TT]-ee[tt] produces error estimates using block averaging.",
1046         "A set is divided in a number of blocks and averages are calculated for",
1047         "each block. The error for the total average is calculated from",
1048         "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1049         "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1050         "These errors are plotted as a function of the block size.",
1051         "Also an analytical block average curve is plotted, assuming",
1052         "that the autocorrelation is a sum of two exponentials.",
1053         "The analytical curve for the block average is:[BR]",
1054         "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T (  [GRK]alpha[grk]   ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1055         "                       (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1056         "where T is the total time.",
1057         "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1058         "When the actual block average is very close to the analytical curve,",
1059         "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1060         "The complete derivation is given in",
1061         "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1062
1063         "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1064         "component from a hydrogen bond autocorrelation function by the fitting",
1065         "of a sum of exponentials, as described in e.g.",
1066         "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1067         "is the one with the most negative coefficient in the exponential,",
1068         "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1069         "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1070
1071         "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1072         "(and optionally kD) to the hydrogen bond autocorrelation function",
1073         "according to the reversible geminate recombination model. Removal of",
1074         "the ballistic component first is strongly advised. The model is presented in",
1075         "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1076
1077         "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1078         "of each set and over all sets with respect to a filtered average.",
1079         "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1080         "to len/2. len is supplied with the option [TT]-filter[tt].",
1081         "This filter reduces oscillations with period len/2 and len by a factor",
1082         "of 0.79 and 0.33 respectively.[PAR]",
1083
1084         "Option [TT]-g[tt] fits the data to the function given with option",
1085         "[TT]-fitfn[tt].[PAR]",
1086
1087         "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1088         "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1089         "zero or with a negative value are ignored.[PAR]"
1090
1091         "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1092         "on output from [TT]g_hbond[tt]. The input file can be taken directly",
1093         "from [TT]g_hbond -ac[tt], and then the same result should be produced."
1094     };
1095     static real        tb         = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1096     static gmx_bool    bHaveT     = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1097     static gmx_bool    bEESEF     = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1098     static gmx_bool    bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1099     static int         nsets_in   = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1100     static real        temp       = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1101
1102     /* must correspond to enum avbar* declared at beginning of file */
1103     static const char *avbar_opt[avbarNR+1] = {
1104         NULL, "none", "stddev", "error", "90", NULL
1105     };
1106
1107     t_pargs            pa[] = {
1108         { "-time",    FALSE, etBOOL, {&bHaveT},
1109           "Expect a time in the input" },
1110         { "-b",       FALSE, etREAL, {&tb},
1111           "First time to read from set" },
1112         { "-e",       FALSE, etREAL, {&te},
1113           "Last time to read from set" },
1114         { "-n",       FALSE, etINT, {&nsets_in},
1115           "Read this number of sets separated by &" },
1116         { "-d",       FALSE, etBOOL, {&bDer},
1117           "Use the derivative" },
1118         { "-dp",      FALSE, etINT, {&d},
1119           "HIDDENThe derivative is the difference over this number of points" },
1120         { "-bw",      FALSE, etREAL, {&binwidth},
1121           "Binwidth for the distribution" },
1122         { "-errbar",  FALSE, etENUM, {avbar_opt},
1123           "Error bars for [TT]-av[tt]" },
1124         { "-integrate", FALSE, etBOOL, {&bIntegrate},
1125           "Integrate data function(s) numerically using trapezium rule" },
1126         { "-aver_start", FALSE, etREAL, {&aver_start},
1127           "Start averaging the integral from here" },
1128         { "-xydy",    FALSE, etBOOL, {&bXYdy},
1129           "Interpret second data set as error in the y values for integrating" },
1130         { "-regression", FALSE, etBOOL, {&bRegression},
1131           "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1132         { "-luzar",   FALSE, etBOOL, {&bLuzar},
1133           "Do a Luzar and Chandler analysis on a correlation function and related as produced by [TT]g_hbond[tt]. When in addition the [TT]-xydy[tt] flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1134         { "-temp",    FALSE, etREAL, {&temp},
1135           "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1136         { "-fitstart", FALSE, etREAL, {&fit_start},
1137           "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1138         { "-fitend", FALSE, etREAL, {&fit_end},
1139           "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1140         { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1141           "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1142         { "-nbmin",   FALSE, etINT, {&nb_min},
1143           "HIDDENMinimum number of blocks for block averaging" },
1144         { "-resol", FALSE, etINT, {&resol},
1145           "HIDDENResolution for the block averaging, block size increases with"
1146           " a factor 2^(1/resol)" },
1147         { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1148           "HIDDENAlways use a single exponential fit for the error estimate" },
1149         { "-eenlc", FALSE, etBOOL, {&bEENLC},
1150           "HIDDENAllow a negative long-time correlation" },
1151         { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1152           "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1153         { "-filter",  FALSE, etREAL, {&filtlen},
1154           "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1155         { "-power", FALSE, etBOOL, {&bPower},
1156           "Fit data to: b t^a" },
1157         { "-subav", FALSE, etBOOL, {&bSubAv},
1158           "Subtract the average before autocorrelating" },
1159         { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1160           "Calculate one ACF over all sets" },
1161         { "-nbalexp", FALSE, etINT, {&nBalExp},
1162           "HIDDENNumber of exponentials to fit to the ultrafast component" },
1163         { "-baltime", FALSE, etREAL, {&balTime},
1164           "HIDDENTime up to which the ballistic component will be fitted" },
1165 /*     { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1166 /*       "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1167 /*     { "-rcut", FALSE, etREAL, {&rcut}, */
1168 /*       "Cut-off for hydrogen bonds in geminate algorithms" }, */
1169 /*     { "-gemtype", FALSE, etENUM, {gemType}, */
1170 /*       "What type of gminate recombination to use"}, */
1171 /*     { "-D", FALSE, etREAL, {&diffusion}, */
1172 /*       "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1173     };
1174 #define NPA asize(pa)
1175
1176     FILE           *out, *out_fit;
1177     int             n, nlast, s, nset, i, j = 0;
1178     real          **val, *t, dt, tot, error;
1179     double         *av, *sig, cum1, cum2, cum3, cum4, db;
1180     const char     *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1181     output_env_t    oenv;
1182
1183     t_filenm        fnm[] = {
1184         { efXVG, "-f",    "graph",    ffREAD   },
1185         { efXVG, "-ac",   "autocorr", ffOPTWR  },
1186         { efXVG, "-msd",  "msd",      ffOPTWR  },
1187         { efXVG, "-cc",   "coscont",  ffOPTWR  },
1188         { efXVG, "-dist", "distr",    ffOPTWR  },
1189         { efXVG, "-av",   "average",  ffOPTWR  },
1190         { efXVG, "-ee",   "errest",   ffOPTWR  },
1191         { efXVG, "-bal",  "ballisitc", ffOPTWR  },
1192 /*     { efXVG, "-gem",  "geminate", ffOPTWR  }, */
1193         { efLOG, "-g",    "fitlog",   ffOPTWR  }
1194     };
1195 #define NFILE asize(fnm)
1196
1197     int      npargs;
1198     t_pargs *ppa;
1199
1200     npargs = asize(pa);
1201     ppa    = add_acf_pargs(&npargs, pa);
1202
1203     CopyRight(stderr, argv[0]);
1204     parse_common_args(&argc, argv, PCA_CAN_VIEW,
1205                       NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv);
1206
1207     acfile   = opt2fn_null("-ac", NFILE, fnm);
1208     msdfile  = opt2fn_null("-msd", NFILE, fnm);
1209     ccfile   = opt2fn_null("-cc", NFILE, fnm);
1210     distfile = opt2fn_null("-dist", NFILE, fnm);
1211     avfile   = opt2fn_null("-av", NFILE, fnm);
1212     eefile   = opt2fn_null("-ee", NFILE, fnm);
1213     balfile  = opt2fn_null("-bal", NFILE, fnm);
1214 /*   gemfile  = opt2fn_null("-gem",NFILE,fnm); */
1215     /* When doing autocorrelation we don't want a fitlog for fitting
1216      * the function itself (not the acf) when the user did not ask for it.
1217      */
1218     if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1219     {
1220         fitfile  = opt2fn("-g", NFILE, fnm);
1221     }
1222     else
1223     {
1224         fitfile  = opt2fn_null("-g", NFILE, fnm);
1225     }
1226
1227     val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1228                         opt2parg_bSet("-b", npargs, ppa), tb,
1229                         opt2parg_bSet("-e", npargs, ppa), te,
1230                         nsets_in, &nset, &n, &dt, &t);
1231     printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1232
1233     if (bDer)
1234     {
1235         printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1236                d, d);
1237         n -= d;
1238         for (s = 0; s < nset; s++)
1239         {
1240             for (i = 0; (i < n); i++)
1241             {
1242                 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1243             }
1244         }
1245     }
1246
1247     if (bIntegrate)
1248     {
1249         real sum, stddev;
1250
1251         printf("Calculating the integral using the trapezium rule\n");
1252
1253         if (bXYdy)
1254         {
1255             sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1256             printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1257         }
1258         else
1259         {
1260             for (s = 0; s < nset; s++)
1261             {
1262                 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1263                 printf("Integral %d  %10.5f  +/- %10.5f\n", s+1, sum, stddev);
1264             }
1265         }
1266     }
1267
1268     if (fitfile != NULL)
1269     {
1270         out_fit = ffopen(fitfile, "w");
1271         if (bXYdy && nset >= 2)
1272         {
1273             do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1274         }
1275         else
1276         {
1277             for (s = 0; s < nset; s++)
1278             {
1279                 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1280             }
1281         }
1282         ffclose(out_fit);
1283     }
1284
1285     printf("                                      std. dev.    relative deviation of\n");
1286     printf("                       standard       ---------   cumulants from those of\n");
1287     printf("set      average       deviation      sqrt(n-1)   a Gaussian distribition\n");
1288     printf("                                                      cum. 3   cum. 4\n");
1289     snew(av, nset);
1290     snew(sig, nset);
1291     for (s = 0; (s < nset); s++)
1292     {
1293         cum1 = 0;
1294         cum2 = 0;
1295         cum3 = 0;
1296         cum4 = 0;
1297         for (i = 0; (i < n); i++)
1298         {
1299             cum1 += val[s][i];
1300         }
1301         cum1 /= n;
1302         for (i = 0; (i < n); i++)
1303         {
1304             db    = val[s][i]-cum1;
1305             cum2 += db*db;
1306             cum3 += db*db*db;
1307             cum4 += db*db*db*db;
1308         }
1309         cum2  /= n;
1310         cum3  /= n;
1311         cum4  /= n;
1312         av[s]  = cum1;
1313         sig[s] = sqrt(cum2);
1314         if (n > 1)
1315         {
1316             error = sqrt(cum2/(n-1));
1317         }
1318         else
1319         {
1320             error = 0;
1321         }
1322         printf("SS%d  %13.6e   %12.6e   %12.6e      %6.3f   %6.3f\n",
1323                s+1, av[s], sig[s], error,
1324                sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1325                sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1326     }
1327     printf("\n");
1328
1329     if (filtlen)
1330     {
1331         filter(filtlen, n, nset, val, dt, oenv);
1332     }
1333
1334     if (msdfile)
1335     {
1336         out = xvgropen(msdfile, "Mean square displacement",
1337                        "time", "MSD (nm\\S2\\N)", oenv);
1338         nlast = (int)(n*frac);
1339         for (s = 0; s < nset; s++)
1340         {
1341             for (j = 0; j <= nlast; j++)
1342             {
1343                 if (j % 100 == 0)
1344                 {
1345                     fprintf(stderr, "\r%d", j);
1346                 }
1347                 tot = 0;
1348                 for (i = 0; i < n-j; i++)
1349                 {
1350                     tot += sqr(val[s][i]-val[s][i+j]);
1351                 }
1352                 tot /= (real)(n-j);
1353                 fprintf(out, " %g %8g\n", dt*j, tot);
1354             }
1355             if (s < nset-1)
1356             {
1357                 fprintf(out, "&\n");
1358             }
1359         }
1360         ffclose(out);
1361         fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1362     }
1363     if (ccfile)
1364     {
1365         plot_coscont(ccfile, n, nset, val, oenv);
1366     }
1367
1368     if (distfile)
1369     {
1370         histogram(distfile, binwidth, n, nset, val, oenv);
1371     }
1372     if (avfile)
1373     {
1374         average(avfile, nenum(avbar_opt), n, nset, val, t);
1375     }
1376     if (eefile)
1377     {
1378         estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1379                        bEeFitAc, bEESEF, bEENLC, oenv);
1380     }
1381     if (balfile)
1382     {
1383         do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, bDer, oenv);
1384     }
1385 /*   if (gemfile) */
1386 /*       do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1387 /*                   nFitPoints, fit_start, fit_end, oenv); */
1388     if (bPower)
1389     {
1390         power_fit(n, nset, val, t);
1391     }
1392
1393     if (acfile != NULL)
1394     {
1395         if (bSubAv)
1396         {
1397             for (s = 0; s < nset; s++)
1398             {
1399                 for (i = 0; i < n; i++)
1400                 {
1401                     val[s][i] -= av[s];
1402                 }
1403             }
1404         }
1405         do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1406                     eacNormal, bAverCorr);
1407     }
1408
1409     if (bRegression)
1410     {
1411         regression_analysis(n, bXYdy, t, nset, val);
1412     }
1413
1414     if (bLuzar)
1415     {
1416         luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1417     }
1418
1419     view_all(oenv, NFILE, fnm);
1420
1421     thanx(stderr);
1422
1423     return 0;
1424 }