src/tools/gmx_analyze.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41 #include <math.h>
  42 #include <string.h>
  43 #include "statutil.h"
  44 #include "sysstuff.h"
  45 #include "typedefs.h"
  46 #include "smalloc.h"
  47 #include "macros.h"
  48 #include "gmx_fatal.h"
  49 #include "vec.h"
  50 #include "copyrite.h"
  51 #include "futil.h"
  52 #include "readinp.h"
  53 #include "statutil.h"
  54 #include "txtdump.h"
  55 #include "gstat.h"
  56 #include "gmx_matrix.h"
  57 #include "gmx_statistics.h"
  58 #include "xvgr.h"
  59 #include "gmx_ana.h"
  60 #include "geminate.h"
  61
  62 /* must correspond to char *avbar_opt[] declared in main() */
  63 enum {
  64     avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
  65 };
  66
  67 static void power_fit(int n, int nset, real **val, real *t)
  68 {
  69     real *x, *y, quality, a, b, r;
  70     int   s, i;
  71
  72     snew(x, n);
  73     snew(y, n);
  74
  75     if (t[0] > 0)
  76     {
  77         for (i = 0; i < n; i++)
  78         {
  79             if (t[0] > 0)
  80             {
  81                 x[i] = log(t[i]);
  82             }
  83         }
  84     }
  85     else
  86     {
  87         fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
  88         for (i = 0; i < n; i++)
  89         {
  90             x[i] = log(i+1);
  91         }
  92     }
  93
  94     for (s = 0; s < nset; s++)
  95     {
  96         i = 0;
  97         for (i = 0; i < n && val[s][i] >= 0; i++)
  98         {
  99             y[i] = log(val[s][i]);
 100         }
 101         if (i < n)
 102         {
 103             fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
 104         }
 105         lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
 106         fprintf(stdout, "Power fit set %3d:  error %.3f  a %g  b %g\n",
 107                 s+1, quality, a, exp(b));
 108     }
 109
 110     sfree(y);
 111     sfree(x);
 112 }
 113
 114 static real cosine_content(int nhp, int n, real *y)
 115 /* Assumes n equidistant points */
 116 {
 117     double fac, cosyint, yyint;
 118     int    i;
 119
 120     if (n < 2)
 121     {
 122         return 0;
 123     }
 124
 125     fac = M_PI*nhp/(n-1);
 126
 127     cosyint = 0;
 128     yyint   = 0;
 129     for (i = 0; i < n; i++)
 130     {
 131         cosyint += cos(fac*i)*y[i];
 132         yyint   += y[i]*y[i];
 133     }
 134
 135     return 2*cosyint*cosyint/(n*yyint);
 136 }
 137
 138 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
 139                          const output_env_t oenv)
 140 {
 141     FILE *fp;
 142     int   s;
 143     real  cc;
 144
 145     fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
 146                   oenv);
 147
 148     for (s = 0; s < nset; s++)
 149     {
 150         cc = cosine_content(s+1, n, val[s]);
 151         fprintf(fp, " %d %g\n", s+1, cc);
 152         fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
 153                 s+1, 0.5*(s+1), cc);
 154     }
 155     fprintf(stdout, "\n");
 156
 157     ffclose(fp);
 158 }
 159
 160 static void regression_analysis(int n, gmx_bool bXYdy,
 161                                 real *x, int nset, real **val)
 162 {
 163     real S, chi2, a, b, da, db, r = 0;
 164     int  ok;
 165
 166     if (bXYdy || (nset == 1))
 167     {
 168         printf("Fitting data to a function f(x) = ax + b\n");
 169         printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
 170         printf("Error estimates will be given if w_i (sigma) values are given\n");
 171         printf("(use option -xydy).\n\n");
 172         if (bXYdy)
 173         {
 174             if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
 175             {
 176                 gmx_fatal(FARGS, "Error fitting the data: %s",
 177                           gmx_stats_message(ok));
 178             }
 179         }
 180         else
 181         {
 182             if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
 183             {
 184                 gmx_fatal(FARGS, "Error fitting the data: %s",
 185                           gmx_stats_message(ok));
 186             }
 187         }
 188         chi2 = sqr((n-2)*S);
 189         printf("Chi2                    = %g\n", chi2);
 190         printf("S (Sqrt(Chi2/(n-2))     = %g\n", S);
 191         printf("Correlation coefficient = %.1f%%\n", 100*r);
 192         printf("\n");
 193         if (bXYdy)
 194         {
 195             printf("a    = %g +/- %g\n", a, da);
 196             printf("b    = %g +/- %g\n", b, db);
 197         }
 198         else
 199         {
 200             printf("a    = %g\n", a);
 201             printf("b    = %g\n", b);
 202         }
 203     }
 204     else
 205     {
 206         double chi2, *a, **xx, *y;
 207         int    i, j;
 208
 209         snew(y, n);
 210         snew(xx, nset-1);
 211         for (j = 0; (j < nset-1); j++)
 212         {
 213             snew(xx[j], n);
 214         }
 215         for (i = 0; (i < n); i++)
 216         {
 217             y[i] = val[0][i];
 218             for (j = 1; (j < nset); j++)
 219             {
 220                 xx[j-1][i] = val[j][i];
 221             }
 222         }
 223         snew(a, nset-1);
 224         chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
 225         printf("Fitting %d data points in %d sets\n", n, nset-1);
 226         printf("chi2 = %g\n", chi2);
 227         printf("A =");
 228         for (i = 0; (i < nset-1); i++)
 229         {
 230             printf("  %g", a[i]);
 231             sfree(xx[i]);
 232         }
 233         printf("\n");
 234         sfree(xx);
 235         sfree(y);
 236         sfree(a);
 237     }
 238 }
 239
 240 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
 241                const output_env_t oenv)
 242 {
 243     FILE          *fp;
 244     int            i, s;
 245     double         min, max;
 246     int            nbin;
 247 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 248     long long int *histo;
 249 #else
 250     double        *histo;
 251 #endif
 252
 253     min = val[0][0];
 254     max = val[0][0];
 255     for (s = 0; s < nset; s++)
 256     {
 257         for (i = 0; i < n; i++)
 258         {
 259             if (val[s][i] < min)
 260             {
 261                 min = val[s][i];
 262             }
 263             else if (val[s][i] > max)
 264             {
 265                 max = val[s][i];
 266             }
 267         }
 268     }
 269
 270     min = binwidth*floor(min/binwidth);
 271     max = binwidth*ceil(max/binwidth);
 272     if (min != 0)
 273     {
 274         min -= binwidth;
 275     }
 276     max += binwidth;
 277
 278     nbin = (int)((max - min)/binwidth + 0.5) + 1;
 279     fprintf(stderr, "Making distributions with %d bins\n", nbin);
 280     snew(histo, nbin);
 281     fp = xvgropen(distfile, "Distribution", "", "", oenv);
 282     for (s = 0; s < nset; s++)
 283     {
 284         for (i = 0; i < nbin; i++)
 285         {
 286             histo[i] = 0;
 287         }
 288         for (i = 0; i < n; i++)
 289         {
 290             histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
 291         }
 292         for (i = 0; i < nbin; i++)
 293         {
 294             fprintf(fp, " %g  %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
 295         }
 296         if (s < nset-1)
 297         {
 298             fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
 299         }
 300     }
 301     ffclose(fp);
 302 }
 303
 304 static int real_comp(const void *a, const void *b)
 305 {
 306     real dif = *(real *)a - *(real *)b;
 307
 308     if (dif < 0)
 309     {
 310         return -1;
 311     }
 312     else if (dif > 0)
 313     {
 314         return 1;
 315     }
 316     else
 317     {
 318         return 0;
 319     }
 320 }
 321
 322 static void average(const char *avfile, int avbar_opt,
 323                     int n, int nset, real **val, real *t)
 324 {
 325     FILE   *fp;
 326     int     i, s, edge = 0;
 327     double  av, var, err;
 328     real   *tmp = NULL;
 329
 330     fp = ffopen(avfile, "w");
 331     if ((avbar_opt == avbarERROR) && (nset == 1))
 332     {
 333         avbar_opt = avbarNONE;
 334     }
 335     if (avbar_opt != avbarNONE)
 336     {
 337         if (avbar_opt == avbar90)
 338         {
 339             snew(tmp, nset);
 340             fprintf(fp, "@TYPE xydydy\n");
 341             edge = (int)(nset*0.05+0.5);
 342             fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
 343                     " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
 344         }
 345         else
 346         {
 347             fprintf(fp, "@TYPE xydy\n");
 348         }
 349     }
 350
 351     for (i = 0; i < n; i++)
 352     {
 353         av = 0;
 354         for (s = 0; s < nset; s++)
 355         {
 356             av += val[s][i];
 357         }
 358         av /= nset;
 359         fprintf(fp, " %g %g", t[i], av);
 360         var = 0;
 361         if (avbar_opt != avbarNONE)
 362         {
 363             if (avbar_opt == avbar90)
 364             {
 365                 for (s = 0; s < nset; s++)
 366                 {
 367                     tmp[s] = val[s][i];
 368                 }
 369                 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
 370                 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
 371             }
 372             else
 373             {
 374                 for (s = 0; s < nset; s++)
 375                 {
 376                     var += sqr(val[s][i]-av);
 377                 }
 378                 if (avbar_opt == avbarSTDDEV)
 379                 {
 380                     err = sqrt(var/nset);
 381                 }
 382                 else
 383                 {
 384                     err = sqrt(var/(nset*(nset-1)));
 385                 }
 386                 fprintf(fp, " %g", err);
 387             }
 388         }
 389         fprintf(fp, "\n");
 390     }
 391     ffclose(fp);
 392
 393     if (avbar_opt == avbar90)
 394     {
 395         sfree(tmp);
 396     }
 397 }
 398
 399 static real anal_ee_inf(real *parm, real T)
 400 {
 401     return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
 402 }
 403
 404 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
 405                            int nset, double *av, double *sig, real **val, real dt,
 406                            gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
 407                            const output_env_t oenv)
 408 {
 409     FILE    *fp;
 410     int      bs, prev_bs, nbs, nb;
 411     real     spacing, nbr;
 412     int      s, i, j;
 413     double   blav, var;
 414     char   **leg;
 415     real    *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
 416     real     fitparm[4];
 417     real     ee, a, tau1, tau2;
 418
 419     if (n < 4)
 420     {
 421         fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
 422
 423         return;
 424     }
 425
 426     fp = xvgropen(eefile, "Error estimates",
 427                   "Block size (time)", "Error estimate", oenv);
 428     if (output_env_get_print_xvgr_codes(oenv))
 429     {
 430         fprintf(fp,
 431                 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
 432                 (n-1)*dt, n);
 433     }
 434     snew(leg, 2*nset);
 435     xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
 436     sfree(leg);
 437
 438     spacing = pow(2, 1.0/resol);
 439     snew(tbs, n);
 440     snew(ybs, n);
 441     snew(fitsig, n);
 442     for (s = 0; s < nset; s++)
 443     {
 444         nbs     = 0;
 445         prev_bs = 0;
 446         nbr     = nb_min;
 447         while (nbr <= n)
 448         {
 449             bs = n/(int)nbr;
 450             if (bs != prev_bs)
 451             {
 452                 nb  = n/bs;
 453                 var = 0;
 454                 for (i = 0; i < nb; i++)
 455                 {
 456                     blav = 0;
 457                     for (j = 0; j < bs; j++)
 458                     {
 459                         blav += val[s][bs*i+j];
 460                     }
 461                     var += sqr(av[s] - blav/bs);
 462                 }
 463                 tbs[nbs] = bs*dt;
 464                 if (sig[s] == 0)
 465                 {
 466                     ybs[nbs] = 0;
 467                 }
 468                 else
 469                 {
 470                     ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
 471                 }
 472                 nbs++;
 473             }
 474             nbr    *= spacing;
 475             nb      = (int)(nbr+0.5);
 476             prev_bs = bs;
 477         }
 478         if (sig[s] == 0)
 479         {
 480             ee   = 0;
 481             a    = 1;
 482             tau1 = 0;
 483             tau2 = 0;
 484         }
 485         else
 486         {
 487             for (i = 0; i < nbs/2; i++)
 488             {
 489                 rtmp         = tbs[i];
 490                 tbs[i]       = tbs[nbs-1-i];
 491                 tbs[nbs-1-i] = rtmp;
 492                 rtmp         = ybs[i];
 493                 ybs[i]       = ybs[nbs-1-i];
 494                 ybs[nbs-1-i] = rtmp;
 495             }
 496             /* The initial slope of the normalized ybs^2 is 1.
 497              * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
 498              * From this we take our initial guess for tau1.
 499              */
 500             twooe = 2/exp(1);
 501             i     = -1;
 502             do
 503             {
 504                 i++;
 505                 tau1_est = tbs[i];
 506             }
 507             while (i < nbs - 1 &&
 508                    (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
 509
 510             if (ybs[0] > ybs[1])
 511             {
 512                 fprintf(stdout, "Data set %d has strange time correlations:\n"
 513                         "the std. error using single points is larger than that of blocks of 2 points\n"
 514                         "The error estimate might be inaccurate, check the fit\n",
 515                         s+1);
 516                 /* Use the total time as tau for the fitting weights */
 517                 tau_sig = (n - 1)*dt;
 518             }
 519             else
 520             {
 521                 tau_sig = tau1_est;
 522             }
 523
 524             if (debug)
 525             {
 526                 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
 527             }
 528
 529             /* Generate more or less appropriate sigma's,
 530              * also taking the density of points into account.
 531              */
 532             for (i = 0; i < nbs; i++)
 533             {
 534                 if (i == 0)
 535                 {
 536                     dens = tbs[1]/tbs[0] - 1;
 537                 }
 538                 else if (i == nbs-1)
 539                 {
 540                     dens = tbs[nbs-1]/tbs[nbs-2] - 1;
 541                 }
 542                 else
 543                 {
 544                     dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
 545                 }
 546                 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
 547             }
 548
 549             if (!bSingleExpFit)
 550             {
 551                 fitparm[0] = tau1_est;
 552                 fitparm[1] = 0.95;
 553                 /* We set the initial guess for tau2
 554                  * to halfway between tau1_est and the total time (on log scale).
 555                  */
 556                 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
 557                 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
 558                          bDebugMode(), effnERREST, fitparm, 0);
 559                 fitparm[3] = 1-fitparm[1];
 560             }
 561             if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
 562                 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
 563             {
 564                 if (!bSingleExpFit)
 565                 {
 566                     if (fitparm[2] > (n-1)*dt)
 567                     {
 568                         fprintf(stdout,
 569                                 "Warning: tau2 is longer than the length of the data (%g)\n"
 570                                 "         the statistics might be bad\n",
 571                                 (n-1)*dt);
 572                     }
 573                     else
 574                     {
 575                         fprintf(stdout, "a fitted parameter is negative\n");
 576                     }
 577                     fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 578                             sig[s]*anal_ee_inf(fitparm, n*dt),
 579                             fitparm[1], fitparm[0], fitparm[2]);
 580                     /* Do a fit with tau2 fixed at the total time.
 581                      * One could also choose any other large value for tau2.
 582                      */
 583                     fitparm[0] = tau1_est;
 584                     fitparm[1] = 0.95;
 585                     fitparm[2] = (n-1)*dt;
 586                     fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
 587                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 588                              effnERREST, fitparm, 4);
 589                     fitparm[3] = 1-fitparm[1];
 590                 }
 591                 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
 592                     || (fitparm[1] > 1 && !bAllowNegLTCorr))
 593                 {
 594                     if (!bSingleExpFit)
 595                     {
 596                         fprintf(stdout, "a fitted parameter is negative\n");
 597                         fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 598                                 sig[s]*anal_ee_inf(fitparm, n*dt),
 599                                 fitparm[1], fitparm[0], fitparm[2]);
 600                     }
 601                     /* Do a single exponential fit */
 602                     fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
 603                     fitparm[0] = tau1_est;
 604                     fitparm[1] = 1.0;
 605                     fitparm[2] = 0.0;
 606                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 607                              effnERREST, fitparm, 6);
 608                     fitparm[3] = 1-fitparm[1];
 609                 }
 610             }
 611             ee   = sig[s]*anal_ee_inf(fitparm, n*dt);
 612             a    = fitparm[1];
 613             tau1 = fitparm[0];
 614             tau2 = fitparm[2];
 615         }
 616         fprintf(stdout, "Set %3d:  err.est. %g  a %g  tau1 %g  tau2 %g\n",
 617                 s+1, ee, a, tau1, tau2);
 618         if(output_env_get_print_xvgr_codes(oenv))
 619         {
 620             fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
 621             fprintf(fp, "@ legend string %d \"ee %6g\"\n",2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
 622         }
 623         for (i = 0; i < nbs; i++)
 624         {
 625             fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
 626                     sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
 627         }
 628
 629         if (bFitAc)
 630         {
 631             int   fitlen;
 632             real *ac, acint, ac_fit[4];
 633
 634             snew(ac, n);
 635             for (i = 0; i < n; i++)
 636             {
 637                 ac[i] = val[s][i] - av[s];
 638                 if (i > 0)
 639                 {
 640                     fitsig[i] = sqrt(i);
 641                 }
 642                 else
 643                 {
 644                     fitsig[i] = 1;
 645                 }
 646             }
 647             low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
 648                             dt, eacNormal, 1, FALSE, TRUE,
 649                             FALSE, 0, 0, effnNONE, 0);
 650
 651             fitlen = n/nb_min;
 652
 653             /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
 654             acint = 0.5*ac[0];
 655             for (i = 1; i <= fitlen/2; i++)
 656             {
 657                 acint += ac[i];
 658             }
 659             acint *= dt;
 660
 661             /* Generate more or less appropriate sigma's */
 662             for (i = 0; i <= fitlen; i++)
 663             {
 664                 fitsig[i] = sqrt(acint + dt*i);
 665             }
 666
 667             ac_fit[0] = 0.5*acint;
 668             ac_fit[1] = 0.95;
 669             ac_fit[2] = 10*acint;
 670             do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
 671                      bDebugMode(), effnEXP3, ac_fit, 0);
 672             ac_fit[3] = 1 - ac_fit[1];
 673
 674             fprintf(stdout, "Set %3d:  ac erest %g  a %g  tau1 %g  tau2 %g\n",
 675                     s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
 676                     ac_fit[1], ac_fit[0], ac_fit[2]);
 677
 678             fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
 679             for (i = 0; i < nbs; i++)
 680             {
 681                 fprintf(fp, "%g %g\n", tbs[i],
 682                         sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
 683             }
 684
 685             sfree(ac);
 686         }
 687         if (s < nset-1)
 688         {
 689             fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
 690         }
 691     }
 692     sfree(fitsig);
 693     sfree(ybs);
 694     sfree(tbs);
 695     ffclose(fp);
 696 }
 697
 698 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
 699                          gmx_bool bError, real fit_start, real smooth_tail_start,
 700                          const output_env_t oenv)
 701 {
 702     const real tol = 1e-8;
 703     real      *kt;
 704     real       weight, d2;
 705     int        j;
 706
 707     please_cite(stdout, "Spoel2006b");
 708
 709     /* Compute negative derivative k(t) = -dc(t)/dt */
 710     if (!bError)
 711     {
 712         snew(kt, nn);
 713         compute_derivative(nn, time, val[0], kt);
 714         for (j = 0; (j < nn); j++)
 715         {
 716             kt[j] = -kt[j];
 717         }
 718         if (debug)
 719         {
 720             d2 = 0;
 721             for (j = 0; (j < nn); j++)
 722             {
 723                 d2 += sqr(kt[j] - val[3][j]);
 724             }
 725             fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
 726         }
 727         analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
 728                      temp, smooth_tail_start, oenv);
 729         sfree(kt);
 730     }
 731     else if (nset == 6)
 732     {
 733         analyse_corr(nn, time, val[0], val[2], val[4],
 734                      val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
 735     }
 736     else
 737     {
 738         printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
 739         printf("Not doing anything. Sorry.\n");
 740     }
 741 }
 742
 743 static void filter(real flen, int n, int nset, real **val, real dt,
 744                    const output_env_t oenv)
 745 {
 746     int     f, s, i, j;
 747     double *filt, sum, vf, fluc, fluctot;
 748
 749     f = (int)(flen/(2*dt));
 750     snew(filt, f+1);
 751     filt[0] = 1;
 752     sum     = 1;
 753     for (i = 1; i <= f; i++)
 754     {
 755         filt[i] = cos(M_PI*dt*i/flen);
 756         sum    += 2*filt[i];
 757     }
 758     for (i = 0; i <= f; i++)
 759     {
 760         filt[i] /= sum;
 761     }
 762     fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
 763     fprintf(stdout, "  using a filter of length %g of %d points\n", flen, 2*f+1);
 764     fluctot = 0;
 765     for (s = 0; s < nset; s++)
 766     {
 767         fluc = 0;
 768         for (i = f; i < n-f; i++)
 769         {
 770             vf = filt[0]*val[s][i];
 771             for (j = 1; j <= f; j++)
 772             {
 773                 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
 774             }
 775             fluc += sqr(val[s][i] - vf);
 776         }
 777         fluc    /= n - 2*f;
 778         fluctot += fluc;
 779         fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
 780     }
 781     fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
 782     fprintf(stdout, "\n");
 783
 784     sfree(filt);
 785 }
 786
 787 static void do_fit(FILE *out, int n, gmx_bool bYdy,
 788                    int ny, real *x0, real **val,
 789                    int npargs, t_pargs *ppa, const output_env_t oenv)
 790 {
 791     real *c1 = NULL, *sig = NULL, *fitparm;
 792     real  tendfit, tbeginfit;
 793     int   i, efitfn, nparm;
 794
 795     efitfn = get_acffitfn();
 796     nparm  = nfp_ffn[efitfn];
 797     fprintf(out, "Will fit to the following function:\n");
 798     fprintf(out, "%s\n", longs_ffn[efitfn]);
 799     c1 = val[n];
 800     if (bYdy)
 801     {
 802         c1  = val[n];
 803         sig = val[n+1];
 804         fprintf(out, "Using two columns as y and sigma values\n");
 805     }
 806     else
 807     {
 808         snew(sig, ny);
 809     }
 810     if (opt2parg_bSet("-beginfit", npargs, ppa))
 811     {
 812         tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
 813     }
 814     else
 815     {
 816         tbeginfit = x0[0];
 817     }
 818     if (opt2parg_bSet("-endfit", npargs, ppa))
 819     {
 820         tendfit   = opt2parg_real("-endfit", npargs, ppa);
 821     }
 822     else
 823     {
 824         tendfit   = x0[ny-1];
 825     }
 826
 827     snew(fitparm, nparm);
 828     switch (efitfn)
 829     {
 830         case effnEXP1:
 831             fitparm[0] = 0.5;
 832             break;
 833         case effnEXP2:
 834             fitparm[0] = 0.5;
 835             fitparm[1] = c1[0];
 836             break;
 837         case effnEXP3:
 838             fitparm[0] = 1.0;
 839             fitparm[1] = 0.5*c1[0];
 840             fitparm[2] = 10.0;
 841             break;
 842         case effnEXP5:
 843             fitparm[0] = fitparm[2] = 0.5*c1[0];
 844             fitparm[1] = 10;
 845             fitparm[3] = 40;
 846             fitparm[4] = 0;
 847             break;
 848         case effnEXP7:
 849             fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
 850             fitparm[1] = 1;
 851             fitparm[3] = 10;
 852             fitparm[5] = 100;
 853             fitparm[6] = 0;
 854             break;
 855         case effnEXP9:
 856             fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
 857             fitparm[1] = 0.1;
 858             fitparm[3] = 1;
 859             fitparm[5] = 10;
 860             fitparm[7] = 100;
 861             fitparm[8] = 0;
 862             break;
 863         default:
 864             fprintf(out, "Warning: don't know how to initialize the parameters\n");
 865             for (i = 0; (i < nparm); i++)
 866             {
 867                 fitparm[i] = 1;
 868             }
 869     }
 870     fprintf(out, "Starting parameters:\n");
 871     for (i = 0; (i < nparm); i++)
 872     {
 873         fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 874     }
 875     if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
 876                  oenv, bDebugMode(), efitfn, fitparm, 0))
 877     {
 878         for (i = 0; (i < nparm); i++)
 879         {
 880             fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 881         }
 882     }
 883     else
 884     {
 885         fprintf(out, "No solution was found\n");
 886     }
 887 }
 888
 889 static void do_ballistic(const char *balFile, int nData,
 890                          real *t, real **val, int nSet,
 891                          real balTime, int nBalExp,
 892                          gmx_bool bDerivative,
 893                          const output_env_t oenv)
 894 {
 895     double     **ctd   = NULL, *td = NULL;
 896     t_gemParams *GP    = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp, bDerivative);
 897     static char *leg[] = {"Ac'(t)"};
 898     FILE        *fp;
 899     int          i, set;
 900
 901     if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
 902     {
 903         snew(ctd, nSet);
 904         snew(td,  nData);
 905
 906         fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 907         xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
 908
 909         for (set = 0; set < nSet; set++)
 910         {
 911             snew(ctd[set], nData);
 912             for (i = 0; i < nData; i++)
 913             {
 914                 ctd[set][i] = (double)val[set][i];
 915                 if (set == 0)
 916                 {
 917                     td[i] = (double)t[i];
 918                 }
 919             }
 920
 921             takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
 922         }
 923
 924         for (i = 0; i < nData; i++)
 925         {
 926             fprintf(fp, "  %g", t[i]);
 927             for (set = 0; set < nSet; set++)
 928             {
 929                 fprintf(fp, "  %g", ctd[set][i]);
 930             }
 931             fprintf(fp, "\n");
 932         }
 933
 934
 935         for (set = 0; set < nSet; set++)
 936         {
 937             sfree(ctd[set]);
 938         }
 939         sfree(ctd);
 940         sfree(td);
 941     }
 942     else
 943     {
 944         printf("Number of data points is less than the number of parameters to fit\n."
 945                "The system is underdetermined, hence no ballistic term can be found.\n\n");
 946     }
 947 }
 948
 949 static void do_geminate(const char *gemFile, int nData,
 950                         real *t, real **val, int nSet,
 951                         const real D, const real rcut, const real balTime,
 952                         const int nFitPoints, const real begFit, const real endFit,
 953                         const output_env_t oenv)
 954 {
 955     double     **ctd = NULL, **ctdGem = NULL, *td = NULL;
 956     t_gemParams *GP  = init_gemParams(rcut, D, t, nData, nFitPoints,
 957                                       begFit, endFit, balTime, 1, FALSE);
 958     const char  *leg[] = {"Ac\\sgem\\N(t)"};
 959     FILE        *fp;
 960     int          i, set;
 961
 962     snew(ctd,    nSet);
 963     snew(ctdGem, nSet);
 964     snew(td,  nData);
 965
 966     fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 967     xvgr_legend(fp, asize(leg), leg, oenv);
 968
 969     for (set = 0; set < nSet; set++)
 970     {
 971         snew(ctd[set],    nData);
 972         snew(ctdGem[set], nData);
 973         for (i = 0; i < nData; i++)
 974         {
 975             ctd[set][i] = (double)val[set][i];
 976             if (set == 0)
 977             {
 978                 td[i] = (double)t[i];
 979             }
 980         }
 981         fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
 982     }
 983
 984     for (i = 0; i < nData; i++)
 985     {
 986         fprintf(fp, "  %g", t[i]);
 987         for (set = 0; set < nSet; set++)
 988         {
 989             fprintf(fp, "  %g", ctdGem[set][i]);
 990         }
 991         fprintf(fp, "\n");
 992     }
 993
 994     for (set = 0; set < nSet; set++)
 995     {
 996         sfree(ctd[set]);
 997         sfree(ctdGem[set]);
 998     }
 999     sfree(ctd);
1000     sfree(ctdGem);
1001     sfree(td);
1002 }
1003
1004 int gmx_analyze(int argc, char *argv[])
1005 {
1006     static const char *desc[] = {
1007         "[TT]g_analyze[tt] reads an ASCII file and analyzes data sets.",
1008         "A line in the input file may start with a time",
1009         "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1010         "Multiple sets can also be",
1011         "read when they are separated by & (option [TT]-n[tt]);",
1012         "in this case only one [IT]y[it]-value is read from each line.",
1013         "All lines starting with # and @ are skipped.",
1014         "All analyses can also be done for the derivative of a set",
1015         "(option [TT]-d[tt]).[PAR]",
1016
1017         "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1018         "points are equidistant in time.[PAR]",
1019
1020         "[TT]g_analyze[tt] always shows the average and standard deviation of each",
1021         "set, as well as the relative deviation of the third",
1022         "and fourth cumulant from those of a Gaussian distribution with the same",
1023         "standard deviation.[PAR]",
1024
1025         "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1026         "Be sure that the time interval between data points is",
1027         "much shorter than the time scale of the autocorrelation.[PAR]",
1028
1029         "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1030         "i/2 periods. The formula is:[BR]"
1031         "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1032         "This is useful for principal components obtained from covariance",
1033         "analysis, since the principal components of random diffusion are",
1034         "pure cosines.[PAR]",
1035
1036         "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1037
1038         "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1039
1040         "Option [TT]-av[tt] produces the average over the sets.",
1041         "Error bars can be added with the option [TT]-errbar[tt].",
1042         "The errorbars can represent the standard deviation, the error",
1043         "(assuming the points are independent) or the interval containing",
1044         "90% of the points, by discarding 5% of the points at the top and",
1045         "the bottom.[PAR]",
1046
1047         "Option [TT]-ee[tt] produces error estimates using block averaging.",
1048         "A set is divided in a number of blocks and averages are calculated for",
1049         "each block. The error for the total average is calculated from",
1050         "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1051         "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1052         "These errors are plotted as a function of the block size.",
1053         "Also an analytical block average curve is plotted, assuming",
1054         "that the autocorrelation is a sum of two exponentials.",
1055         "The analytical curve for the block average is:[BR]",
1056         "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T (  [GRK]alpha[grk]   ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1057         "                       (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1058         "where T is the total time.",
1059         "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1060         "When the actual block average is very close to the analytical curve,",
1061         "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1062         "The complete derivation is given in",
1063         "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1064
1065         "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1066         "component from a hydrogen bond autocorrelation function by the fitting",
1067         "of a sum of exponentials, as described in e.g.",
1068         "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1069         "is the one with the most negative coefficient in the exponential,",
1070         "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1071         "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1072
1073         "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1074         "(and optionally kD) to the hydrogen bond autocorrelation function",
1075         "according to the reversible geminate recombination model. Removal of",
1076         "the ballistic component first is strongly advised. The model is presented in",
1077         "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1078
1079         "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1080         "of each set and over all sets with respect to a filtered average.",
1081         "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1082         "to len/2. len is supplied with the option [TT]-filter[tt].",
1083         "This filter reduces oscillations with period len/2 and len by a factor",
1084         "of 0.79 and 0.33 respectively.[PAR]",
1085
1086         "Option [TT]-g[tt] fits the data to the function given with option",
1087         "[TT]-fitfn[tt].[PAR]",
1088
1089         "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1090         "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1091         "zero or with a negative value are ignored.[PAR]"
1092
1093         "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1094         "on output from [TT]g_hbond[tt]. The input file can be taken directly",
1095         "from [TT]g_hbond -ac[tt], and then the same result should be produced."
1096     };
1097     static real        tb         = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1098     static gmx_bool    bHaveT     = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1099     static gmx_bool    bEESEF     = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1100     static gmx_bool    bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1101     static int         nsets_in   = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1102     static real        temp       = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1103
1104     /* must correspond to enum avbar* declared at beginning of file */
1105     static const char *avbar_opt[avbarNR+1] = {
1106         NULL, "none", "stddev", "error", "90", NULL
1107     };
1108
1109     t_pargs            pa[] = {
1110         { "-time",    FALSE, etBOOL, {&bHaveT},
1111           "Expect a time in the input" },
1112         { "-b",       FALSE, etREAL, {&tb},
1113           "First time to read from set" },
1114         { "-e",       FALSE, etREAL, {&te},
1115           "Last time to read from set" },
1116         { "-n",       FALSE, etINT, {&nsets_in},
1117           "Read this number of sets separated by &" },
1118         { "-d",       FALSE, etBOOL, {&bDer},
1119           "Use the derivative" },
1120         { "-dp",      FALSE, etINT, {&d},
1121           "HIDDENThe derivative is the difference over this number of points" },
1122         { "-bw",      FALSE, etREAL, {&binwidth},
1123           "Binwidth for the distribution" },
1124         { "-errbar",  FALSE, etENUM, {avbar_opt},
1125           "Error bars for [TT]-av[tt]" },
1126         { "-integrate", FALSE, etBOOL, {&bIntegrate},
1127           "Integrate data function(s) numerically using trapezium rule" },
1128         { "-aver_start", FALSE, etREAL, {&aver_start},
1129           "Start averaging the integral from here" },
1130         { "-xydy",    FALSE, etBOOL, {&bXYdy},
1131           "Interpret second data set as error in the y values for integrating" },
1132         { "-regression", FALSE, etBOOL, {&bRegression},
1133           "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1134         { "-luzar",   FALSE, etBOOL, {&bLuzar},
1135           "Do a Luzar and Chandler analysis on a correlation function and related as produced by [TT]g_hbond[tt]. When in addition the [TT]-xydy[tt] flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1136         { "-temp",    FALSE, etREAL, {&temp},
1137           "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1138         { "-fitstart", FALSE, etREAL, {&fit_start},
1139           "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1140         { "-fitend", FALSE, etREAL, {&fit_end},
1141           "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1142         { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1143           "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1144         { "-nbmin",   FALSE, etINT, {&nb_min},
1145           "HIDDENMinimum number of blocks for block averaging" },
1146         { "-resol", FALSE, etINT, {&resol},
1147           "HIDDENResolution for the block averaging, block size increases with"
1148           " a factor 2^(1/resol)" },
1149         { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1150           "HIDDENAlways use a single exponential fit for the error estimate" },
1151         { "-eenlc", FALSE, etBOOL, {&bEENLC},
1152           "HIDDENAllow a negative long-time correlation" },
1153         { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1154           "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1155         { "-filter",  FALSE, etREAL, {&filtlen},
1156           "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1157         { "-power", FALSE, etBOOL, {&bPower},
1158           "Fit data to: b t^a" },
1159         { "-subav", FALSE, etBOOL, {&bSubAv},
1160           "Subtract the average before autocorrelating" },
1161         { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1162           "Calculate one ACF over all sets" },
1163         { "-nbalexp", FALSE, etINT, {&nBalExp},
1164           "HIDDENNumber of exponentials to fit to the ultrafast component" },
1165         { "-baltime", FALSE, etREAL, {&balTime},
1166           "HIDDENTime up to which the ballistic component will be fitted" },
1167 /*     { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1168 /*       "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1169 /*     { "-rcut", FALSE, etREAL, {&rcut}, */
1170 /*       "Cut-off for hydrogen bonds in geminate algorithms" }, */
1171 /*     { "-gemtype", FALSE, etENUM, {gemType}, */
1172 /*       "What type of gminate recombination to use"}, */
1173 /*     { "-D", FALSE, etREAL, {&diffusion}, */
1174 /*       "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1175     };
1176 #define NPA asize(pa)
1177
1178     FILE           *out, *out_fit;
1179     int             n, nlast, s, nset, i, j = 0;
1180     real          **val, *t, dt, tot, error;
1181     double         *av, *sig, cum1, cum2, cum3, cum4, db;
1182     const char     *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1183     output_env_t    oenv;
1184
1185     t_filenm        fnm[] = {
1186         { efXVG, "-f",    "graph",    ffREAD   },
1187         { efXVG, "-ac",   "autocorr", ffOPTWR  },
1188         { efXVG, "-msd",  "msd",      ffOPTWR  },
1189         { efXVG, "-cc",   "coscont",  ffOPTWR  },
1190         { efXVG, "-dist", "distr",    ffOPTWR  },
1191         { efXVG, "-av",   "average",  ffOPTWR  },
1192         { efXVG, "-ee",   "errest",   ffOPTWR  },
1193         { efXVG, "-bal",  "ballisitc", ffOPTWR  },
1194 /*     { efXVG, "-gem",  "geminate", ffOPTWR  }, */
1195         { efLOG, "-g",    "fitlog",   ffOPTWR  }
1196     };
1197 #define NFILE asize(fnm)
1198
1199     int      npargs;
1200     t_pargs *ppa;
1201
1202     npargs = asize(pa);
1203     ppa    = add_acf_pargs(&npargs, pa);
1204
1205     CopyRight(stderr, argv[0]);
1206     parse_common_args(&argc, argv, PCA_CAN_VIEW,
1207                       NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv);
1208
1209     acfile   = opt2fn_null("-ac", NFILE, fnm);
1210     msdfile  = opt2fn_null("-msd", NFILE, fnm);
1211     ccfile   = opt2fn_null("-cc", NFILE, fnm);
1212     distfile = opt2fn_null("-dist", NFILE, fnm);
1213     avfile   = opt2fn_null("-av", NFILE, fnm);
1214     eefile   = opt2fn_null("-ee", NFILE, fnm);
1215     balfile  = opt2fn_null("-bal", NFILE, fnm);
1216 /*   gemfile  = opt2fn_null("-gem",NFILE,fnm); */
1217     /* When doing autocorrelation we don't want a fitlog for fitting
1218      * the function itself (not the acf) when the user did not ask for it.
1219      */
1220     if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1221     {
1222         fitfile  = opt2fn("-g", NFILE, fnm);
1223     }
1224     else
1225     {
1226         fitfile  = opt2fn_null("-g", NFILE, fnm);
1227     }
1228
1229     val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1230                         opt2parg_bSet("-b", npargs, ppa), tb,
1231                         opt2parg_bSet("-e", npargs, ppa), te,
1232                         nsets_in, &nset, &n, &dt, &t);
1233     printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1234
1235     if (bDer)
1236     {
1237         printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1238                d, d);
1239         n -= d;
1240         for (s = 0; s < nset; s++)
1241         {
1242             for (i = 0; (i < n); i++)
1243             {
1244                 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1245             }
1246         }
1247     }
1248
1249     if (bIntegrate)
1250     {
1251         real sum, stddev;
1252
1253         printf("Calculating the integral using the trapezium rule\n");
1254
1255         if (bXYdy)
1256         {
1257             sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1258             printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1259         }
1260         else
1261         {
1262             for (s = 0; s < nset; s++)
1263             {
1264                 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1265                 printf("Integral %d  %10.5f  +/- %10.5f\n", s+1, sum, stddev);
1266             }
1267         }
1268     }
1269
1270     if (fitfile != NULL)
1271     {
1272         out_fit = ffopen(fitfile, "w");
1273         if (bXYdy && nset >= 2)
1274         {
1275             do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1276         }
1277         else
1278         {
1279             for (s = 0; s < nset; s++)
1280             {
1281                 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1282             }
1283         }
1284         ffclose(out_fit);
1285     }
1286
1287     printf("                                      std. dev.    relative deviation of\n");
1288     printf("                       standard       ---------   cumulants from those of\n");
1289     printf("set      average       deviation      sqrt(n-1)   a Gaussian distribition\n");
1290     printf("                                                      cum. 3   cum. 4\n");
1291     snew(av, nset);
1292     snew(sig, nset);
1293     for (s = 0; (s < nset); s++)
1294     {
1295         cum1 = 0;
1296         cum2 = 0;
1297         cum3 = 0;
1298         cum4 = 0;
1299         for (i = 0; (i < n); i++)
1300         {
1301             cum1 += val[s][i];
1302         }
1303         cum1 /= n;
1304         for (i = 0; (i < n); i++)
1305         {
1306             db    = val[s][i]-cum1;
1307             cum2 += db*db;
1308             cum3 += db*db*db;
1309             cum4 += db*db*db*db;
1310         }
1311         cum2  /= n;
1312         cum3  /= n;
1313         cum4  /= n;
1314         av[s]  = cum1;
1315         sig[s] = sqrt(cum2);
1316         if (n > 1)
1317         {
1318             error = sqrt(cum2/(n-1));
1319         }
1320         else
1321         {
1322             error = 0;
1323         }
1324         printf("SS%d  %13.6e   %12.6e   %12.6e      %6.3f   %6.3f\n",
1325                s+1, av[s], sig[s], error,
1326                sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1327                sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1328     }
1329     printf("\n");
1330
1331     if (filtlen)
1332     {
1333         filter(filtlen, n, nset, val, dt, oenv);
1334     }
1335
1336     if (msdfile)
1337     {
1338         out = xvgropen(msdfile, "Mean square displacement",
1339                        "time", "MSD (nm\\S2\\N)", oenv);
1340         nlast = (int)(n*frac);
1341         for (s = 0; s < nset; s++)
1342         {
1343             for (j = 0; j <= nlast; j++)
1344             {
1345                 if (j % 100 == 0)
1346                 {
1347                     fprintf(stderr, "\r%d", j);
1348                 }
1349                 tot = 0;
1350                 for (i = 0; i < n-j; i++)
1351                 {
1352                     tot += sqr(val[s][i]-val[s][i+j]);
1353                 }
1354                 tot /= (real)(n-j);
1355                 fprintf(out, " %g %8g\n", dt*j, tot);
1356             }
1357             if (s < nset-1)
1358             {
1359                 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1360             }
1361         }
1362         ffclose(out);
1363         fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1364     }
1365     if (ccfile)
1366     {
1367         plot_coscont(ccfile, n, nset, val, oenv);
1368     }
1369
1370     if (distfile)
1371     {
1372         histogram(distfile, binwidth, n, nset, val, oenv);
1373     }
1374     if (avfile)
1375     {
1376         average(avfile, nenum(avbar_opt), n, nset, val, t);
1377     }
1378     if (eefile)
1379     {
1380         estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1381                        bEeFitAc, bEESEF, bEENLC, oenv);
1382     }
1383     if (balfile)
1384     {
1385         do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, bDer, oenv);
1386     }
1387 /*   if (gemfile) */
1388 /*       do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1389 /*                   nFitPoints, fit_start, fit_end, oenv); */
1390     if (bPower)
1391     {
1392         power_fit(n, nset, val, t);
1393     }
1394
1395     if (acfile != NULL)
1396     {
1397         if (bSubAv)
1398         {
1399             for (s = 0; s < nset; s++)
1400             {
1401                 for (i = 0; i < n; i++)
1402                 {
1403                     val[s][i] -= av[s];
1404                 }
1405             }
1406         }
1407         do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1408                     eacNormal, bAverCorr);
1409     }
1410
1411     if (bRegression)
1412     {
1413         regression_analysis(n, bXYdy, t, nset, val);
1414     }
1415
1416     if (bLuzar)
1417     {
1418         luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1419     }
1420
1421     view_all(oenv, NFILE, fnm);
1422
1423     thanx(stderr);
1424
1425     return 0;
1426 }