src/gromacs/gmxana/gmx_analyze.c

   1 /*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * Green Red Orange Magenta Azure Cyan Skyblue
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39 #include <math.h>
  40 #include <string.h>
  41 #include "statutil.h"
  42 #include "sysstuff.h"
  43 #include "typedefs.h"
  44 #include "smalloc.h"
  45 #include "macros.h"
  46 #include "gmx_fatal.h"
  47 #include "vec.h"
  48 #include "copyrite.h"
  49 #include "futil.h"
  50 #include "readinp.h"
  51 #include "statutil.h"
  52 #include "txtdump.h"
  53 #include "gstat.h"
  54 #include "gmx_statistics.h"
  55 #include "xvgr.h"
  56 #include "gmx_ana.h"
  57 #include "geminate.h"
  58
  59 #include "gromacs/linearalgebra/matrix.h"
  60
  61 /* must correspond to char *avbar_opt[] declared in main() */
  62 enum {
  63     avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
  64 };
  65
  66 static void power_fit(int n, int nset, real **val, real *t)
  67 {
  68     real *x, *y, quality, a, b, r;
  69     int   s, i;
  70
  71     snew(x, n);
  72     snew(y, n);
  73
  74     if (t[0] > 0)
  75     {
  76         for (i = 0; i < n; i++)
  77         {
  78             if (t[0] > 0)
  79             {
  80                 x[i] = log(t[i]);
  81             }
  82         }
  83     }
  84     else
  85     {
  86         fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
  87         for (i = 0; i < n; i++)
  88         {
  89             x[i] = log(i+1);
  90         }
  91     }
  92
  93     for (s = 0; s < nset; s++)
  94     {
  95         i = 0;
  96         for (i = 0; i < n && val[s][i] >= 0; i++)
  97         {
  98             y[i] = log(val[s][i]);
  99         }
 100         if (i < n)
 101         {
 102             fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
 103         }
 104         lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
 105         fprintf(stdout, "Power fit set %3d:  error %.3f  a %g  b %g\n",
 106                 s+1, quality, a, exp(b));
 107     }
 108
 109     sfree(y);
 110     sfree(x);
 111 }
 112
 113 static real cosine_content(int nhp, int n, real *y)
 114 /* Assumes n equidistant points */
 115 {
 116     double fac, cosyint, yyint;
 117     int    i;
 118
 119     if (n < 2)
 120     {
 121         return 0;
 122     }
 123
 124     fac = M_PI*nhp/(n-1);
 125
 126     cosyint = 0;
 127     yyint   = 0;
 128     for (i = 0; i < n; i++)
 129     {
 130         cosyint += cos(fac*i)*y[i];
 131         yyint   += y[i]*y[i];
 132     }
 133
 134     return 2*cosyint*cosyint/(n*yyint);
 135 }
 136
 137 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
 138                          const output_env_t oenv)
 139 {
 140     FILE *fp;
 141     int   s;
 142     real  cc;
 143
 144     fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
 145                   oenv);
 146
 147     for (s = 0; s < nset; s++)
 148     {
 149         cc = cosine_content(s+1, n, val[s]);
 150         fprintf(fp, " %d %g\n", s+1, cc);
 151         fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
 152                 s+1, 0.5*(s+1), cc);
 153     }
 154     fprintf(stdout, "\n");
 155
 156     ffclose(fp);
 157 }
 158
 159 static void regression_analysis(int n, gmx_bool bXYdy,
 160                                 real *x, int nset, real **val)
 161 {
 162     real S, chi2, a, b, da, db, r = 0;
 163     int  ok;
 164
 165     if (bXYdy || (nset == 1))
 166     {
 167         printf("Fitting data to a function f(x) = ax + b\n");
 168         printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
 169         printf("Error estimates will be given if w_i (sigma) values are given\n");
 170         printf("(use option -xydy).\n\n");
 171         if (bXYdy)
 172         {
 173             if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
 174             {
 175                 gmx_fatal(FARGS, "Error fitting the data: %s",
 176                           gmx_stats_message(ok));
 177             }
 178         }
 179         else
 180         {
 181             if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
 182             {
 183                 gmx_fatal(FARGS, "Error fitting the data: %s",
 184                           gmx_stats_message(ok));
 185             }
 186         }
 187         chi2 = sqr((n-2)*S);
 188         printf("Chi2                    = %g\n", chi2);
 189         printf("S (Sqrt(Chi2/(n-2))     = %g\n", S);
 190         printf("Correlation coefficient = %.1f%%\n", 100*r);
 191         printf("\n");
 192         if (bXYdy)
 193         {
 194             printf("a    = %g +/- %g\n", a, da);
 195             printf("b    = %g +/- %g\n", b, db);
 196         }
 197         else
 198         {
 199             printf("a    = %g\n", a);
 200             printf("b    = %g\n", b);
 201         }
 202     }
 203     else
 204     {
 205         double chi2, *a, **xx, *y;
 206         int    i, j;
 207
 208         snew(y, n);
 209         snew(xx, nset-1);
 210         for (j = 0; (j < nset-1); j++)
 211         {
 212             snew(xx[j], n);
 213         }
 214         for (i = 0; (i < n); i++)
 215         {
 216             y[i] = val[0][i];
 217             for (j = 1; (j < nset); j++)
 218             {
 219                 xx[j-1][i] = val[j][i];
 220             }
 221         }
 222         snew(a, nset-1);
 223         chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
 224         printf("Fitting %d data points in %d sets\n", n, nset-1);
 225         printf("chi2 = %g\n", chi2);
 226         printf("A =");
 227         for (i = 0; (i < nset-1); i++)
 228         {
 229             printf("  %g", a[i]);
 230             sfree(xx[i]);
 231         }
 232         printf("\n");
 233         sfree(xx);
 234         sfree(y);
 235         sfree(a);
 236     }
 237 }
 238
 239 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
 240                const output_env_t oenv)
 241 {
 242     FILE          *fp;
 243     int            i, s;
 244     double         min, max;
 245     int            nbin;
 246 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 247     long long int *histo;
 248 #else
 249     double        *histo;
 250 #endif
 251
 252     min = val[0][0];
 253     max = val[0][0];
 254     for (s = 0; s < nset; s++)
 255     {
 256         for (i = 0; i < n; i++)
 257         {
 258             if (val[s][i] < min)
 259             {
 260                 min = val[s][i];
 261             }
 262             else if (val[s][i] > max)
 263             {
 264                 max = val[s][i];
 265             }
 266         }
 267     }
 268
 269     min = binwidth*floor(min/binwidth);
 270     max = binwidth*ceil(max/binwidth);
 271     if (min != 0)
 272     {
 273         min -= binwidth;
 274     }
 275     max += binwidth;
 276
 277     nbin = (int)((max - min)/binwidth + 0.5) + 1;
 278     fprintf(stderr, "Making distributions with %d bins\n", nbin);
 279     snew(histo, nbin);
 280     fp = xvgropen(distfile, "Distribution", "", "", oenv);
 281     for (s = 0; s < nset; s++)
 282     {
 283         for (i = 0; i < nbin; i++)
 284         {
 285             histo[i] = 0;
 286         }
 287         for (i = 0; i < n; i++)
 288         {
 289             histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
 290         }
 291         for (i = 0; i < nbin; i++)
 292         {
 293             fprintf(fp, " %g  %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
 294         }
 295         if (s < nset-1)
 296         {
 297             fprintf(fp, "&\n");
 298         }
 299     }
 300     ffclose(fp);
 301 }
 302
 303 static int real_comp(const void *a, const void *b)
 304 {
 305     real dif = *(real *)a - *(real *)b;
 306
 307     if (dif < 0)
 308     {
 309         return -1;
 310     }
 311     else if (dif > 0)
 312     {
 313         return 1;
 314     }
 315     else
 316     {
 317         return 0;
 318     }
 319 }
 320
 321 static void average(const char *avfile, int avbar_opt,
 322                     int n, int nset, real **val, real *t)
 323 {
 324     FILE   *fp;
 325     int     i, s, edge = 0;
 326     double  av, var, err;
 327     real   *tmp = NULL;
 328
 329     fp = ffopen(avfile, "w");
 330     if ((avbar_opt == avbarERROR) && (nset == 1))
 331     {
 332         avbar_opt = avbarNONE;
 333     }
 334     if (avbar_opt != avbarNONE)
 335     {
 336         if (avbar_opt == avbar90)
 337         {
 338             snew(tmp, nset);
 339             fprintf(fp, "@TYPE xydydy\n");
 340             edge = (int)(nset*0.05+0.5);
 341             fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
 342                     " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
 343         }
 344         else
 345         {
 346             fprintf(fp, "@TYPE xydy\n");
 347         }
 348     }
 349
 350     for (i = 0; i < n; i++)
 351     {
 352         av = 0;
 353         for (s = 0; s < nset; s++)
 354         {
 355             av += val[s][i];
 356         }
 357         av /= nset;
 358         fprintf(fp, " %g %g", t[i], av);
 359         var = 0;
 360         if (avbar_opt != avbarNONE)
 361         {
 362             if (avbar_opt == avbar90)
 363             {
 364                 for (s = 0; s < nset; s++)
 365                 {
 366                     tmp[s] = val[s][i];
 367                 }
 368                 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
 369                 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
 370             }
 371             else
 372             {
 373                 for (s = 0; s < nset; s++)
 374                 {
 375                     var += sqr(val[s][i]-av);
 376                 }
 377                 if (avbar_opt == avbarSTDDEV)
 378                 {
 379                     err = sqrt(var/nset);
 380                 }
 381                 else
 382                 {
 383                     err = sqrt(var/(nset*(nset-1)));
 384                 }
 385                 fprintf(fp, " %g", err);
 386             }
 387         }
 388         fprintf(fp, "\n");
 389     }
 390     ffclose(fp);
 391
 392     if (avbar_opt == avbar90)
 393     {
 394         sfree(tmp);
 395     }
 396 }
 397
 398 static real anal_ee_inf(real *parm, real T)
 399 {
 400     return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
 401 }
 402
 403 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
 404                            int nset, double *av, double *sig, real **val, real dt,
 405                            gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
 406                            const output_env_t oenv)
 407 {
 408     FILE    *fp;
 409     int      bs, prev_bs, nbs, nb;
 410     real     spacing, nbr;
 411     int      s, i, j;
 412     double   blav, var;
 413     char   **leg;
 414     real    *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
 415     real     fitparm[4];
 416     real     ee, a, tau1, tau2;
 417
 418     if (n < 4)
 419     {
 420         fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
 421
 422         return;
 423     }
 424
 425     fp = xvgropen(eefile, "Error estimates",
 426                   "Block size (time)", "Error estimate", oenv);
 427     if (output_env_get_print_xvgr_codes(oenv))
 428     {
 429         fprintf(fp,
 430                 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
 431                 (n-1)*dt, n);
 432     }
 433     snew(leg, 2*nset);
 434     xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
 435     sfree(leg);
 436
 437     spacing = pow(2, 1.0/resol);
 438     snew(tbs, n);
 439     snew(ybs, n);
 440     snew(fitsig, n);
 441     for (s = 0; s < nset; s++)
 442     {
 443         nbs     = 0;
 444         prev_bs = 0;
 445         nbr     = nb_min;
 446         while (nbr <= n)
 447         {
 448             bs = n/(int)nbr;
 449             if (bs != prev_bs)
 450             {
 451                 nb  = n/bs;
 452                 var = 0;
 453                 for (i = 0; i < nb; i++)
 454                 {
 455                     blav = 0;
 456                     for (j = 0; j < bs; j++)
 457                     {
 458                         blav += val[s][bs*i+j];
 459                     }
 460                     var += sqr(av[s] - blav/bs);
 461                 }
 462                 tbs[nbs] = bs*dt;
 463                 if (sig[s] == 0)
 464                 {
 465                     ybs[nbs] = 0;
 466                 }
 467                 else
 468                 {
 469                     ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
 470                 }
 471                 nbs++;
 472             }
 473             nbr    *= spacing;
 474             nb      = (int)(nbr+0.5);
 475             prev_bs = bs;
 476         }
 477         if (sig[s] == 0)
 478         {
 479             ee   = 0;
 480             a    = 1;
 481             tau1 = 0;
 482             tau2 = 0;
 483         }
 484         else
 485         {
 486             for (i = 0; i < nbs/2; i++)
 487             {
 488                 rtmp         = tbs[i];
 489                 tbs[i]       = tbs[nbs-1-i];
 490                 tbs[nbs-1-i] = rtmp;
 491                 rtmp         = ybs[i];
 492                 ybs[i]       = ybs[nbs-1-i];
 493                 ybs[nbs-1-i] = rtmp;
 494             }
 495             /* The initial slope of the normalized ybs^2 is 1.
 496              * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
 497              * From this we take our initial guess for tau1.
 498              */
 499             twooe = 2/exp(1);
 500             i     = -1;
 501             do
 502             {
 503                 i++;
 504                 tau1_est = tbs[i];
 505             }
 506             while (i < nbs - 1 &&
 507                    (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
 508
 509             if (ybs[0] > ybs[1])
 510             {
 511                 fprintf(stdout, "Data set %d has strange time correlations:\n"
 512                         "the std. error using single points is larger than that of blocks of 2 points\n"
 513                         "The error estimate might be inaccurate, check the fit\n",
 514                         s+1);
 515                 /* Use the total time as tau for the fitting weights */
 516                 tau_sig = (n - 1)*dt;
 517             }
 518             else
 519             {
 520                 tau_sig = tau1_est;
 521             }
 522
 523             if (debug)
 524             {
 525                 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
 526             }
 527
 528             /* Generate more or less appropriate sigma's,
 529              * also taking the density of points into account.
 530              */
 531             for (i = 0; i < nbs; i++)
 532             {
 533                 if (i == 0)
 534                 {
 535                     dens = tbs[1]/tbs[0] - 1;
 536                 }
 537                 else if (i == nbs-1)
 538                 {
 539                     dens = tbs[nbs-1]/tbs[nbs-2] - 1;
 540                 }
 541                 else
 542                 {
 543                     dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
 544                 }
 545                 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
 546             }
 547
 548             if (!bSingleExpFit)
 549             {
 550                 fitparm[0] = tau1_est;
 551                 fitparm[1] = 0.95;
 552                 /* We set the initial guess for tau2
 553                  * to halfway between tau1_est and the total time (on log scale).
 554                  */
 555                 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
 556                 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
 557                          bDebugMode(), effnERREST, fitparm, 0);
 558                 fitparm[3] = 1-fitparm[1];
 559             }
 560             if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
 561                 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
 562             {
 563                 if (!bSingleExpFit)
 564                 {
 565                     if (fitparm[2] > (n-1)*dt)
 566                     {
 567                         fprintf(stdout,
 568                                 "Warning: tau2 is longer than the length of the data (%g)\n"
 569                                 "         the statistics might be bad\n",
 570                                 (n-1)*dt);
 571                     }
 572                     else
 573                     {
 574                         fprintf(stdout, "a fitted parameter is negative\n");
 575                     }
 576                     fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 577                             sig[s]*anal_ee_inf(fitparm, n*dt),
 578                             fitparm[1], fitparm[0], fitparm[2]);
 579                     /* Do a fit with tau2 fixed at the total time.
 580                      * One could also choose any other large value for tau2.
 581                      */
 582                     fitparm[0] = tau1_est;
 583                     fitparm[1] = 0.95;
 584                     fitparm[2] = (n-1)*dt;
 585                     fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
 586                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 587                              effnERREST, fitparm, 4);
 588                     fitparm[3] = 1-fitparm[1];
 589                 }
 590                 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
 591                     || (fitparm[1] > 1 && !bAllowNegLTCorr))
 592                 {
 593                     if (!bSingleExpFit)
 594                     {
 595                         fprintf(stdout, "a fitted parameter is negative\n");
 596                         fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 597                                 sig[s]*anal_ee_inf(fitparm, n*dt),
 598                                 fitparm[1], fitparm[0], fitparm[2]);
 599                     }
 600                     /* Do a single exponential fit */
 601                     fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
 602                     fitparm[0] = tau1_est;
 603                     fitparm[1] = 1.0;
 604                     fitparm[2] = 0.0;
 605                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 606                              effnERREST, fitparm, 6);
 607                     fitparm[3] = 1-fitparm[1];
 608                 }
 609             }
 610             ee   = sig[s]*anal_ee_inf(fitparm, n*dt);
 611             a    = fitparm[1];
 612             tau1 = fitparm[0];
 613             tau2 = fitparm[2];
 614         }
 615         fprintf(stdout, "Set %3d:  err.est. %g  a %g  tau1 %g  tau2 %g\n",
 616                 s+1, ee, a, tau1, tau2);
 617         fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
 618         fprintf(fp, "@ legend string %d \"ee %6g\"\n",
 619                 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
 620         for (i = 0; i < nbs; i++)
 621         {
 622             fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
 623                     sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
 624         }
 625
 626         if (bFitAc)
 627         {
 628             int   fitlen;
 629             real *ac, acint, ac_fit[4];
 630
 631             snew(ac, n);
 632             for (i = 0; i < n; i++)
 633             {
 634                 ac[i] = val[s][i] - av[s];
 635                 if (i > 0)
 636                 {
 637                     fitsig[i] = sqrt(i);
 638                 }
 639                 else
 640                 {
 641                     fitsig[i] = 1;
 642                 }
 643             }
 644             low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
 645                             dt, eacNormal, 1, FALSE, TRUE,
 646                             FALSE, 0, 0, effnNONE);
 647
 648             fitlen = n/nb_min;
 649
 650             /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
 651             acint = 0.5*ac[0];
 652             for (i = 1; i <= fitlen/2; i++)
 653             {
 654                 acint += ac[i];
 655             }
 656             acint *= dt;
 657
 658             /* Generate more or less appropriate sigma's */
 659             for (i = 0; i <= fitlen; i++)
 660             {
 661                 fitsig[i] = sqrt(acint + dt*i);
 662             }
 663
 664             ac_fit[0] = 0.5*acint;
 665             ac_fit[1] = 0.95;
 666             ac_fit[2] = 10*acint;
 667             do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
 668                      bDebugMode(), effnEXP3, ac_fit, 0);
 669             ac_fit[3] = 1 - ac_fit[1];
 670
 671             fprintf(stdout, "Set %3d:  ac erest %g  a %g  tau1 %g  tau2 %g\n",
 672                     s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
 673                     ac_fit[1], ac_fit[0], ac_fit[2]);
 674
 675             fprintf(fp, "&\n");
 676             for (i = 0; i < nbs; i++)
 677             {
 678                 fprintf(fp, "%g %g\n", tbs[i],
 679                         sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
 680             }
 681
 682             sfree(ac);
 683         }
 684         if (s < nset-1)
 685         {
 686             fprintf(fp, "&\n");
 687         }
 688     }
 689     sfree(fitsig);
 690     sfree(ybs);
 691     sfree(tbs);
 692     ffclose(fp);
 693 }
 694
 695 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
 696                          gmx_bool bError, real fit_start, real smooth_tail_start,
 697                          const output_env_t oenv)
 698 {
 699     const real tol = 1e-8;
 700     real      *kt;
 701     real       weight, d2;
 702     int        j;
 703
 704     please_cite(stdout, "Spoel2006b");
 705
 706     /* Compute negative derivative k(t) = -dc(t)/dt */
 707     if (!bError)
 708     {
 709         snew(kt, nn);
 710         compute_derivative(nn, time, val[0], kt);
 711         for (j = 0; (j < nn); j++)
 712         {
 713             kt[j] = -kt[j];
 714         }
 715         if (debug)
 716         {
 717             d2 = 0;
 718             for (j = 0; (j < nn); j++)
 719             {
 720                 d2 += sqr(kt[j] - val[3][j]);
 721             }
 722             fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
 723         }
 724         analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
 725                      temp, smooth_tail_start, oenv);
 726         sfree(kt);
 727     }
 728     else if (nset == 6)
 729     {
 730         analyse_corr(nn, time, val[0], val[2], val[4],
 731                      val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
 732     }
 733     else
 734     {
 735         printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
 736         printf("Not doing anything. Sorry.\n");
 737     }
 738 }
 739
 740 static void filter(real flen, int n, int nset, real **val, real dt)
 741 {
 742     int     f, s, i, j;
 743     double *filt, sum, vf, fluc, fluctot;
 744
 745     f = (int)(flen/(2*dt));
 746     snew(filt, f+1);
 747     filt[0] = 1;
 748     sum     = 1;
 749     for (i = 1; i <= f; i++)
 750     {
 751         filt[i] = cos(M_PI*dt*i/flen);
 752         sum    += 2*filt[i];
 753     }
 754     for (i = 0; i <= f; i++)
 755     {
 756         filt[i] /= sum;
 757     }
 758     fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
 759     fprintf(stdout, "  using a filter of length %g of %d points\n", flen, 2*f+1);
 760     fluctot = 0;
 761     for (s = 0; s < nset; s++)
 762     {
 763         fluc = 0;
 764         for (i = f; i < n-f; i++)
 765         {
 766             vf = filt[0]*val[s][i];
 767             for (j = 1; j <= f; j++)
 768             {
 769                 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
 770             }
 771             fluc += sqr(val[s][i] - vf);
 772         }
 773         fluc    /= n - 2*f;
 774         fluctot += fluc;
 775         fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
 776     }
 777     fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
 778     fprintf(stdout, "\n");
 779
 780     sfree(filt);
 781 }
 782
 783 static void do_fit(FILE *out, int n, gmx_bool bYdy,
 784                    int ny, real *x0, real **val,
 785                    int npargs, t_pargs *ppa, const output_env_t oenv)
 786 {
 787     real *c1 = NULL, *sig = NULL, *fitparm;
 788     real  tendfit, tbeginfit;
 789     int   i, efitfn, nparm;
 790
 791     efitfn = get_acffitfn();
 792     nparm  = nfp_ffn[efitfn];
 793     fprintf(out, "Will fit to the following function:\n");
 794     fprintf(out, "%s\n", longs_ffn[efitfn]);
 795     c1 = val[n];
 796     if (bYdy)
 797     {
 798         c1  = val[n];
 799         sig = val[n+1];
 800         fprintf(out, "Using two columns as y and sigma values\n");
 801     }
 802     else
 803     {
 804         snew(sig, ny);
 805     }
 806     if (opt2parg_bSet("-beginfit", npargs, ppa))
 807     {
 808         tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
 809     }
 810     else
 811     {
 812         tbeginfit = x0[0];
 813     }
 814     if (opt2parg_bSet("-endfit", npargs, ppa))
 815     {
 816         tendfit   = opt2parg_real("-endfit", npargs, ppa);
 817     }
 818     else
 819     {
 820         tendfit   = x0[ny-1];
 821     }
 822
 823     snew(fitparm, nparm);
 824     switch (efitfn)
 825     {
 826         case effnEXP1:
 827             fitparm[0] = 0.5;
 828             break;
 829         case effnEXP2:
 830             fitparm[0] = 0.5;
 831             fitparm[1] = c1[0];
 832             break;
 833         case effnEXP3:
 834             fitparm[0] = 1.0;
 835             fitparm[1] = 0.5*c1[0];
 836             fitparm[2] = 10.0;
 837             break;
 838         case effnEXP5:
 839             fitparm[0] = fitparm[2] = 0.5*c1[0];
 840             fitparm[1] = 10;
 841             fitparm[3] = 40;
 842             fitparm[4] = 0;
 843             break;
 844         case effnEXP7:
 845             fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
 846             fitparm[1] = 1;
 847             fitparm[3] = 10;
 848             fitparm[5] = 100;
 849             fitparm[6] = 0;
 850             break;
 851         case effnEXP9:
 852             fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
 853             fitparm[1] = 0.1;
 854             fitparm[3] = 1;
 855             fitparm[5] = 10;
 856             fitparm[7] = 100;
 857             fitparm[8] = 0;
 858             break;
 859         default:
 860             fprintf(out, "Warning: don't know how to initialize the parameters\n");
 861             for (i = 0; (i < nparm); i++)
 862             {
 863                 fitparm[i] = 1;
 864             }
 865     }
 866     fprintf(out, "Starting parameters:\n");
 867     for (i = 0; (i < nparm); i++)
 868     {
 869         fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 870     }
 871     if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
 872                  oenv, bDebugMode(), efitfn, fitparm, 0))
 873     {
 874         for (i = 0; (i < nparm); i++)
 875         {
 876             fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 877         }
 878     }
 879     else
 880     {
 881         fprintf(out, "No solution was found\n");
 882     }
 883 }
 884
 885 static void do_ballistic(const char *balFile, int nData,
 886                          real *t, real **val, int nSet,
 887                          real balTime, int nBalExp,
 888                          const output_env_t oenv)
 889 {
 890     double     **ctd   = NULL, *td = NULL;
 891     t_gemParams *GP    = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
 892     static char *leg[] = {"Ac'(t)"};
 893     FILE        *fp;
 894     int          i, set;
 895
 896     if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
 897     {
 898         snew(ctd, nSet);
 899         snew(td,  nData);
 900
 901         fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 902         xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
 903
 904         for (set = 0; set < nSet; set++)
 905         {
 906             snew(ctd[set], nData);
 907             for (i = 0; i < nData; i++)
 908             {
 909                 ctd[set][i] = (double)val[set][i];
 910                 if (set == 0)
 911                 {
 912                     td[i] = (double)t[i];
 913                 }
 914             }
 915
 916             takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
 917         }
 918
 919         for (i = 0; i < nData; i++)
 920         {
 921             fprintf(fp, "  %g", t[i]);
 922             for (set = 0; set < nSet; set++)
 923             {
 924                 fprintf(fp, "  %g", ctd[set][i]);
 925             }
 926             fprintf(fp, "\n");
 927         }
 928
 929
 930         for (set = 0; set < nSet; set++)
 931         {
 932             sfree(ctd[set]);
 933         }
 934         sfree(ctd);
 935         sfree(td);
 936     }
 937     else
 938     {
 939         printf("Number of data points is less than the number of parameters to fit\n."
 940                "The system is underdetermined, hence no ballistic term can be found.\n\n");
 941     }
 942 }
 943
 944 static void do_geminate(const char *gemFile, int nData,
 945                         real *t, real **val, int nSet,
 946                         const real D, const real rcut, const real balTime,
 947                         const int nFitPoints, const real begFit, const real endFit,
 948                         const output_env_t oenv)
 949 {
 950     double     **ctd = NULL, **ctdGem = NULL, *td = NULL;
 951     t_gemParams *GP  = init_gemParams(rcut, D, t, nData, nFitPoints,
 952                                       begFit, endFit, balTime, 1);
 953     const char  *leg[] = {"Ac\\sgem\\N(t)"};
 954     FILE        *fp;
 955     int          i, set;
 956
 957     snew(ctd,    nSet);
 958     snew(ctdGem, nSet);
 959     snew(td,  nData);
 960
 961     fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 962     xvgr_legend(fp, asize(leg), leg, oenv);
 963
 964     for (set = 0; set < nSet; set++)
 965     {
 966         snew(ctd[set],    nData);
 967         snew(ctdGem[set], nData);
 968         for (i = 0; i < nData; i++)
 969         {
 970             ctd[set][i] = (double)val[set][i];
 971             if (set == 0)
 972             {
 973                 td[i] = (double)t[i];
 974             }
 975         }
 976         fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
 977     }
 978
 979     for (i = 0; i < nData; i++)
 980     {
 981         fprintf(fp, "  %g", t[i]);
 982         for (set = 0; set < nSet; set++)
 983         {
 984             fprintf(fp, "  %g", ctdGem[set][i]);
 985         }
 986         fprintf(fp, "\n");
 987     }
 988
 989     for (set = 0; set < nSet; set++)
 990     {
 991         sfree(ctd[set]);
 992         sfree(ctdGem[set]);
 993     }
 994     sfree(ctd);
 995     sfree(ctdGem);
 996     sfree(td);
 997 }
 998
 999 int gmx_analyze(int argc, char *argv[])
1000 {
1001     static const char *desc[] = {
1002         "[TT]g_analyze[tt] reads an ASCII file and analyzes data sets.",
1003         "A line in the input file may start with a time",
1004         "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1005         "Multiple sets can also be",
1006         "read when they are separated by & (option [TT]-n[tt]);",
1007         "in this case only one [IT]y[it]-value is read from each line.",
1008         "All lines starting with # and @ are skipped.",
1009         "All analyses can also be done for the derivative of a set",
1010         "(option [TT]-d[tt]).[PAR]",
1011
1012         "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1013         "points are equidistant in time.[PAR]",
1014
1015         "[TT]g_analyze[tt] always shows the average and standard deviation of each",
1016         "set, as well as the relative deviation of the third",
1017         "and fourth cumulant from those of a Gaussian distribution with the same",
1018         "standard deviation.[PAR]",
1019
1020         "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1021         "Be sure that the time interval between data points is",
1022         "much shorter than the time scale of the autocorrelation.[PAR]",
1023
1024         "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1025         "i/2 periods. The formula is:[BR]"
1026         "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1027         "This is useful for principal components obtained from covariance",
1028         "analysis, since the principal components of random diffusion are",
1029         "pure cosines.[PAR]",
1030
1031         "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1032
1033         "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1034
1035         "Option [TT]-av[tt] produces the average over the sets.",
1036         "Error bars can be added with the option [TT]-errbar[tt].",
1037         "The errorbars can represent the standard deviation, the error",
1038         "(assuming the points are independent) or the interval containing",
1039         "90% of the points, by discarding 5% of the points at the top and",
1040         "the bottom.[PAR]",
1041
1042         "Option [TT]-ee[tt] produces error estimates using block averaging.",
1043         "A set is divided in a number of blocks and averages are calculated for",
1044         "each block. The error for the total average is calculated from",
1045         "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1046         "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1047         "These errors are plotted as a function of the block size.",
1048         "Also an analytical block average curve is plotted, assuming",
1049         "that the autocorrelation is a sum of two exponentials.",
1050         "The analytical curve for the block average is:[BR]",
1051         "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T (  [GRK]alpha[grk]   ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1052         "                       (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1053         "where T is the total time.",
1054         "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1055         "When the actual block average is very close to the analytical curve,",
1056         "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1057         "The complete derivation is given in",
1058         "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1059
1060         "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1061         "component from a hydrogen bond autocorrelation function by the fitting",
1062         "of a sum of exponentials, as described in e.g.",
1063         "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1064         "is the one with the most negative coefficient in the exponential,",
1065         "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1066         "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1067
1068         "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1069         "(and optionally kD) to the hydrogen bond autocorrelation function",
1070         "according to the reversible geminate recombination model. Removal of",
1071         "the ballistic component first is strongly advised. The model is presented in",
1072         "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1073
1074         "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1075         "of each set and over all sets with respect to a filtered average.",
1076         "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1077         "to len/2. len is supplied with the option [TT]-filter[tt].",
1078         "This filter reduces oscillations with period len/2 and len by a factor",
1079         "of 0.79 and 0.33 respectively.[PAR]",
1080
1081         "Option [TT]-g[tt] fits the data to the function given with option",
1082         "[TT]-fitfn[tt].[PAR]",
1083
1084         "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1085         "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1086         "zero or with a negative value are ignored.[PAR]"
1087
1088         "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1089         "on output from [TT]g_hbond[tt]. The input file can be taken directly",
1090         "from [TT]g_hbond -ac[tt], and then the same result should be produced."
1091     };
1092     static real        tb         = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1093     static gmx_bool    bHaveT     = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1094     static gmx_bool    bEESEF     = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1095     static gmx_bool    bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1096     static int         nsets_in   = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1097     static real        temp       = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1098
1099     /* must correspond to enum avbar* declared at beginning of file */
1100     static const char *avbar_opt[avbarNR+1] = {
1101         NULL, "none", "stddev", "error", "90", NULL
1102     };
1103
1104     t_pargs            pa[] = {
1105         { "-time",    FALSE, etBOOL, {&bHaveT},
1106           "Expect a time in the input" },
1107         { "-b",       FALSE, etREAL, {&tb},
1108           "First time to read from set" },
1109         { "-e",       FALSE, etREAL, {&te},
1110           "Last time to read from set" },
1111         { "-n",       FALSE, etINT, {&nsets_in},
1112           "Read this number of sets separated by &" },
1113         { "-d",       FALSE, etBOOL, {&bDer},
1114           "Use the derivative" },
1115         { "-dp",      FALSE, etINT, {&d},
1116           "HIDDENThe derivative is the difference over this number of points" },
1117         { "-bw",      FALSE, etREAL, {&binwidth},
1118           "Binwidth for the distribution" },
1119         { "-errbar",  FALSE, etENUM, {avbar_opt},
1120           "Error bars for [TT]-av[tt]" },
1121         { "-integrate", FALSE, etBOOL, {&bIntegrate},
1122           "Integrate data function(s) numerically using trapezium rule" },
1123         { "-aver_start", FALSE, etREAL, {&aver_start},
1124           "Start averaging the integral from here" },
1125         { "-xydy",    FALSE, etBOOL, {&bXYdy},
1126           "Interpret second data set as error in the y values for integrating" },
1127         { "-regression", FALSE, etBOOL, {&bRegression},
1128           "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1129         { "-luzar",   FALSE, etBOOL, {&bLuzar},
1130           "Do a Luzar and Chandler analysis on a correlation function and related as produced by [TT]g_hbond[tt]. When in addition the [TT]-xydy[tt] flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1131         { "-temp",    FALSE, etREAL, {&temp},
1132           "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1133         { "-fitstart", FALSE, etREAL, {&fit_start},
1134           "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1135         { "-fitend", FALSE, etREAL, {&fit_end},
1136           "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1137         { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1138           "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1139         { "-nbmin",   FALSE, etINT, {&nb_min},
1140           "HIDDENMinimum number of blocks for block averaging" },
1141         { "-resol", FALSE, etINT, {&resol},
1142           "HIDDENResolution for the block averaging, block size increases with"
1143           " a factor 2^(1/resol)" },
1144         { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1145           "HIDDENAlways use a single exponential fit for the error estimate" },
1146         { "-eenlc", FALSE, etBOOL, {&bEENLC},
1147           "HIDDENAllow a negative long-time correlation" },
1148         { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1149           "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1150         { "-filter",  FALSE, etREAL, {&filtlen},
1151           "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1152         { "-power", FALSE, etBOOL, {&bPower},
1153           "Fit data to: b t^a" },
1154         { "-subav", FALSE, etBOOL, {&bSubAv},
1155           "Subtract the average before autocorrelating" },
1156         { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1157           "Calculate one ACF over all sets" },
1158         { "-nbalexp", FALSE, etINT, {&nBalExp},
1159           "HIDDENNumber of exponentials to fit to the ultrafast component" },
1160         { "-baltime", FALSE, etREAL, {&balTime},
1161           "HIDDENTime up to which the ballistic component will be fitted" },
1162 /*     { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1163 /*       "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1164 /*     { "-rcut", FALSE, etREAL, {&rcut}, */
1165 /*       "Cut-off for hydrogen bonds in geminate algorithms" }, */
1166 /*     { "-gemtype", FALSE, etENUM, {gemType}, */
1167 /*       "What type of gminate recombination to use"}, */
1168 /*     { "-D", FALSE, etREAL, {&diffusion}, */
1169 /*       "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1170     };
1171 #define NPA asize(pa)
1172
1173     FILE           *out, *out_fit;
1174     int             n, nlast, s, nset, i, j = 0;
1175     real          **val, *t, dt, tot, error;
1176     double         *av, *sig, cum1, cum2, cum3, cum4, db;
1177     const char     *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1178     output_env_t    oenv;
1179
1180     t_filenm        fnm[] = {
1181         { efXVG, "-f",    "graph",    ffREAD   },
1182         { efXVG, "-ac",   "autocorr", ffOPTWR  },
1183         { efXVG, "-msd",  "msd",      ffOPTWR  },
1184         { efXVG, "-cc",   "coscont",  ffOPTWR  },
1185         { efXVG, "-dist", "distr",    ffOPTWR  },
1186         { efXVG, "-av",   "average",  ffOPTWR  },
1187         { efXVG, "-ee",   "errest",   ffOPTWR  },
1188         { efXVG, "-bal",  "ballisitc", ffOPTWR  },
1189 /*     { efXVG, "-gem",  "geminate", ffOPTWR  }, */
1190         { efLOG, "-g",    "fitlog",   ffOPTWR  }
1191     };
1192 #define NFILE asize(fnm)
1193
1194     int      npargs;
1195     t_pargs *ppa;
1196
1197     npargs = asize(pa);
1198     ppa    = add_acf_pargs(&npargs, pa);
1199
1200     parse_common_args(&argc, argv, PCA_CAN_VIEW,
1201                       NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv);
1202
1203     acfile   = opt2fn_null("-ac", NFILE, fnm);
1204     msdfile  = opt2fn_null("-msd", NFILE, fnm);
1205     ccfile   = opt2fn_null("-cc", NFILE, fnm);
1206     distfile = opt2fn_null("-dist", NFILE, fnm);
1207     avfile   = opt2fn_null("-av", NFILE, fnm);
1208     eefile   = opt2fn_null("-ee", NFILE, fnm);
1209     balfile  = opt2fn_null("-bal", NFILE, fnm);
1210 /*   gemfile  = opt2fn_null("-gem",NFILE,fnm); */
1211     /* When doing autocorrelation we don't want a fitlog for fitting
1212      * the function itself (not the acf) when the user did not ask for it.
1213      */
1214     if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1215     {
1216         fitfile  = opt2fn("-g", NFILE, fnm);
1217     }
1218     else
1219     {
1220         fitfile  = opt2fn_null("-g", NFILE, fnm);
1221     }
1222
1223     val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1224                         opt2parg_bSet("-b", npargs, ppa), tb,
1225                         opt2parg_bSet("-e", npargs, ppa), te,
1226                         nsets_in, &nset, &n, &dt, &t);
1227     printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1228
1229     if (bDer)
1230     {
1231         printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1232                d, d);
1233         n -= d;
1234         for (s = 0; s < nset; s++)
1235         {
1236             for (i = 0; (i < n); i++)
1237             {
1238                 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1239             }
1240         }
1241     }
1242
1243     if (bIntegrate)
1244     {
1245         real sum, stddev;
1246
1247         printf("Calculating the integral using the trapezium rule\n");
1248
1249         if (bXYdy)
1250         {
1251             sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1252             printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1253         }
1254         else
1255         {
1256             for (s = 0; s < nset; s++)
1257             {
1258                 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1259                 printf("Integral %d  %10.5f  +/- %10.5f\n", s+1, sum, stddev);
1260             }
1261         }
1262     }
1263
1264     if (fitfile != NULL)
1265     {
1266         out_fit = ffopen(fitfile, "w");
1267         if (bXYdy && nset >= 2)
1268         {
1269             do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1270         }
1271         else
1272         {
1273             for (s = 0; s < nset; s++)
1274             {
1275                 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1276             }
1277         }
1278         ffclose(out_fit);
1279     }
1280
1281     printf("                                      std. dev.    relative deviation of\n");
1282     printf("                       standard       ---------   cumulants from those of\n");
1283     printf("set      average       deviation      sqrt(n-1)   a Gaussian distribition\n");
1284     printf("                                                      cum. 3   cum. 4\n");
1285     snew(av, nset);
1286     snew(sig, nset);
1287     for (s = 0; (s < nset); s++)
1288     {
1289         cum1 = 0;
1290         cum2 = 0;
1291         cum3 = 0;
1292         cum4 = 0;
1293         for (i = 0; (i < n); i++)
1294         {
1295             cum1 += val[s][i];
1296         }
1297         cum1 /= n;
1298         for (i = 0; (i < n); i++)
1299         {
1300             db    = val[s][i]-cum1;
1301             cum2 += db*db;
1302             cum3 += db*db*db;
1303             cum4 += db*db*db*db;
1304         }
1305         cum2  /= n;
1306         cum3  /= n;
1307         cum4  /= n;
1308         av[s]  = cum1;
1309         sig[s] = sqrt(cum2);
1310         if (n > 1)
1311         {
1312             error = sqrt(cum2/(n-1));
1313         }
1314         else
1315         {
1316             error = 0;
1317         }
1318         printf("SS%d  %13.6e   %12.6e   %12.6e      %6.3f   %6.3f\n",
1319                s+1, av[s], sig[s], error,
1320                sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1321                sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1322     }
1323     printf("\n");
1324
1325     if (filtlen)
1326     {
1327         filter(filtlen, n, nset, val, dt);
1328     }
1329
1330     if (msdfile)
1331     {
1332         out = xvgropen(msdfile, "Mean square displacement",
1333                        "time", "MSD (nm\\S2\\N)", oenv);
1334         nlast = (int)(n*frac);
1335         for (s = 0; s < nset; s++)
1336         {
1337             for (j = 0; j <= nlast; j++)
1338             {
1339                 if (j % 100 == 0)
1340                 {
1341                     fprintf(stderr, "\r%d", j);
1342                 }
1343                 tot = 0;
1344                 for (i = 0; i < n-j; i++)
1345                 {
1346                     tot += sqr(val[s][i]-val[s][i+j]);
1347                 }
1348                 tot /= (real)(n-j);
1349                 fprintf(out, " %g %8g\n", dt*j, tot);
1350             }
1351             if (s < nset-1)
1352             {
1353                 fprintf(out, "&\n");
1354             }
1355         }
1356         ffclose(out);
1357         fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1358     }
1359     if (ccfile)
1360     {
1361         plot_coscont(ccfile, n, nset, val, oenv);
1362     }
1363
1364     if (distfile)
1365     {
1366         histogram(distfile, binwidth, n, nset, val, oenv);
1367     }
1368     if (avfile)
1369     {
1370         average(avfile, nenum(avbar_opt), n, nset, val, t);
1371     }
1372     if (eefile)
1373     {
1374         estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1375                        bEeFitAc, bEESEF, bEENLC, oenv);
1376     }
1377     if (balfile)
1378     {
1379         do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1380     }
1381 /*   if (gemfile) */
1382 /*       do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1383 /*                   nFitPoints, fit_start, fit_end, oenv); */
1384     if (bPower)
1385     {
1386         power_fit(n, nset, val, t);
1387     }
1388
1389     if (acfile != NULL)
1390     {
1391         if (bSubAv)
1392         {
1393             for (s = 0; s < nset; s++)
1394             {
1395                 for (i = 0; i < n; i++)
1396                 {
1397                     val[s][i] -= av[s];
1398                 }
1399             }
1400         }
1401         do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1402                     eacNormal, bAverCorr);
1403     }
1404
1405     if (bRegression)
1406     {
1407         regression_analysis(n, bXYdy, t, nset, val);
1408     }
1409
1410     if (bLuzar)
1411     {
1412         luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1413     }
1414
1415     view_all(oenv, NFILE, fnm);
1416
1417     return 0;
1418 }