src/gromacs/gmxana/gmx_analyze.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40 #include <math.h>
  41 #include <string.h>
  42 #include "gromacs/commandline/pargs.h"
  43 #include "sysstuff.h"
  44 #include "typedefs.h"
  45 #include "smalloc.h"
  46 #include "macros.h"
  47 #include "gmx_fatal.h"
  48 #include "vec.h"
  49 #include "copyrite.h"
  50 #include "gromacs/fileio/futil.h"
  51 #include "readinp.h"
  52 #include "txtdump.h"
  53 #include "gstat.h"
  54 #include "gmx_statistics.h"
  55 #include "xvgr.h"
  56 #include "gmx_ana.h"
  57 #include "geminate.h"
  58
  59 #include "gromacs/linearalgebra/matrix.h"
  60
  61 /* must correspond to char *avbar_opt[] declared in main() */
  62 enum {
  63     avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
  64 };
  65
  66 static void power_fit(int n, int nset, real **val, real *t)
  67 {
  68     real *x, *y, quality, a, b, r;
  69     int   s, i;
  70
  71     snew(x, n);
  72     snew(y, n);
  73
  74     if (t[0] > 0)
  75     {
  76         for (i = 0; i < n; i++)
  77         {
  78             if (t[0] > 0)
  79             {
  80                 x[i] = log(t[i]);
  81             }
  82         }
  83     }
  84     else
  85     {
  86         fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
  87         for (i = 0; i < n; i++)
  88         {
  89             x[i] = log(i+1);
  90         }
  91     }
  92
  93     for (s = 0; s < nset; s++)
  94     {
  95         i = 0;
  96         for (i = 0; i < n && val[s][i] >= 0; i++)
  97         {
  98             y[i] = log(val[s][i]);
  99         }
 100         if (i < n)
 101         {
 102             fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
 103         }
 104         lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
 105         fprintf(stdout, "Power fit set %3d:  error %.3f  a %g  b %g\n",
 106                 s+1, quality, a, exp(b));
 107     }
 108
 109     sfree(y);
 110     sfree(x);
 111 }
 112
 113 static real cosine_content(int nhp, int n, real *y)
 114 /* Assumes n equidistant points */
 115 {
 116     double fac, cosyint, yyint;
 117     int    i;
 118
 119     if (n < 2)
 120     {
 121         return 0;
 122     }
 123
 124     fac = M_PI*nhp/(n-1);
 125
 126     cosyint = 0;
 127     yyint   = 0;
 128     for (i = 0; i < n; i++)
 129     {
 130         cosyint += cos(fac*i)*y[i];
 131         yyint   += y[i]*y[i];
 132     }
 133
 134     return 2*cosyint*cosyint/(n*yyint);
 135 }
 136
 137 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
 138                          const output_env_t oenv)
 139 {
 140     FILE *fp;
 141     int   s;
 142     real  cc;
 143
 144     fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
 145                   oenv);
 146
 147     for (s = 0; s < nset; s++)
 148     {
 149         cc = cosine_content(s+1, n, val[s]);
 150         fprintf(fp, " %d %g\n", s+1, cc);
 151         fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
 152                 s+1, 0.5*(s+1), cc);
 153     }
 154     fprintf(stdout, "\n");
 155
 156     ffclose(fp);
 157 }
 158
 159 static void regression_analysis(int n, gmx_bool bXYdy,
 160                                 real *x, int nset, real **val)
 161 {
 162     real S, chi2, a, b, da, db, r = 0;
 163     int  ok;
 164
 165     if (bXYdy || (nset == 1))
 166     {
 167         printf("Fitting data to a function f(x) = ax + b\n");
 168         printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
 169         printf("Error estimates will be given if w_i (sigma) values are given\n");
 170         printf("(use option -xydy).\n\n");
 171         if (bXYdy)
 172         {
 173             if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
 174             {
 175                 gmx_fatal(FARGS, "Error fitting the data: %s",
 176                           gmx_stats_message(ok));
 177             }
 178         }
 179         else
 180         {
 181             if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
 182             {
 183                 gmx_fatal(FARGS, "Error fitting the data: %s",
 184                           gmx_stats_message(ok));
 185             }
 186         }
 187         chi2 = sqr((n-2)*S);
 188         printf("Chi2                    = %g\n", chi2);
 189         printf("S (Sqrt(Chi2/(n-2))     = %g\n", S);
 190         printf("Correlation coefficient = %.1f%%\n", 100*r);
 191         printf("\n");
 192         if (bXYdy)
 193         {
 194             printf("a    = %g +/- %g\n", a, da);
 195             printf("b    = %g +/- %g\n", b, db);
 196         }
 197         else
 198         {
 199             printf("a    = %g\n", a);
 200             printf("b    = %g\n", b);
 201         }
 202     }
 203     else
 204     {
 205         double chi2, *a, **xx, *y;
 206         int    i, j;
 207
 208         snew(y, n);
 209         snew(xx, nset-1);
 210         for (j = 0; (j < nset-1); j++)
 211         {
 212             snew(xx[j], n);
 213         }
 214         for (i = 0; (i < n); i++)
 215         {
 216             y[i] = val[0][i];
 217             for (j = 1; (j < nset); j++)
 218             {
 219                 xx[j-1][i] = val[j][i];
 220             }
 221         }
 222         snew(a, nset-1);
 223         chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
 224         printf("Fitting %d data points in %d sets\n", n, nset-1);
 225         printf("chi2 = %g\n", chi2);
 226         printf("A =");
 227         for (i = 0; (i < nset-1); i++)
 228         {
 229             printf("  %g", a[i]);
 230             sfree(xx[i]);
 231         }
 232         printf("\n");
 233         sfree(xx);
 234         sfree(y);
 235         sfree(a);
 236     }
 237 }
 238
 239 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
 240                const output_env_t oenv)
 241 {
 242     FILE          *fp;
 243     int            i, s;
 244     double         min, max;
 245     int            nbin;
 246 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 247     long long int *histo;
 248 #else
 249     double        *histo;
 250 #endif
 251
 252     min = val[0][0];
 253     max = val[0][0];
 254     for (s = 0; s < nset; s++)
 255     {
 256         for (i = 0; i < n; i++)
 257         {
 258             if (val[s][i] < min)
 259             {
 260                 min = val[s][i];
 261             }
 262             else if (val[s][i] > max)
 263             {
 264                 max = val[s][i];
 265             }
 266         }
 267     }
 268
 269     min = binwidth*floor(min/binwidth);
 270     max = binwidth*ceil(max/binwidth);
 271     if (min != 0)
 272     {
 273         min -= binwidth;
 274     }
 275     max += binwidth;
 276
 277     nbin = (int)((max - min)/binwidth + 0.5) + 1;
 278     fprintf(stderr, "Making distributions with %d bins\n", nbin);
 279     snew(histo, nbin);
 280     fp = xvgropen(distfile, "Distribution", "", "", oenv);
 281     for (s = 0; s < nset; s++)
 282     {
 283         for (i = 0; i < nbin; i++)
 284         {
 285             histo[i] = 0;
 286         }
 287         for (i = 0; i < n; i++)
 288         {
 289             histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
 290         }
 291         for (i = 0; i < nbin; i++)
 292         {
 293             fprintf(fp, " %g  %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
 294         }
 295         if (s < nset-1)
 296         {
 297             fprintf(fp, "&\n");
 298         }
 299     }
 300     ffclose(fp);
 301 }
 302
 303 static int real_comp(const void *a, const void *b)
 304 {
 305     real dif = *(real *)a - *(real *)b;
 306
 307     if (dif < 0)
 308     {
 309         return -1;
 310     }
 311     else if (dif > 0)
 312     {
 313         return 1;
 314     }
 315     else
 316     {
 317         return 0;
 318     }
 319 }
 320
 321 static void average(const char *avfile, int avbar_opt,
 322                     int n, int nset, real **val, real *t)
 323 {
 324     FILE   *fp;
 325     int     i, s, edge = 0;
 326     double  av, var, err;
 327     real   *tmp = NULL;
 328
 329     fp = ffopen(avfile, "w");
 330     if ((avbar_opt == avbarERROR) && (nset == 1))
 331     {
 332         avbar_opt = avbarNONE;
 333     }
 334     if (avbar_opt != avbarNONE)
 335     {
 336         if (avbar_opt == avbar90)
 337         {
 338             snew(tmp, nset);
 339             fprintf(fp, "@TYPE xydydy\n");
 340             edge = (int)(nset*0.05+0.5);
 341             fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
 342                     " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
 343         }
 344         else
 345         {
 346             fprintf(fp, "@TYPE xydy\n");
 347         }
 348     }
 349
 350     for (i = 0; i < n; i++)
 351     {
 352         av = 0;
 353         for (s = 0; s < nset; s++)
 354         {
 355             av += val[s][i];
 356         }
 357         av /= nset;
 358         fprintf(fp, " %g %g", t[i], av);
 359         var = 0;
 360         if (avbar_opt != avbarNONE)
 361         {
 362             if (avbar_opt == avbar90)
 363             {
 364                 for (s = 0; s < nset; s++)
 365                 {
 366                     tmp[s] = val[s][i];
 367                 }
 368                 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
 369                 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
 370             }
 371             else
 372             {
 373                 for (s = 0; s < nset; s++)
 374                 {
 375                     var += sqr(val[s][i]-av);
 376                 }
 377                 if (avbar_opt == avbarSTDDEV)
 378                 {
 379                     err = sqrt(var/nset);
 380                 }
 381                 else
 382                 {
 383                     err = sqrt(var/(nset*(nset-1)));
 384                 }
 385                 fprintf(fp, " %g", err);
 386             }
 387         }
 388         fprintf(fp, "\n");
 389     }
 390     ffclose(fp);
 391
 392     if (avbar_opt == avbar90)
 393     {
 394         sfree(tmp);
 395     }
 396 }
 397
 398 static real anal_ee_inf(real *parm, real T)
 399 {
 400     return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
 401 }
 402
 403 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
 404                            int nset, double *av, double *sig, real **val, real dt,
 405                            gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
 406                            const output_env_t oenv)
 407 {
 408     FILE    *fp;
 409     int      bs, prev_bs, nbs, nb;
 410     real     spacing, nbr;
 411     int      s, i, j;
 412     double   blav, var;
 413     char   **leg;
 414     real    *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
 415     real     fitparm[4];
 416     real     ee, a, tau1, tau2;
 417
 418     if (n < 4)
 419     {
 420         fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
 421
 422         return;
 423     }
 424
 425     fp = xvgropen(eefile, "Error estimates",
 426                   "Block size (time)", "Error estimate", oenv);
 427     if (output_env_get_print_xvgr_codes(oenv))
 428     {
 429         fprintf(fp,
 430                 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
 431                 (n-1)*dt, n);
 432     }
 433     snew(leg, 2*nset);
 434     xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
 435     sfree(leg);
 436
 437     spacing = pow(2, 1.0/resol);
 438     snew(tbs, n);
 439     snew(ybs, n);
 440     snew(fitsig, n);
 441     for (s = 0; s < nset; s++)
 442     {
 443         nbs     = 0;
 444         prev_bs = 0;
 445         nbr     = nb_min;
 446         while (nbr <= n)
 447         {
 448             bs = n/(int)nbr;
 449             if (bs != prev_bs)
 450             {
 451                 nb  = n/bs;
 452                 var = 0;
 453                 for (i = 0; i < nb; i++)
 454                 {
 455                     blav = 0;
 456                     for (j = 0; j < bs; j++)
 457                     {
 458                         blav += val[s][bs*i+j];
 459                     }
 460                     var += sqr(av[s] - blav/bs);
 461                 }
 462                 tbs[nbs] = bs*dt;
 463                 if (sig[s] == 0)
 464                 {
 465                     ybs[nbs] = 0;
 466                 }
 467                 else
 468                 {
 469                     ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
 470                 }
 471                 nbs++;
 472             }
 473             nbr    *= spacing;
 474             nb      = (int)(nbr+0.5);
 475             prev_bs = bs;
 476         }
 477         if (sig[s] == 0)
 478         {
 479             ee   = 0;
 480             a    = 1;
 481             tau1 = 0;
 482             tau2 = 0;
 483         }
 484         else
 485         {
 486             for (i = 0; i < nbs/2; i++)
 487             {
 488                 rtmp         = tbs[i];
 489                 tbs[i]       = tbs[nbs-1-i];
 490                 tbs[nbs-1-i] = rtmp;
 491                 rtmp         = ybs[i];
 492                 ybs[i]       = ybs[nbs-1-i];
 493                 ybs[nbs-1-i] = rtmp;
 494             }
 495             /* The initial slope of the normalized ybs^2 is 1.
 496              * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
 497              * From this we take our initial guess for tau1.
 498              */
 499             twooe = 2/exp(1);
 500             i     = -1;
 501             do
 502             {
 503                 i++;
 504                 tau1_est = tbs[i];
 505             }
 506             while (i < nbs - 1 &&
 507                    (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
 508
 509             if (ybs[0] > ybs[1])
 510             {
 511                 fprintf(stdout, "Data set %d has strange time correlations:\n"
 512                         "the std. error using single points is larger than that of blocks of 2 points\n"
 513                         "The error estimate might be inaccurate, check the fit\n",
 514                         s+1);
 515                 /* Use the total time as tau for the fitting weights */
 516                 tau_sig = (n - 1)*dt;
 517             }
 518             else
 519             {
 520                 tau_sig = tau1_est;
 521             }
 522
 523             if (debug)
 524             {
 525                 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
 526             }
 527
 528             /* Generate more or less appropriate sigma's,
 529              * also taking the density of points into account.
 530              */
 531             for (i = 0; i < nbs; i++)
 532             {
 533                 if (i == 0)
 534                 {
 535                     dens = tbs[1]/tbs[0] - 1;
 536                 }
 537                 else if (i == nbs-1)
 538                 {
 539                     dens = tbs[nbs-1]/tbs[nbs-2] - 1;
 540                 }
 541                 else
 542                 {
 543                     dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
 544                 }
 545                 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
 546             }
 547
 548             if (!bSingleExpFit)
 549             {
 550                 fitparm[0] = tau1_est;
 551                 fitparm[1] = 0.95;
 552                 /* We set the initial guess for tau2
 553                  * to halfway between tau1_est and the total time (on log scale).
 554                  */
 555                 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
 556                 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
 557                          bDebugMode(), effnERREST, fitparm, 0);
 558                 fitparm[3] = 1-fitparm[1];
 559             }
 560             if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
 561                 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
 562             {
 563                 if (!bSingleExpFit)
 564                 {
 565                     if (fitparm[2] > (n-1)*dt)
 566                     {
 567                         fprintf(stdout,
 568                                 "Warning: tau2 is longer than the length of the data (%g)\n"
 569                                 "         the statistics might be bad\n",
 570                                 (n-1)*dt);
 571                     }
 572                     else
 573                     {
 574                         fprintf(stdout, "a fitted parameter is negative\n");
 575                     }
 576                     fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 577                             sig[s]*anal_ee_inf(fitparm, n*dt),
 578                             fitparm[1], fitparm[0], fitparm[2]);
 579                     /* Do a fit with tau2 fixed at the total time.
 580                      * One could also choose any other large value for tau2.
 581                      */
 582                     fitparm[0] = tau1_est;
 583                     fitparm[1] = 0.95;
 584                     fitparm[2] = (n-1)*dt;
 585                     fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
 586                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 587                              effnERREST, fitparm, 4);
 588                     fitparm[3] = 1-fitparm[1];
 589                 }
 590                 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
 591                     || (fitparm[1] > 1 && !bAllowNegLTCorr))
 592                 {
 593                     if (!bSingleExpFit)
 594                     {
 595                         fprintf(stdout, "a fitted parameter is negative\n");
 596                         fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 597                                 sig[s]*anal_ee_inf(fitparm, n*dt),
 598                                 fitparm[1], fitparm[0], fitparm[2]);
 599                     }
 600                     /* Do a single exponential fit */
 601                     fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
 602                     fitparm[0] = tau1_est;
 603                     fitparm[1] = 1.0;
 604                     fitparm[2] = 0.0;
 605                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 606                              effnERREST, fitparm, 6);
 607                     fitparm[3] = 1-fitparm[1];
 608                 }
 609             }
 610             ee   = sig[s]*anal_ee_inf(fitparm, n*dt);
 611             a    = fitparm[1];
 612             tau1 = fitparm[0];
 613             tau2 = fitparm[2];
 614         }
 615         fprintf(stdout, "Set %3d:  err.est. %g  a %g  tau1 %g  tau2 %g\n",
 616                 s+1, ee, a, tau1, tau2);
 617         fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
 618         fprintf(fp, "@ legend string %d \"ee %6g\"\n",
 619                 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
 620         for (i = 0; i < nbs; i++)
 621         {
 622             fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
 623                     sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
 624         }
 625
 626         if (bFitAc)
 627         {
 628             int   fitlen;
 629             real *ac, acint, ac_fit[4];
 630
 631             snew(ac, n);
 632             for (i = 0; i < n; i++)
 633             {
 634                 ac[i] = val[s][i] - av[s];
 635                 if (i > 0)
 636                 {
 637                     fitsig[i] = sqrt(i);
 638                 }
 639                 else
 640                 {
 641                     fitsig[i] = 1;
 642                 }
 643             }
 644             low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
 645                             dt, eacNormal, 1, FALSE, TRUE,
 646                             FALSE, 0, 0, effnNONE);
 647
 648             fitlen = n/nb_min;
 649
 650             /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
 651             acint = 0.5*ac[0];
 652             for (i = 1; i <= fitlen/2; i++)
 653             {
 654                 acint += ac[i];
 655             }
 656             acint *= dt;
 657
 658             /* Generate more or less appropriate sigma's */
 659             for (i = 0; i <= fitlen; i++)
 660             {
 661                 fitsig[i] = sqrt(acint + dt*i);
 662             }
 663
 664             ac_fit[0] = 0.5*acint;
 665             ac_fit[1] = 0.95;
 666             ac_fit[2] = 10*acint;
 667             do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
 668                      bDebugMode(), effnEXP3, ac_fit, 0);
 669             ac_fit[3] = 1 - ac_fit[1];
 670
 671             fprintf(stdout, "Set %3d:  ac erest %g  a %g  tau1 %g  tau2 %g\n",
 672                     s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
 673                     ac_fit[1], ac_fit[0], ac_fit[2]);
 674
 675             fprintf(fp, "&\n");
 676             for (i = 0; i < nbs; i++)
 677             {
 678                 fprintf(fp, "%g %g\n", tbs[i],
 679                         sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
 680             }
 681
 682             sfree(ac);
 683         }
 684         if (s < nset-1)
 685         {
 686             fprintf(fp, "&\n");
 687         }
 688     }
 689     sfree(fitsig);
 690     sfree(ybs);
 691     sfree(tbs);
 692     ffclose(fp);
 693 }
 694
 695 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
 696                          gmx_bool bError, real fit_start, real smooth_tail_start,
 697                          const output_env_t oenv)
 698 {
 699     const real tol = 1e-8;
 700     real      *kt;
 701     real       weight, d2;
 702     int        j;
 703
 704     please_cite(stdout, "Spoel2006b");
 705
 706     /* Compute negative derivative k(t) = -dc(t)/dt */
 707     if (!bError)
 708     {
 709         snew(kt, nn);
 710         compute_derivative(nn, time, val[0], kt);
 711         for (j = 0; (j < nn); j++)
 712         {
 713             kt[j] = -kt[j];
 714         }
 715         if (debug)
 716         {
 717             d2 = 0;
 718             for (j = 0; (j < nn); j++)
 719             {
 720                 d2 += sqr(kt[j] - val[3][j]);
 721             }
 722             fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
 723         }
 724         analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
 725                      temp, smooth_tail_start, oenv);
 726         sfree(kt);
 727     }
 728     else if (nset == 6)
 729     {
 730         analyse_corr(nn, time, val[0], val[2], val[4],
 731                      val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
 732     }
 733     else
 734     {
 735         printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
 736         printf("Not doing anything. Sorry.\n");
 737     }
 738 }
 739
 740 static void filter(real flen, int n, int nset, real **val, real dt)
 741 {
 742     int     f, s, i, j;
 743     double *filt, sum, vf, fluc, fluctot;
 744
 745     f = (int)(flen/(2*dt));
 746     snew(filt, f+1);
 747     filt[0] = 1;
 748     sum     = 1;
 749     for (i = 1; i <= f; i++)
 750     {
 751         filt[i] = cos(M_PI*dt*i/flen);
 752         sum    += 2*filt[i];
 753     }
 754     for (i = 0; i <= f; i++)
 755     {
 756         filt[i] /= sum;
 757     }
 758     fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
 759     fprintf(stdout, "  using a filter of length %g of %d points\n", flen, 2*f+1);
 760     fluctot = 0;
 761     for (s = 0; s < nset; s++)
 762     {
 763         fluc = 0;
 764         for (i = f; i < n-f; i++)
 765         {
 766             vf = filt[0]*val[s][i];
 767             for (j = 1; j <= f; j++)
 768             {
 769                 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
 770             }
 771             fluc += sqr(val[s][i] - vf);
 772         }
 773         fluc    /= n - 2*f;
 774         fluctot += fluc;
 775         fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
 776     }
 777     fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
 778     fprintf(stdout, "\n");
 779
 780     sfree(filt);
 781 }
 782
 783 static void do_fit(FILE *out, int n, gmx_bool bYdy,
 784                    int ny, real *x0, real **val,
 785                    int npargs, t_pargs *ppa, const output_env_t oenv)
 786 {
 787     real *c1 = NULL, *sig = NULL, *fitparm;
 788     real  tendfit, tbeginfit;
 789     int   i, efitfn, nparm;
 790
 791     efitfn = get_acffitfn();
 792     nparm  = nfp_ffn[efitfn];
 793     fprintf(out, "Will fit to the following function:\n");
 794     fprintf(out, "%s\n", longs_ffn[efitfn]);
 795     c1 = val[n];
 796     if (bYdy)
 797     {
 798         c1  = val[n];
 799         sig = val[n+1];
 800         fprintf(out, "Using two columns as y and sigma values\n");
 801     }
 802     else
 803     {
 804         snew(sig, ny);
 805     }
 806     if (opt2parg_bSet("-beginfit", npargs, ppa))
 807     {
 808         tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
 809     }
 810     else
 811     {
 812         tbeginfit = x0[0];
 813     }
 814     if (opt2parg_bSet("-endfit", npargs, ppa))
 815     {
 816         tendfit   = opt2parg_real("-endfit", npargs, ppa);
 817     }
 818     else
 819     {
 820         tendfit   = x0[ny-1];
 821     }
 822
 823     snew(fitparm, nparm);
 824     switch (efitfn)
 825     {
 826         case effnEXP1:
 827             fitparm[0] = 0.5;
 828             break;
 829         case effnEXP2:
 830             fitparm[0] = 0.5;
 831             fitparm[1] = c1[0];
 832             break;
 833         case effnEXP3:
 834             fitparm[0] = 1.0;
 835             fitparm[1] = 0.5*c1[0];
 836             fitparm[2] = 10.0;
 837             break;
 838         case effnEXP5:
 839             fitparm[0] = fitparm[2] = 0.5*c1[0];
 840             fitparm[1] = 10;
 841             fitparm[3] = 40;
 842             fitparm[4] = 0;
 843             break;
 844         case effnEXP7:
 845             fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
 846             fitparm[1] = 1;
 847             fitparm[3] = 10;
 848             fitparm[5] = 100;
 849             fitparm[6] = 0;
 850             break;
 851         case effnEXP9:
 852             fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
 853             fitparm[1] = 0.1;
 854             fitparm[3] = 1;
 855             fitparm[5] = 10;
 856             fitparm[7] = 100;
 857             fitparm[8] = 0;
 858             break;
 859         default:
 860             fprintf(out, "Warning: don't know how to initialize the parameters\n");
 861             for (i = 0; (i < nparm); i++)
 862             {
 863                 fitparm[i] = 1;
 864             }
 865     }
 866     fprintf(out, "Starting parameters:\n");
 867     for (i = 0; (i < nparm); i++)
 868     {
 869         fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 870     }
 871     if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
 872                  oenv, bDebugMode(), efitfn, fitparm, 0))
 873     {
 874         for (i = 0; (i < nparm); i++)
 875         {
 876             fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 877         }
 878     }
 879     else
 880     {
 881         fprintf(out, "No solution was found\n");
 882     }
 883 }
 884
 885 static void do_ballistic(const char *balFile, int nData,
 886                          real *t, real **val, int nSet,
 887                          real balTime, int nBalExp,
 888                          const output_env_t oenv)
 889 {
 890     double     **ctd   = NULL, *td = NULL;
 891     t_gemParams *GP    = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
 892     static char *leg[] = {"Ac'(t)"};
 893     FILE        *fp;
 894     int          i, set;
 895
 896     if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
 897     {
 898         snew(ctd, nSet);
 899         snew(td,  nData);
 900
 901         fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 902         xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
 903
 904         for (set = 0; set < nSet; set++)
 905         {
 906             snew(ctd[set], nData);
 907             for (i = 0; i < nData; i++)
 908             {
 909                 ctd[set][i] = (double)val[set][i];
 910                 if (set == 0)
 911                 {
 912                     td[i] = (double)t[i];
 913                 }
 914             }
 915
 916             takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
 917         }
 918
 919         for (i = 0; i < nData; i++)
 920         {
 921             fprintf(fp, "  %g", t[i]);
 922             for (set = 0; set < nSet; set++)
 923             {
 924                 fprintf(fp, "  %g", ctd[set][i]);
 925             }
 926             fprintf(fp, "\n");
 927         }
 928
 929
 930         for (set = 0; set < nSet; set++)
 931         {
 932             sfree(ctd[set]);
 933         }
 934         sfree(ctd);
 935         sfree(td);
 936     }
 937     else
 938     {
 939         printf("Number of data points is less than the number of parameters to fit\n."
 940                "The system is underdetermined, hence no ballistic term can be found.\n\n");
 941     }
 942 }
 943
 944 static void do_geminate(const char *gemFile, int nData,
 945                         real *t, real **val, int nSet,
 946                         const real D, const real rcut, const real balTime,
 947                         const int nFitPoints, const real begFit, const real endFit,
 948                         const output_env_t oenv)
 949 {
 950     double     **ctd = NULL, **ctdGem = NULL, *td = NULL;
 951     t_gemParams *GP  = init_gemParams(rcut, D, t, nData, nFitPoints,
 952                                       begFit, endFit, balTime, 1);
 953     const char  *leg[] = {"Ac\\sgem\\N(t)"};
 954     FILE        *fp;
 955     int          i, set;
 956
 957     snew(ctd,    nSet);
 958     snew(ctdGem, nSet);
 959     snew(td,  nData);
 960
 961     fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 962     xvgr_legend(fp, asize(leg), leg, oenv);
 963
 964     for (set = 0; set < nSet; set++)
 965     {
 966         snew(ctd[set],    nData);
 967         snew(ctdGem[set], nData);
 968         for (i = 0; i < nData; i++)
 969         {
 970             ctd[set][i] = (double)val[set][i];
 971             if (set == 0)
 972             {
 973                 td[i] = (double)t[i];
 974             }
 975         }
 976         fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
 977     }
 978
 979     for (i = 0; i < nData; i++)
 980     {
 981         fprintf(fp, "  %g", t[i]);
 982         for (set = 0; set < nSet; set++)
 983         {
 984             fprintf(fp, "  %g", ctdGem[set][i]);
 985         }
 986         fprintf(fp, "\n");
 987     }
 988
 989     for (set = 0; set < nSet; set++)
 990     {
 991         sfree(ctd[set]);
 992         sfree(ctdGem[set]);
 993     }
 994     sfree(ctd);
 995     sfree(ctdGem);
 996     sfree(td);
 997 }
 998
 999 int gmx_analyze(int argc, char *argv[])
1000 {
1001     static const char *desc[] = {
1002         "[THISMODULE] reads an ASCII file and analyzes data sets.",
1003         "A line in the input file may start with a time",
1004         "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1005         "Multiple sets can also be",
1006         "read when they are separated by & (option [TT]-n[tt]);",
1007         "in this case only one [IT]y[it]-value is read from each line.",
1008         "All lines starting with # and @ are skipped.",
1009         "All analyses can also be done for the derivative of a set",
1010         "(option [TT]-d[tt]).[PAR]",
1011
1012         "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1013         "points are equidistant in time.[PAR]",
1014
1015         "[THISMODULE] always shows the average and standard deviation of each",
1016         "set, as well as the relative deviation of the third",
1017         "and fourth cumulant from those of a Gaussian distribution with the same",
1018         "standard deviation.[PAR]",
1019
1020         "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1021         "Be sure that the time interval between data points is",
1022         "much shorter than the time scale of the autocorrelation.[PAR]",
1023
1024         "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1025         "i/2 periods. The formula is:[BR]"
1026         "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1027         "This is useful for principal components obtained from covariance",
1028         "analysis, since the principal components of random diffusion are",
1029         "pure cosines.[PAR]",
1030
1031         "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1032
1033         "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1034
1035         "Option [TT]-av[tt] produces the average over the sets.",
1036         "Error bars can be added with the option [TT]-errbar[tt].",
1037         "The errorbars can represent the standard deviation, the error",
1038         "(assuming the points are independent) or the interval containing",
1039         "90% of the points, by discarding 5% of the points at the top and",
1040         "the bottom.[PAR]",
1041
1042         "Option [TT]-ee[tt] produces error estimates using block averaging.",
1043         "A set is divided in a number of blocks and averages are calculated for",
1044         "each block. The error for the total average is calculated from",
1045         "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1046         "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1047         "These errors are plotted as a function of the block size.",
1048         "Also an analytical block average curve is plotted, assuming",
1049         "that the autocorrelation is a sum of two exponentials.",
1050         "The analytical curve for the block average is:[BR]",
1051         "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T (  [GRK]alpha[grk]   ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1052         "                       (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1053         "where T is the total time.",
1054         "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1055         "When the actual block average is very close to the analytical curve,",
1056         "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1057         "The complete derivation is given in",
1058         "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1059
1060         "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1061         "component from a hydrogen bond autocorrelation function by the fitting",
1062         "of a sum of exponentials, as described in e.g.",
1063         "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1064         "is the one with the most negative coefficient in the exponential,",
1065         "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1066         "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1067
1068         "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1069         "(and optionally kD) to the hydrogen bond autocorrelation function",
1070         "according to the reversible geminate recombination model. Removal of",
1071         "the ballistic component first is strongly advised. The model is presented in",
1072         "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1073
1074         "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1075         "of each set and over all sets with respect to a filtered average.",
1076         "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1077         "to len/2. len is supplied with the option [TT]-filter[tt].",
1078         "This filter reduces oscillations with period len/2 and len by a factor",
1079         "of 0.79 and 0.33 respectively.[PAR]",
1080
1081         "Option [TT]-g[tt] fits the data to the function given with option",
1082         "[TT]-fitfn[tt].[PAR]",
1083
1084         "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1085         "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1086         "zero or with a negative value are ignored.[PAR]"
1087
1088         "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1089         "on output from [gmx-hbond]. The input file can be taken directly",
1090         "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1091     };
1092     static real        tb         = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1093     static gmx_bool    bHaveT     = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1094     static gmx_bool    bEESEF     = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1095     static gmx_bool    bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1096     static int         nsets_in   = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1097     static real        temp       = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1098
1099     /* must correspond to enum avbar* declared at beginning of file */
1100     static const char *avbar_opt[avbarNR+1] = {
1101         NULL, "none", "stddev", "error", "90", NULL
1102     };
1103
1104     t_pargs            pa[] = {
1105         { "-time",    FALSE, etBOOL, {&bHaveT},
1106           "Expect a time in the input" },
1107         { "-b",       FALSE, etREAL, {&tb},
1108           "First time to read from set" },
1109         { "-e",       FALSE, etREAL, {&te},
1110           "Last time to read from set" },
1111         { "-n",       FALSE, etINT, {&nsets_in},
1112           "Read this number of sets separated by &" },
1113         { "-d",       FALSE, etBOOL, {&bDer},
1114           "Use the derivative" },
1115         { "-dp",      FALSE, etINT, {&d},
1116           "HIDDENThe derivative is the difference over this number of points" },
1117         { "-bw",      FALSE, etREAL, {&binwidth},
1118           "Binwidth for the distribution" },
1119         { "-errbar",  FALSE, etENUM, {avbar_opt},
1120           "Error bars for [TT]-av[tt]" },
1121         { "-integrate", FALSE, etBOOL, {&bIntegrate},
1122           "Integrate data function(s) numerically using trapezium rule" },
1123         { "-aver_start", FALSE, etREAL, {&aver_start},
1124           "Start averaging the integral from here" },
1125         { "-xydy",    FALSE, etBOOL, {&bXYdy},
1126           "Interpret second data set as error in the y values for integrating" },
1127         { "-regression", FALSE, etBOOL, {&bRegression},
1128           "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1129         { "-luzar",   FALSE, etBOOL, {&bLuzar},
1130           "Do a Luzar and Chandler analysis on a correlation function and "
1131           "related as produced by [gmx-hbond]. When in addition the "
1132           "[TT]-xydy[tt] flag is given the second and fourth column will be "
1133           "interpreted as errors in c(t) and n(t)." },
1134         { "-temp",    FALSE, etREAL, {&temp},
1135           "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1136         { "-fitstart", FALSE, etREAL, {&fit_start},
1137           "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1138         { "-fitend", FALSE, etREAL, {&fit_end},
1139           "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1140         { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1141           "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1142         { "-nbmin",   FALSE, etINT, {&nb_min},
1143           "HIDDENMinimum number of blocks for block averaging" },
1144         { "-resol", FALSE, etINT, {&resol},
1145           "HIDDENResolution for the block averaging, block size increases with"
1146           " a factor 2^(1/resol)" },
1147         { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1148           "HIDDENAlways use a single exponential fit for the error estimate" },
1149         { "-eenlc", FALSE, etBOOL, {&bEENLC},
1150           "HIDDENAllow a negative long-time correlation" },
1151         { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1152           "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1153         { "-filter",  FALSE, etREAL, {&filtlen},
1154           "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1155         { "-power", FALSE, etBOOL, {&bPower},
1156           "Fit data to: b t^a" },
1157         { "-subav", FALSE, etBOOL, {&bSubAv},
1158           "Subtract the average before autocorrelating" },
1159         { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1160           "Calculate one ACF over all sets" },
1161         { "-nbalexp", FALSE, etINT, {&nBalExp},
1162           "HIDDENNumber of exponentials to fit to the ultrafast component" },
1163         { "-baltime", FALSE, etREAL, {&balTime},
1164           "HIDDENTime up to which the ballistic component will be fitted" },
1165 /*     { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1166 /*       "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1167 /*     { "-rcut", FALSE, etREAL, {&rcut}, */
1168 /*       "Cut-off for hydrogen bonds in geminate algorithms" }, */
1169 /*     { "-gemtype", FALSE, etENUM, {gemType}, */
1170 /*       "What type of gminate recombination to use"}, */
1171 /*     { "-D", FALSE, etREAL, {&diffusion}, */
1172 /*       "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1173     };
1174 #define NPA asize(pa)
1175
1176     FILE           *out, *out_fit;
1177     int             n, nlast, s, nset, i, j = 0;
1178     real          **val, *t, dt, tot, error;
1179     double         *av, *sig, cum1, cum2, cum3, cum4, db;
1180     const char     *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1181     output_env_t    oenv;
1182
1183     t_filenm        fnm[] = {
1184         { efXVG, "-f",    "graph",    ffREAD   },
1185         { efXVG, "-ac",   "autocorr", ffOPTWR  },
1186         { efXVG, "-msd",  "msd",      ffOPTWR  },
1187         { efXVG, "-cc",   "coscont",  ffOPTWR  },
1188         { efXVG, "-dist", "distr",    ffOPTWR  },
1189         { efXVG, "-av",   "average",  ffOPTWR  },
1190         { efXVG, "-ee",   "errest",   ffOPTWR  },
1191         { efXVG, "-bal",  "ballisitc", ffOPTWR  },
1192 /*     { efXVG, "-gem",  "geminate", ffOPTWR  }, */
1193         { efLOG, "-g",    "fitlog",   ffOPTWR  }
1194     };
1195 #define NFILE asize(fnm)
1196
1197     int      npargs;
1198     t_pargs *ppa;
1199
1200     npargs = asize(pa);
1201     ppa    = add_acf_pargs(&npargs, pa);
1202
1203     if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1204                            NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1205     {
1206         return 0;
1207     }
1208
1209     acfile   = opt2fn_null("-ac", NFILE, fnm);
1210     msdfile  = opt2fn_null("-msd", NFILE, fnm);
1211     ccfile   = opt2fn_null("-cc", NFILE, fnm);
1212     distfile = opt2fn_null("-dist", NFILE, fnm);
1213     avfile   = opt2fn_null("-av", NFILE, fnm);
1214     eefile   = opt2fn_null("-ee", NFILE, fnm);
1215     balfile  = opt2fn_null("-bal", NFILE, fnm);
1216 /*   gemfile  = opt2fn_null("-gem",NFILE,fnm); */
1217     /* When doing autocorrelation we don't want a fitlog for fitting
1218      * the function itself (not the acf) when the user did not ask for it.
1219      */
1220     if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1221     {
1222         fitfile  = opt2fn("-g", NFILE, fnm);
1223     }
1224     else
1225     {
1226         fitfile  = opt2fn_null("-g", NFILE, fnm);
1227     }
1228
1229     val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1230                         opt2parg_bSet("-b", npargs, ppa), tb,
1231                         opt2parg_bSet("-e", npargs, ppa), te,
1232                         nsets_in, &nset, &n, &dt, &t);
1233     printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1234
1235     if (bDer)
1236     {
1237         printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1238                d, d);
1239         n -= d;
1240         for (s = 0; s < nset; s++)
1241         {
1242             for (i = 0; (i < n); i++)
1243             {
1244                 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1245             }
1246         }
1247     }
1248
1249     if (bIntegrate)
1250     {
1251         real sum, stddev;
1252
1253         printf("Calculating the integral using the trapezium rule\n");
1254
1255         if (bXYdy)
1256         {
1257             sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1258             printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1259         }
1260         else
1261         {
1262             for (s = 0; s < nset; s++)
1263             {
1264                 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1265                 printf("Integral %d  %10.5f  +/- %10.5f\n", s+1, sum, stddev);
1266             }
1267         }
1268     }
1269
1270     if (fitfile != NULL)
1271     {
1272         out_fit = ffopen(fitfile, "w");
1273         if (bXYdy && nset >= 2)
1274         {
1275             do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1276         }
1277         else
1278         {
1279             for (s = 0; s < nset; s++)
1280             {
1281                 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1282             }
1283         }
1284         ffclose(out_fit);
1285     }
1286
1287     printf("                                      std. dev.    relative deviation of\n");
1288     printf("                       standard       ---------   cumulants from those of\n");
1289     printf("set      average       deviation      sqrt(n-1)   a Gaussian distribition\n");
1290     printf("                                                      cum. 3   cum. 4\n");
1291     snew(av, nset);
1292     snew(sig, nset);
1293     for (s = 0; (s < nset); s++)
1294     {
1295         cum1 = 0;
1296         cum2 = 0;
1297         cum3 = 0;
1298         cum4 = 0;
1299         for (i = 0; (i < n); i++)
1300         {
1301             cum1 += val[s][i];
1302         }
1303         cum1 /= n;
1304         for (i = 0; (i < n); i++)
1305         {
1306             db    = val[s][i]-cum1;
1307             cum2 += db*db;
1308             cum3 += db*db*db;
1309             cum4 += db*db*db*db;
1310         }
1311         cum2  /= n;
1312         cum3  /= n;
1313         cum4  /= n;
1314         av[s]  = cum1;
1315         sig[s] = sqrt(cum2);
1316         if (n > 1)
1317         {
1318             error = sqrt(cum2/(n-1));
1319         }
1320         else
1321         {
1322             error = 0;
1323         }
1324         printf("SS%d  %13.6e   %12.6e   %12.6e      %6.3f   %6.3f\n",
1325                s+1, av[s], sig[s], error,
1326                sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1327                sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1328     }
1329     printf("\n");
1330
1331     if (filtlen)
1332     {
1333         filter(filtlen, n, nset, val, dt);
1334     }
1335
1336     if (msdfile)
1337     {
1338         out = xvgropen(msdfile, "Mean square displacement",
1339                        "time", "MSD (nm\\S2\\N)", oenv);
1340         nlast = (int)(n*frac);
1341         for (s = 0; s < nset; s++)
1342         {
1343             for (j = 0; j <= nlast; j++)
1344             {
1345                 if (j % 100 == 0)
1346                 {
1347                     fprintf(stderr, "\r%d", j);
1348                 }
1349                 tot = 0;
1350                 for (i = 0; i < n-j; i++)
1351                 {
1352                     tot += sqr(val[s][i]-val[s][i+j]);
1353                 }
1354                 tot /= (real)(n-j);
1355                 fprintf(out, " %g %8g\n", dt*j, tot);
1356             }
1357             if (s < nset-1)
1358             {
1359                 fprintf(out, "&\n");
1360             }
1361         }
1362         ffclose(out);
1363         fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1364     }
1365     if (ccfile)
1366     {
1367         plot_coscont(ccfile, n, nset, val, oenv);
1368     }
1369
1370     if (distfile)
1371     {
1372         histogram(distfile, binwidth, n, nset, val, oenv);
1373     }
1374     if (avfile)
1375     {
1376         average(avfile, nenum(avbar_opt), n, nset, val, t);
1377     }
1378     if (eefile)
1379     {
1380         estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1381                        bEeFitAc, bEESEF, bEENLC, oenv);
1382     }
1383     if (balfile)
1384     {
1385         do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1386     }
1387 /*   if (gemfile) */
1388 /*       do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1389 /*                   nFitPoints, fit_start, fit_end, oenv); */
1390     if (bPower)
1391     {
1392         power_fit(n, nset, val, t);
1393     }
1394
1395     if (acfile != NULL)
1396     {
1397         if (bSubAv)
1398         {
1399             for (s = 0; s < nset; s++)
1400             {
1401                 for (i = 0; i < n; i++)
1402                 {
1403                     val[s][i] -= av[s];
1404                 }
1405             }
1406         }
1407         do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1408                     eacNormal, bAverCorr);
1409     }
1410
1411     if (bRegression)
1412     {
1413         regression_analysis(n, bXYdy, t, nset, val);
1414     }
1415
1416     if (bLuzar)
1417     {
1418         luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1419     }
1420
1421     view_all(oenv, NFILE, fnm);
1422
1423     return 0;
1424 }