src/gromacs/gmxana/gmx_analyze.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "config.h"
  38
  39 #include <math.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42
  43 #include "gromacs/commandline/pargs.h"
  44 #include "typedefs.h"
  45 #include "gromacs/utility/smalloc.h"
  46 #include "macros.h"
  47 #include "gromacs/utility/fatalerror.h"
  48 #include "gromacs/math/vec.h"
  49 #include "copyrite.h"
  50 #include "gromacs/utility/futil.h"
  51 #include "readinp.h"
  52 #include "txtdump.h"
  53 #include "gstat.h"
  54 #include "gromacs/statistics/statistics.h"
  55 #include "gromacs/fileio/xvgr.h"
  56 #include "viewit.h"
  57 #include "gmx_ana.h"
  58 #include "geminate.h"
  59
  60 #include "gromacs/linearalgebra/matrix.h"
  61
  62 /* must correspond to char *avbar_opt[] declared in main() */
  63 enum {
  64     avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
  65 };
  66
  67 static void power_fit(int n, int nset, real **val, real *t)
  68 {
  69     real *x, *y, quality, a, b, r;
  70     int   s, i;
  71
  72     snew(x, n);
  73     snew(y, n);
  74
  75     if (t[0] > 0)
  76     {
  77         for (i = 0; i < n; i++)
  78         {
  79             if (t[0] > 0)
  80             {
  81                 x[i] = log(t[i]);
  82             }
  83         }
  84     }
  85     else
  86     {
  87         fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
  88         for (i = 0; i < n; i++)
  89         {
  90             x[i] = log(i+1);
  91         }
  92     }
  93
  94     for (s = 0; s < nset; s++)
  95     {
  96         i = 0;
  97         for (i = 0; i < n && val[s][i] >= 0; i++)
  98         {
  99             y[i] = log(val[s][i]);
 100         }
 101         if (i < n)
 102         {
 103             fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
 104         }
 105         lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
 106         fprintf(stdout, "Power fit set %3d:  error %.3f  a %g  b %g\n",
 107                 s+1, quality, a, exp(b));
 108     }
 109
 110     sfree(y);
 111     sfree(x);
 112 }
 113
 114 static real cosine_content(int nhp, int n, real *y)
 115 /* Assumes n equidistant points */
 116 {
 117     double fac, cosyint, yyint;
 118     int    i;
 119
 120     if (n < 2)
 121     {
 122         return 0;
 123     }
 124
 125     fac = M_PI*nhp/(n-1);
 126
 127     cosyint = 0;
 128     yyint   = 0;
 129     for (i = 0; i < n; i++)
 130     {
 131         cosyint += cos(fac*i)*y[i];
 132         yyint   += y[i]*y[i];
 133     }
 134
 135     return 2*cosyint*cosyint/(n*yyint);
 136 }
 137
 138 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
 139                          const output_env_t oenv)
 140 {
 141     FILE *fp;
 142     int   s;
 143     real  cc;
 144
 145     fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
 146                   oenv);
 147
 148     for (s = 0; s < nset; s++)
 149     {
 150         cc = cosine_content(s+1, n, val[s]);
 151         fprintf(fp, " %d %g\n", s+1, cc);
 152         fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
 153                 s+1, 0.5*(s+1), cc);
 154     }
 155     fprintf(stdout, "\n");
 156
 157     gmx_ffclose(fp);
 158 }
 159
 160 static void regression_analysis(int n, gmx_bool bXYdy,
 161                                 real *x, int nset, real **val)
 162 {
 163     real S, chi2, a, b, da, db, r = 0;
 164     int  ok;
 165
 166     if (bXYdy || (nset == 1))
 167     {
 168         printf("Fitting data to a function f(x) = ax + b\n");
 169         printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
 170         printf("Error estimates will be given if w_i (sigma) values are given\n");
 171         printf("(use option -xydy).\n\n");
 172         if (bXYdy)
 173         {
 174             if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
 175             {
 176                 gmx_fatal(FARGS, "Error fitting the data: %s",
 177                           gmx_stats_message(ok));
 178             }
 179         }
 180         else
 181         {
 182             if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
 183             {
 184                 gmx_fatal(FARGS, "Error fitting the data: %s",
 185                           gmx_stats_message(ok));
 186             }
 187         }
 188         chi2 = sqr((n-2)*S);
 189         printf("Chi2                    = %g\n", chi2);
 190         printf("S (Sqrt(Chi2/(n-2))     = %g\n", S);
 191         printf("Correlation coefficient = %.1f%%\n", 100*r);
 192         printf("\n");
 193         if (bXYdy)
 194         {
 195             printf("a    = %g +/- %g\n", a, da);
 196             printf("b    = %g +/- %g\n", b, db);
 197         }
 198         else
 199         {
 200             printf("a    = %g\n", a);
 201             printf("b    = %g\n", b);
 202         }
 203     }
 204     else
 205     {
 206         double chi2, *a, **xx, *y;
 207         int    i, j;
 208
 209         snew(y, n);
 210         snew(xx, nset-1);
 211         for (j = 0; (j < nset-1); j++)
 212         {
 213             snew(xx[j], n);
 214         }
 215         for (i = 0; (i < n); i++)
 216         {
 217             y[i] = val[0][i];
 218             for (j = 1; (j < nset); j++)
 219             {
 220                 xx[j-1][i] = val[j][i];
 221             }
 222         }
 223         snew(a, nset-1);
 224         chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
 225         printf("Fitting %d data points in %d sets\n", n, nset-1);
 226         printf("chi2 = %g\n", chi2);
 227         printf("A =");
 228         for (i = 0; (i < nset-1); i++)
 229         {
 230             printf("  %g", a[i]);
 231             sfree(xx[i]);
 232         }
 233         printf("\n");
 234         sfree(xx);
 235         sfree(y);
 236         sfree(a);
 237     }
 238 }
 239
 240 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
 241                const output_env_t oenv)
 242 {
 243     FILE          *fp;
 244     int            i, s;
 245     double         min, max;
 246     int            nbin;
 247     gmx_int64_t   *histo;
 248
 249     min = val[0][0];
 250     max = val[0][0];
 251     for (s = 0; s < nset; s++)
 252     {
 253         for (i = 0; i < n; i++)
 254         {
 255             if (val[s][i] < min)
 256             {
 257                 min = val[s][i];
 258             }
 259             else if (val[s][i] > max)
 260             {
 261                 max = val[s][i];
 262             }
 263         }
 264     }
 265
 266     min = binwidth*floor(min/binwidth);
 267     max = binwidth*ceil(max/binwidth);
 268     if (min != 0)
 269     {
 270         min -= binwidth;
 271     }
 272     max += binwidth;
 273
 274     nbin = (int)((max - min)/binwidth + 0.5) + 1;
 275     fprintf(stderr, "Making distributions with %d bins\n", nbin);
 276     snew(histo, nbin);
 277     fp = xvgropen(distfile, "Distribution", "", "", oenv);
 278     for (s = 0; s < nset; s++)
 279     {
 280         for (i = 0; i < nbin; i++)
 281         {
 282             histo[i] = 0;
 283         }
 284         for (i = 0; i < n; i++)
 285         {
 286             histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
 287         }
 288         for (i = 0; i < nbin; i++)
 289         {
 290             fprintf(fp, " %g  %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
 291         }
 292         if (s < nset-1)
 293         {
 294             fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
 295         }
 296     }
 297     gmx_ffclose(fp);
 298 }
 299
 300 static int real_comp(const void *a, const void *b)
 301 {
 302     real dif = *(real *)a - *(real *)b;
 303
 304     if (dif < 0)
 305     {
 306         return -1;
 307     }
 308     else if (dif > 0)
 309     {
 310         return 1;
 311     }
 312     else
 313     {
 314         return 0;
 315     }
 316 }
 317
 318 static void average(const char *avfile, int avbar_opt,
 319                     int n, int nset, real **val, real *t)
 320 {
 321     FILE   *fp;
 322     int     i, s, edge = 0;
 323     double  av, var, err;
 324     real   *tmp = NULL;
 325
 326     fp = gmx_ffopen(avfile, "w");
 327     if ((avbar_opt == avbarERROR) && (nset == 1))
 328     {
 329         avbar_opt = avbarNONE;
 330     }
 331     if (avbar_opt != avbarNONE)
 332     {
 333         if (avbar_opt == avbar90)
 334         {
 335             snew(tmp, nset);
 336             fprintf(fp, "@TYPE xydydy\n");
 337             edge = (int)(nset*0.05+0.5);
 338             fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
 339                     " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
 340         }
 341         else
 342         {
 343             fprintf(fp, "@TYPE xydy\n");
 344         }
 345     }
 346
 347     for (i = 0; i < n; i++)
 348     {
 349         av = 0;
 350         for (s = 0; s < nset; s++)
 351         {
 352             av += val[s][i];
 353         }
 354         av /= nset;
 355         fprintf(fp, " %g %g", t[i], av);
 356         var = 0;
 357         if (avbar_opt != avbarNONE)
 358         {
 359             if (avbar_opt == avbar90)
 360             {
 361                 for (s = 0; s < nset; s++)
 362                 {
 363                     tmp[s] = val[s][i];
 364                 }
 365                 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
 366                 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
 367             }
 368             else
 369             {
 370                 for (s = 0; s < nset; s++)
 371                 {
 372                     var += sqr(val[s][i]-av);
 373                 }
 374                 if (avbar_opt == avbarSTDDEV)
 375                 {
 376                     err = sqrt(var/nset);
 377                 }
 378                 else
 379                 {
 380                     err = sqrt(var/(nset*(nset-1)));
 381                 }
 382                 fprintf(fp, " %g", err);
 383             }
 384         }
 385         fprintf(fp, "\n");
 386     }
 387     gmx_ffclose(fp);
 388
 389     if (avbar_opt == avbar90)
 390     {
 391         sfree(tmp);
 392     }
 393 }
 394
 395 static real anal_ee_inf(real *parm, real T)
 396 {
 397     return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
 398 }
 399
 400 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
 401                            int nset, double *av, double *sig, real **val, real dt,
 402                            gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
 403                            const output_env_t oenv)
 404 {
 405     FILE    *fp;
 406     int      bs, prev_bs, nbs, nb;
 407     real     spacing, nbr;
 408     int      s, i, j;
 409     double   blav, var;
 410     char   **leg;
 411     real    *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
 412     real     fitparm[4];
 413     real     ee, a, tau1, tau2;
 414
 415     if (n < 4)
 416     {
 417         fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
 418
 419         return;
 420     }
 421
 422     fp = xvgropen(eefile, "Error estimates",
 423                   "Block size (time)", "Error estimate", oenv);
 424     if (output_env_get_print_xvgr_codes(oenv))
 425     {
 426         fprintf(fp,
 427                 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
 428                 (n-1)*dt, n);
 429     }
 430     snew(leg, 2*nset);
 431     xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
 432     sfree(leg);
 433
 434     spacing = pow(2, 1.0/resol);
 435     snew(tbs, n);
 436     snew(ybs, n);
 437     snew(fitsig, n);
 438     for (s = 0; s < nset; s++)
 439     {
 440         nbs     = 0;
 441         prev_bs = 0;
 442         nbr     = nb_min;
 443         while (nbr <= n)
 444         {
 445             bs = n/(int)nbr;
 446             if (bs != prev_bs)
 447             {
 448                 nb  = n/bs;
 449                 var = 0;
 450                 for (i = 0; i < nb; i++)
 451                 {
 452                     blav = 0;
 453                     for (j = 0; j < bs; j++)
 454                     {
 455                         blav += val[s][bs*i+j];
 456                     }
 457                     var += sqr(av[s] - blav/bs);
 458                 }
 459                 tbs[nbs] = bs*dt;
 460                 if (sig[s] == 0)
 461                 {
 462                     ybs[nbs] = 0;
 463                 }
 464                 else
 465                 {
 466                     ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
 467                 }
 468                 nbs++;
 469             }
 470             nbr    *= spacing;
 471             nb      = (int)(nbr+0.5);
 472             prev_bs = bs;
 473         }
 474         if (sig[s] == 0)
 475         {
 476             ee   = 0;
 477             a    = 1;
 478             tau1 = 0;
 479             tau2 = 0;
 480         }
 481         else
 482         {
 483             for (i = 0; i < nbs/2; i++)
 484             {
 485                 rtmp         = tbs[i];
 486                 tbs[i]       = tbs[nbs-1-i];
 487                 tbs[nbs-1-i] = rtmp;
 488                 rtmp         = ybs[i];
 489                 ybs[i]       = ybs[nbs-1-i];
 490                 ybs[nbs-1-i] = rtmp;
 491             }
 492             /* The initial slope of the normalized ybs^2 is 1.
 493              * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
 494              * From this we take our initial guess for tau1.
 495              */
 496             twooe = 2/exp(1);
 497             i     = -1;
 498             do
 499             {
 500                 i++;
 501                 tau1_est = tbs[i];
 502             }
 503             while (i < nbs - 1 &&
 504                    (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
 505
 506             if (ybs[0] > ybs[1])
 507             {
 508                 fprintf(stdout, "Data set %d has strange time correlations:\n"
 509                         "the std. error using single points is larger than that of blocks of 2 points\n"
 510                         "The error estimate might be inaccurate, check the fit\n",
 511                         s+1);
 512                 /* Use the total time as tau for the fitting weights */
 513                 tau_sig = (n - 1)*dt;
 514             }
 515             else
 516             {
 517                 tau_sig = tau1_est;
 518             }
 519
 520             if (debug)
 521             {
 522                 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
 523             }
 524
 525             /* Generate more or less appropriate sigma's,
 526              * also taking the density of points into account.
 527              */
 528             for (i = 0; i < nbs; i++)
 529             {
 530                 if (i == 0)
 531                 {
 532                     dens = tbs[1]/tbs[0] - 1;
 533                 }
 534                 else if (i == nbs-1)
 535                 {
 536                     dens = tbs[nbs-1]/tbs[nbs-2] - 1;
 537                 }
 538                 else
 539                 {
 540                     dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
 541                 }
 542                 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
 543             }
 544
 545             if (!bSingleExpFit)
 546             {
 547                 fitparm[0] = tau1_est;
 548                 fitparm[1] = 0.95;
 549                 /* We set the initial guess for tau2
 550                  * to halfway between tau1_est and the total time (on log scale).
 551                  */
 552                 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
 553                 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
 554                          bDebugMode(), effnERREST, fitparm, 0);
 555                 fitparm[3] = 1-fitparm[1];
 556             }
 557             if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
 558                 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
 559             {
 560                 if (!bSingleExpFit)
 561                 {
 562                     if (fitparm[2] > (n-1)*dt)
 563                     {
 564                         fprintf(stdout,
 565                                 "Warning: tau2 is longer than the length of the data (%g)\n"
 566                                 "         the statistics might be bad\n",
 567                                 (n-1)*dt);
 568                     }
 569                     else
 570                     {
 571                         fprintf(stdout, "a fitted parameter is negative\n");
 572                     }
 573                     fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 574                             sig[s]*anal_ee_inf(fitparm, n*dt),
 575                             fitparm[1], fitparm[0], fitparm[2]);
 576                     /* Do a fit with tau2 fixed at the total time.
 577                      * One could also choose any other large value for tau2.
 578                      */
 579                     fitparm[0] = tau1_est;
 580                     fitparm[1] = 0.95;
 581                     fitparm[2] = (n-1)*dt;
 582                     fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
 583                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 584                              effnERREST, fitparm, 4);
 585                     fitparm[3] = 1-fitparm[1];
 586                 }
 587                 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
 588                     || (fitparm[1] > 1 && !bAllowNegLTCorr))
 589                 {
 590                     if (!bSingleExpFit)
 591                     {
 592                         fprintf(stdout, "a fitted parameter is negative\n");
 593                         fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 594                                 sig[s]*anal_ee_inf(fitparm, n*dt),
 595                                 fitparm[1], fitparm[0], fitparm[2]);
 596                     }
 597                     /* Do a single exponential fit */
 598                     fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
 599                     fitparm[0] = tau1_est;
 600                     fitparm[1] = 1.0;
 601                     fitparm[2] = 0.0;
 602                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 603                              effnERREST, fitparm, 6);
 604                     fitparm[3] = 1-fitparm[1];
 605                 }
 606             }
 607             ee   = sig[s]*anal_ee_inf(fitparm, n*dt);
 608             a    = fitparm[1];
 609             tau1 = fitparm[0];
 610             tau2 = fitparm[2];
 611         }
 612         fprintf(stdout, "Set %3d:  err.est. %g  a %g  tau1 %g  tau2 %g\n",
 613                 s+1, ee, a, tau1, tau2);
 614         if (output_env_get_xvg_format(oenv) == exvgXMGR)
 615         {
 616             fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
 617             fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
 618         }
 619         else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
 620         {
 621             fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
 622             fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
 623         }
 624         for (i = 0; i < nbs; i++)
 625         {
 626             fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
 627                     sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
 628         }
 629
 630         if (bFitAc)
 631         {
 632             int   fitlen;
 633             real *ac, acint, ac_fit[4];
 634
 635             snew(ac, n);
 636             for (i = 0; i < n; i++)
 637             {
 638                 ac[i] = val[s][i] - av[s];
 639                 if (i > 0)
 640                 {
 641                     fitsig[i] = sqrt(i);
 642                 }
 643                 else
 644                 {
 645                     fitsig[i] = 1;
 646                 }
 647             }
 648             low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
 649                             dt, eacNormal, 1, FALSE, TRUE,
 650                             FALSE, 0, 0, effnNONE);
 651
 652             fitlen = n/nb_min;
 653
 654             /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
 655             acint = 0.5*ac[0];
 656             for (i = 1; i <= fitlen/2; i++)
 657             {
 658                 acint += ac[i];
 659             }
 660             acint *= dt;
 661
 662             /* Generate more or less appropriate sigma's */
 663             for (i = 0; i <= fitlen; i++)
 664             {
 665                 fitsig[i] = sqrt(acint + dt*i);
 666             }
 667
 668             ac_fit[0] = 0.5*acint;
 669             ac_fit[1] = 0.95;
 670             ac_fit[2] = 10*acint;
 671             do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
 672                      bDebugMode(), effnEXP3, ac_fit, 0);
 673             ac_fit[3] = 1 - ac_fit[1];
 674
 675             fprintf(stdout, "Set %3d:  ac erest %g  a %g  tau1 %g  tau2 %g\n",
 676                     s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
 677                     ac_fit[1], ac_fit[0], ac_fit[2]);
 678
 679             fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
 680             for (i = 0; i < nbs; i++)
 681             {
 682                 fprintf(fp, "%g %g\n", tbs[i],
 683                         sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
 684             }
 685
 686             sfree(ac);
 687         }
 688         if (s < nset-1)
 689         {
 690             fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
 691         }
 692     }
 693     sfree(fitsig);
 694     sfree(ybs);
 695     sfree(tbs);
 696     gmx_ffclose(fp);
 697 }
 698
 699 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
 700                          gmx_bool bError, real fit_start, real smooth_tail_start,
 701                          const output_env_t oenv)
 702 {
 703     const real tol = 1e-8;
 704     real      *kt;
 705     real       weight, d2;
 706     int        j;
 707
 708     please_cite(stdout, "Spoel2006b");
 709
 710     /* Compute negative derivative k(t) = -dc(t)/dt */
 711     if (!bError)
 712     {
 713         snew(kt, nn);
 714         compute_derivative(nn, time, val[0], kt);
 715         for (j = 0; (j < nn); j++)
 716         {
 717             kt[j] = -kt[j];
 718         }
 719         if (debug)
 720         {
 721             d2 = 0;
 722             for (j = 0; (j < nn); j++)
 723             {
 724                 d2 += sqr(kt[j] - val[3][j]);
 725             }
 726             fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
 727         }
 728         analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
 729                      temp, smooth_tail_start, oenv);
 730         sfree(kt);
 731     }
 732     else if (nset == 6)
 733     {
 734         analyse_corr(nn, time, val[0], val[2], val[4],
 735                      val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
 736     }
 737     else
 738     {
 739         printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
 740         printf("Not doing anything. Sorry.\n");
 741     }
 742 }
 743
 744 static void filter(real flen, int n, int nset, real **val, real dt)
 745 {
 746     int     f, s, i, j;
 747     double *filt, sum, vf, fluc, fluctot;
 748
 749     f = (int)(flen/(2*dt));
 750     snew(filt, f+1);
 751     filt[0] = 1;
 752     sum     = 1;
 753     for (i = 1; i <= f; i++)
 754     {
 755         filt[i] = cos(M_PI*dt*i/flen);
 756         sum    += 2*filt[i];
 757     }
 758     for (i = 0; i <= f; i++)
 759     {
 760         filt[i] /= sum;
 761     }
 762     fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
 763     fprintf(stdout, "  using a filter of length %g of %d points\n", flen, 2*f+1);
 764     fluctot = 0;
 765     for (s = 0; s < nset; s++)
 766     {
 767         fluc = 0;
 768         for (i = f; i < n-f; i++)
 769         {
 770             vf = filt[0]*val[s][i];
 771             for (j = 1; j <= f; j++)
 772             {
 773                 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
 774             }
 775             fluc += sqr(val[s][i] - vf);
 776         }
 777         fluc    /= n - 2*f;
 778         fluctot += fluc;
 779         fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
 780     }
 781     fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
 782     fprintf(stdout, "\n");
 783
 784     sfree(filt);
 785 }
 786
 787 static void do_fit(FILE *out, int n, gmx_bool bYdy,
 788                    int ny, real *x0, real **val,
 789                    int npargs, t_pargs *ppa, const output_env_t oenv)
 790 {
 791     real *c1 = NULL, *sig = NULL, *fitparm;
 792     real  tendfit, tbeginfit;
 793     int   i, efitfn, nparm;
 794
 795     efitfn = get_acffitfn();
 796     nparm  = nfp_ffn[efitfn];
 797     fprintf(out, "Will fit to the following function:\n");
 798     fprintf(out, "%s\n", longs_ffn[efitfn]);
 799     c1 = val[n];
 800     if (bYdy)
 801     {
 802         c1  = val[n];
 803         sig = val[n+1];
 804         fprintf(out, "Using two columns as y and sigma values\n");
 805     }
 806     else
 807     {
 808         snew(sig, ny);
 809     }
 810     if (opt2parg_bSet("-beginfit", npargs, ppa))
 811     {
 812         tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
 813     }
 814     else
 815     {
 816         tbeginfit = x0[0];
 817     }
 818     if (opt2parg_bSet("-endfit", npargs, ppa))
 819     {
 820         tendfit   = opt2parg_real("-endfit", npargs, ppa);
 821     }
 822     else
 823     {
 824         tendfit   = x0[ny-1];
 825     }
 826
 827     snew(fitparm, nparm);
 828     switch (efitfn)
 829     {
 830         case effnEXP1:
 831             fitparm[0] = 0.5;
 832             break;
 833         case effnEXP2:
 834             fitparm[0] = 0.5;
 835             fitparm[1] = c1[0];
 836             break;
 837         case effnEXP3:
 838             fitparm[0] = 1.0;
 839             fitparm[1] = 0.5*c1[0];
 840             fitparm[2] = 10.0;
 841             break;
 842         case effnEXP5:
 843             fitparm[0] = fitparm[2] = 0.5*c1[0];
 844             fitparm[1] = 10;
 845             fitparm[3] = 40;
 846             fitparm[4] = 0;
 847             break;
 848         case effnEXP7:
 849             fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
 850             fitparm[1] = 1;
 851             fitparm[3] = 10;
 852             fitparm[5] = 100;
 853             fitparm[6] = 0;
 854             break;
 855         case effnEXP9:
 856             fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
 857             fitparm[1] = 0.1;
 858             fitparm[3] = 1;
 859             fitparm[5] = 10;
 860             fitparm[7] = 100;
 861             fitparm[8] = 0;
 862             break;
 863         default:
 864             fprintf(out, "Warning: don't know how to initialize the parameters\n");
 865             for (i = 0; (i < nparm); i++)
 866             {
 867                 fitparm[i] = 1;
 868             }
 869     }
 870     fprintf(out, "Starting parameters:\n");
 871     for (i = 0; (i < nparm); i++)
 872     {
 873         fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 874     }
 875     if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
 876                  oenv, bDebugMode(), efitfn, fitparm, 0))
 877     {
 878         for (i = 0; (i < nparm); i++)
 879         {
 880             fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 881         }
 882     }
 883     else
 884     {
 885         fprintf(out, "No solution was found\n");
 886     }
 887 }
 888
 889 static void do_ballistic(const char *balFile, int nData,
 890                          real *t, real **val, int nSet,
 891                          real balTime, int nBalExp,
 892                          const output_env_t oenv)
 893 {
 894     double     **ctd   = NULL, *td = NULL;
 895     t_gemParams *GP    = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
 896     static char *leg[] = {"Ac'(t)"};
 897     FILE        *fp;
 898     int          i, set;
 899
 900     if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
 901     {
 902         snew(ctd, nSet);
 903         snew(td,  nData);
 904
 905         fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 906         xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
 907
 908         for (set = 0; set < nSet; set++)
 909         {
 910             snew(ctd[set], nData);
 911             for (i = 0; i < nData; i++)
 912             {
 913                 ctd[set][i] = (double)val[set][i];
 914                 if (set == 0)
 915                 {
 916                     td[i] = (double)t[i];
 917                 }
 918             }
 919
 920             takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
 921         }
 922
 923         for (i = 0; i < nData; i++)
 924         {
 925             fprintf(fp, "  %g", t[i]);
 926             for (set = 0; set < nSet; set++)
 927             {
 928                 fprintf(fp, "  %g", ctd[set][i]);
 929             }
 930             fprintf(fp, "\n");
 931         }
 932
 933
 934         for (set = 0; set < nSet; set++)
 935         {
 936             sfree(ctd[set]);
 937         }
 938         sfree(ctd);
 939         sfree(td);
 940     }
 941     else
 942     {
 943         printf("Number of data points is less than the number of parameters to fit\n."
 944                "The system is underdetermined, hence no ballistic term can be found.\n\n");
 945     }
 946 }
 947
 948 static void do_geminate(const char *gemFile, int nData,
 949                         real *t, real **val, int nSet,
 950                         const real D, const real rcut, const real balTime,
 951                         const int nFitPoints, const real begFit, const real endFit,
 952                         const output_env_t oenv)
 953 {
 954     double     **ctd = NULL, **ctdGem = NULL, *td = NULL;
 955     t_gemParams *GP  = init_gemParams(rcut, D, t, nData, nFitPoints,
 956                                       begFit, endFit, balTime, 1);
 957     const char  *leg[] = {"Ac\\sgem\\N(t)"};
 958     FILE        *fp;
 959     int          i, set;
 960
 961     snew(ctd,    nSet);
 962     snew(ctdGem, nSet);
 963     snew(td,  nData);
 964
 965     fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 966     xvgr_legend(fp, asize(leg), leg, oenv);
 967
 968     for (set = 0; set < nSet; set++)
 969     {
 970         snew(ctd[set],    nData);
 971         snew(ctdGem[set], nData);
 972         for (i = 0; i < nData; i++)
 973         {
 974             ctd[set][i] = (double)val[set][i];
 975             if (set == 0)
 976             {
 977                 td[i] = (double)t[i];
 978             }
 979         }
 980         fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
 981     }
 982
 983     for (i = 0; i < nData; i++)
 984     {
 985         fprintf(fp, "  %g", t[i]);
 986         for (set = 0; set < nSet; set++)
 987         {
 988             fprintf(fp, "  %g", ctdGem[set][i]);
 989         }
 990         fprintf(fp, "\n");
 991     }
 992
 993     for (set = 0; set < nSet; set++)
 994     {
 995         sfree(ctd[set]);
 996         sfree(ctdGem[set]);
 997     }
 998     sfree(ctd);
 999     sfree(ctdGem);
1000     sfree(td);
1001 }
1002
1003 int gmx_analyze(int argc, char *argv[])
1004 {
1005     static const char *desc[] = {
1006         "[THISMODULE] reads an ASCII file and analyzes data sets.",
1007         "A line in the input file may start with a time",
1008         "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1009         "Multiple sets can also be",
1010         "read when they are separated by & (option [TT]-n[tt]);",
1011         "in this case only one [IT]y[it]-value is read from each line.",
1012         "All lines starting with # and @ are skipped.",
1013         "All analyses can also be done for the derivative of a set",
1014         "(option [TT]-d[tt]).[PAR]",
1015
1016         "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1017         "points are equidistant in time.[PAR]",
1018
1019         "[THISMODULE] always shows the average and standard deviation of each",
1020         "set, as well as the relative deviation of the third",
1021         "and fourth cumulant from those of a Gaussian distribution with the same",
1022         "standard deviation.[PAR]",
1023
1024         "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1025         "Be sure that the time interval between data points is",
1026         "much shorter than the time scale of the autocorrelation.[PAR]",
1027
1028         "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1029         "i/2 periods. The formula is:[BR]"
1030         "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1031         "This is useful for principal components obtained from covariance",
1032         "analysis, since the principal components of random diffusion are",
1033         "pure cosines.[PAR]",
1034
1035         "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1036
1037         "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1038
1039         "Option [TT]-av[tt] produces the average over the sets.",
1040         "Error bars can be added with the option [TT]-errbar[tt].",
1041         "The errorbars can represent the standard deviation, the error",
1042         "(assuming the points are independent) or the interval containing",
1043         "90% of the points, by discarding 5% of the points at the top and",
1044         "the bottom.[PAR]",
1045
1046         "Option [TT]-ee[tt] produces error estimates using block averaging.",
1047         "A set is divided in a number of blocks and averages are calculated for",
1048         "each block. The error for the total average is calculated from",
1049         "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1050         "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1051         "These errors are plotted as a function of the block size.",
1052         "Also an analytical block average curve is plotted, assuming",
1053         "that the autocorrelation is a sum of two exponentials.",
1054         "The analytical curve for the block average is:[BR]",
1055         "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T (  [GRK]alpha[grk]   ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1056         "                       (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1057         "where T is the total time.",
1058         "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1059         "When the actual block average is very close to the analytical curve,",
1060         "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1061         "The complete derivation is given in",
1062         "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1063
1064         "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1065         "component from a hydrogen bond autocorrelation function by the fitting",
1066         "of a sum of exponentials, as described in e.g.",
1067         "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1068         "is the one with the most negative coefficient in the exponential,",
1069         "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1070         "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1071
1072         "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1073         "(and optionally kD) to the hydrogen bond autocorrelation function",
1074         "according to the reversible geminate recombination model. Removal of",
1075         "the ballistic component first is strongly advised. The model is presented in",
1076         "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1077
1078         "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1079         "of each set and over all sets with respect to a filtered average.",
1080         "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1081         "to len/2. len is supplied with the option [TT]-filter[tt].",
1082         "This filter reduces oscillations with period len/2 and len by a factor",
1083         "of 0.79 and 0.33 respectively.[PAR]",
1084
1085         "Option [TT]-g[tt] fits the data to the function given with option",
1086         "[TT]-fitfn[tt].[PAR]",
1087
1088         "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1089         "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1090         "zero or with a negative value are ignored.[PAR]"
1091
1092         "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1093         "on output from [gmx-hbond]. The input file can be taken directly",
1094         "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1095     };
1096     static real        tb         = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1097     static gmx_bool    bHaveT     = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1098     static gmx_bool    bEESEF     = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1099     static gmx_bool    bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1100     static int         nsets_in   = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1101     static real        temp       = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1102
1103     /* must correspond to enum avbar* declared at beginning of file */
1104     static const char *avbar_opt[avbarNR+1] = {
1105         NULL, "none", "stddev", "error", "90", NULL
1106     };
1107
1108     t_pargs            pa[] = {
1109         { "-time",    FALSE, etBOOL, {&bHaveT},
1110           "Expect a time in the input" },
1111         { "-b",       FALSE, etREAL, {&tb},
1112           "First time to read from set" },
1113         { "-e",       FALSE, etREAL, {&te},
1114           "Last time to read from set" },
1115         { "-n",       FALSE, etINT, {&nsets_in},
1116           "Read this number of sets separated by &" },
1117         { "-d",       FALSE, etBOOL, {&bDer},
1118           "Use the derivative" },
1119         { "-dp",      FALSE, etINT, {&d},
1120           "HIDDENThe derivative is the difference over this number of points" },
1121         { "-bw",      FALSE, etREAL, {&binwidth},
1122           "Binwidth for the distribution" },
1123         { "-errbar",  FALSE, etENUM, {avbar_opt},
1124           "Error bars for [TT]-av[tt]" },
1125         { "-integrate", FALSE, etBOOL, {&bIntegrate},
1126           "Integrate data function(s) numerically using trapezium rule" },
1127         { "-aver_start", FALSE, etREAL, {&aver_start},
1128           "Start averaging the integral from here" },
1129         { "-xydy",    FALSE, etBOOL, {&bXYdy},
1130           "Interpret second data set as error in the y values for integrating" },
1131         { "-regression", FALSE, etBOOL, {&bRegression},
1132           "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1133         { "-luzar",   FALSE, etBOOL, {&bLuzar},
1134           "Do a Luzar and Chandler analysis on a correlation function and "
1135           "related as produced by [gmx-hbond]. When in addition the "
1136           "[TT]-xydy[tt] flag is given the second and fourth column will be "
1137           "interpreted as errors in c(t) and n(t)." },
1138         { "-temp",    FALSE, etREAL, {&temp},
1139           "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1140         { "-fitstart", FALSE, etREAL, {&fit_start},
1141           "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1142         { "-fitend", FALSE, etREAL, {&fit_end},
1143           "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1144         { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1145           "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1146         { "-nbmin",   FALSE, etINT, {&nb_min},
1147           "HIDDENMinimum number of blocks for block averaging" },
1148         { "-resol", FALSE, etINT, {&resol},
1149           "HIDDENResolution for the block averaging, block size increases with"
1150           " a factor 2^(1/resol)" },
1151         { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1152           "HIDDENAlways use a single exponential fit for the error estimate" },
1153         { "-eenlc", FALSE, etBOOL, {&bEENLC},
1154           "HIDDENAllow a negative long-time correlation" },
1155         { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1156           "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1157         { "-filter",  FALSE, etREAL, {&filtlen},
1158           "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1159         { "-power", FALSE, etBOOL, {&bPower},
1160           "Fit data to: b t^a" },
1161         { "-subav", FALSE, etBOOL, {&bSubAv},
1162           "Subtract the average before autocorrelating" },
1163         { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1164           "Calculate one ACF over all sets" },
1165         { "-nbalexp", FALSE, etINT, {&nBalExp},
1166           "HIDDENNumber of exponentials to fit to the ultrafast component" },
1167         { "-baltime", FALSE, etREAL, {&balTime},
1168           "HIDDENTime up to which the ballistic component will be fitted" },
1169 /*     { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1170 /*       "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1171 /*     { "-rcut", FALSE, etREAL, {&rcut}, */
1172 /*       "Cut-off for hydrogen bonds in geminate algorithms" }, */
1173 /*     { "-gemtype", FALSE, etENUM, {gemType}, */
1174 /*       "What type of gminate recombination to use"}, */
1175 /*     { "-D", FALSE, etREAL, {&diffusion}, */
1176 /*       "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1177     };
1178 #define NPA asize(pa)
1179
1180     FILE           *out, *out_fit;
1181     int             n, nlast, s, nset, i, j = 0;
1182     real          **val, *t, dt, tot, error;
1183     double         *av, *sig, cum1, cum2, cum3, cum4, db;
1184     const char     *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1185     output_env_t    oenv;
1186
1187     t_filenm        fnm[] = {
1188         { efXVG, "-f",    "graph",    ffREAD   },
1189         { efXVG, "-ac",   "autocorr", ffOPTWR  },
1190         { efXVG, "-msd",  "msd",      ffOPTWR  },
1191         { efXVG, "-cc",   "coscont",  ffOPTWR  },
1192         { efXVG, "-dist", "distr",    ffOPTWR  },
1193         { efXVG, "-av",   "average",  ffOPTWR  },
1194         { efXVG, "-ee",   "errest",   ffOPTWR  },
1195         { efXVG, "-bal",  "ballisitc", ffOPTWR  },
1196 /*     { efXVG, "-gem",  "geminate", ffOPTWR  }, */
1197         { efLOG, "-g",    "fitlog",   ffOPTWR  }
1198     };
1199 #define NFILE asize(fnm)
1200
1201     int      npargs;
1202     t_pargs *ppa;
1203
1204     npargs = asize(pa);
1205     ppa    = add_acf_pargs(&npargs, pa);
1206
1207     if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1208                            NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1209     {
1210         return 0;
1211     }
1212
1213     acfile   = opt2fn_null("-ac", NFILE, fnm);
1214     msdfile  = opt2fn_null("-msd", NFILE, fnm);
1215     ccfile   = opt2fn_null("-cc", NFILE, fnm);
1216     distfile = opt2fn_null("-dist", NFILE, fnm);
1217     avfile   = opt2fn_null("-av", NFILE, fnm);
1218     eefile   = opt2fn_null("-ee", NFILE, fnm);
1219     balfile  = opt2fn_null("-bal", NFILE, fnm);
1220 /*   gemfile  = opt2fn_null("-gem",NFILE,fnm); */
1221     /* When doing autocorrelation we don't want a fitlog for fitting
1222      * the function itself (not the acf) when the user did not ask for it.
1223      */
1224     if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1225     {
1226         fitfile  = opt2fn("-g", NFILE, fnm);
1227     }
1228     else
1229     {
1230         fitfile  = opt2fn_null("-g", NFILE, fnm);
1231     }
1232
1233     val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1234                         opt2parg_bSet("-b", npargs, ppa), tb,
1235                         opt2parg_bSet("-e", npargs, ppa), te,
1236                         nsets_in, &nset, &n, &dt, &t);
1237     printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1238
1239     if (bDer)
1240     {
1241         printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1242                d, d);
1243         n -= d;
1244         for (s = 0; s < nset; s++)
1245         {
1246             for (i = 0; (i < n); i++)
1247             {
1248                 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1249             }
1250         }
1251     }
1252
1253     if (bIntegrate)
1254     {
1255         real sum, stddev;
1256
1257         printf("Calculating the integral using the trapezium rule\n");
1258
1259         if (bXYdy)
1260         {
1261             sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1262             printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1263         }
1264         else
1265         {
1266             for (s = 0; s < nset; s++)
1267             {
1268                 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1269                 printf("Integral %d  %10.5f  +/- %10.5f\n", s+1, sum, stddev);
1270             }
1271         }
1272     }
1273
1274     if (fitfile != NULL)
1275     {
1276         out_fit = gmx_ffopen(fitfile, "w");
1277         if (bXYdy && nset >= 2)
1278         {
1279             do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1280         }
1281         else
1282         {
1283             for (s = 0; s < nset; s++)
1284             {
1285                 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1286             }
1287         }
1288         gmx_ffclose(out_fit);
1289     }
1290
1291     printf("                                      std. dev.    relative deviation of\n");
1292     printf("                       standard       ---------   cumulants from those of\n");
1293     printf("set      average       deviation      sqrt(n-1)   a Gaussian distribition\n");
1294     printf("                                                      cum. 3   cum. 4\n");
1295     snew(av, nset);
1296     snew(sig, nset);
1297     for (s = 0; (s < nset); s++)
1298     {
1299         cum1 = 0;
1300         cum2 = 0;
1301         cum3 = 0;
1302         cum4 = 0;
1303         for (i = 0; (i < n); i++)
1304         {
1305             cum1 += val[s][i];
1306         }
1307         cum1 /= n;
1308         for (i = 0; (i < n); i++)
1309         {
1310             db    = val[s][i]-cum1;
1311             cum2 += db*db;
1312             cum3 += db*db*db;
1313             cum4 += db*db*db*db;
1314         }
1315         cum2  /= n;
1316         cum3  /= n;
1317         cum4  /= n;
1318         av[s]  = cum1;
1319         sig[s] = sqrt(cum2);
1320         if (n > 1)
1321         {
1322             error = sqrt(cum2/(n-1));
1323         }
1324         else
1325         {
1326             error = 0;
1327         }
1328         printf("SS%d  %13.6e   %12.6e   %12.6e      %6.3f   %6.3f\n",
1329                s+1, av[s], sig[s], error,
1330                sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1331                sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1332     }
1333     printf("\n");
1334
1335     if (filtlen)
1336     {
1337         filter(filtlen, n, nset, val, dt);
1338     }
1339
1340     if (msdfile)
1341     {
1342         out = xvgropen(msdfile, "Mean square displacement",
1343                        "time", "MSD (nm\\S2\\N)", oenv);
1344         nlast = (int)(n*frac);
1345         for (s = 0; s < nset; s++)
1346         {
1347             for (j = 0; j <= nlast; j++)
1348             {
1349                 if (j % 100 == 0)
1350                 {
1351                     fprintf(stderr, "\r%d", j);
1352                 }
1353                 tot = 0;
1354                 for (i = 0; i < n-j; i++)
1355                 {
1356                     tot += sqr(val[s][i]-val[s][i+j]);
1357                 }
1358                 tot /= (real)(n-j);
1359                 fprintf(out, " %g %8g\n", dt*j, tot);
1360             }
1361             if (s < nset-1)
1362             {
1363                 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1364             }
1365         }
1366         gmx_ffclose(out);
1367         fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1368     }
1369     if (ccfile)
1370     {
1371         plot_coscont(ccfile, n, nset, val, oenv);
1372     }
1373
1374     if (distfile)
1375     {
1376         histogram(distfile, binwidth, n, nset, val, oenv);
1377     }
1378     if (avfile)
1379     {
1380         average(avfile, nenum(avbar_opt), n, nset, val, t);
1381     }
1382     if (eefile)
1383     {
1384         estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1385                        bEeFitAc, bEESEF, bEENLC, oenv);
1386     }
1387     if (balfile)
1388     {
1389         do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1390     }
1391 /*   if (gemfile) */
1392 /*       do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1393 /*                   nFitPoints, fit_start, fit_end, oenv); */
1394     if (bPower)
1395     {
1396         power_fit(n, nset, val, t);
1397     }
1398
1399     if (acfile != NULL)
1400     {
1401         if (bSubAv)
1402         {
1403             for (s = 0; s < nset; s++)
1404             {
1405                 for (i = 0; i < n; i++)
1406                 {
1407                     val[s][i] -= av[s];
1408                 }
1409             }
1410         }
1411         do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1412                     eacNormal, bAverCorr);
1413     }
1414
1415     if (bRegression)
1416     {
1417         regression_analysis(n, bXYdy, t, nset, val);
1418     }
1419
1420     if (bLuzar)
1421     {
1422         luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1423     }
1424
1425     view_all(oenv, NFILE, fnm);
1426
1427     return 0;
1428 }