src/gromacs/gmxana/gmx_analyze.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40 #include <math.h>
  41 #include <string.h>
  42 #include "gromacs/commandline/pargs.h"
  43 #include "sysstuff.h"
  44 #include "typedefs.h"
  45 #include "smalloc.h"
  46 #include "macros.h"
  47 #include "gmx_fatal.h"
  48 #include "vec.h"
  49 #include "copyrite.h"
  50 #include "gromacs/fileio/futil.h"
  51 #include "readinp.h"
  52 #include "txtdump.h"
  53 #include "gstat.h"
  54 #include "gromacs/statistics/statistics.h"
  55 #include "xvgr.h"
  56 #include "gmx_ana.h"
  57 #include "geminate.h"
  58
  59 #include "gromacs/linearalgebra/matrix.h"
  60
  61 /* must correspond to char *avbar_opt[] declared in main() */
  62 enum {
  63     avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
  64 };
  65
  66 static void power_fit(int n, int nset, real **val, real *t)
  67 {
  68     real *x, *y, quality, a, b, r;
  69     int   s, i;
  70
  71     snew(x, n);
  72     snew(y, n);
  73
  74     if (t[0] > 0)
  75     {
  76         for (i = 0; i < n; i++)
  77         {
  78             if (t[0] > 0)
  79             {
  80                 x[i] = log(t[i]);
  81             }
  82         }
  83     }
  84     else
  85     {
  86         fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
  87         for (i = 0; i < n; i++)
  88         {
  89             x[i] = log(i+1);
  90         }
  91     }
  92
  93     for (s = 0; s < nset; s++)
  94     {
  95         i = 0;
  96         for (i = 0; i < n && val[s][i] >= 0; i++)
  97         {
  98             y[i] = log(val[s][i]);
  99         }
 100         if (i < n)
 101         {
 102             fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
 103         }
 104         lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
 105         fprintf(stdout, "Power fit set %3d:  error %.3f  a %g  b %g\n",
 106                 s+1, quality, a, exp(b));
 107     }
 108
 109     sfree(y);
 110     sfree(x);
 111 }
 112
 113 static real cosine_content(int nhp, int n, real *y)
 114 /* Assumes n equidistant points */
 115 {
 116     double fac, cosyint, yyint;
 117     int    i;
 118
 119     if (n < 2)
 120     {
 121         return 0;
 122     }
 123
 124     fac = M_PI*nhp/(n-1);
 125
 126     cosyint = 0;
 127     yyint   = 0;
 128     for (i = 0; i < n; i++)
 129     {
 130         cosyint += cos(fac*i)*y[i];
 131         yyint   += y[i]*y[i];
 132     }
 133
 134     return 2*cosyint*cosyint/(n*yyint);
 135 }
 136
 137 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
 138                          const output_env_t oenv)
 139 {
 140     FILE *fp;
 141     int   s;
 142     real  cc;
 143
 144     fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
 145                   oenv);
 146
 147     for (s = 0; s < nset; s++)
 148     {
 149         cc = cosine_content(s+1, n, val[s]);
 150         fprintf(fp, " %d %g\n", s+1, cc);
 151         fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
 152                 s+1, 0.5*(s+1), cc);
 153     }
 154     fprintf(stdout, "\n");
 155
 156     ffclose(fp);
 157 }
 158
 159 static void regression_analysis(int n, gmx_bool bXYdy,
 160                                 real *x, int nset, real **val)
 161 {
 162     real S, chi2, a, b, da, db, r = 0;
 163     int  ok;
 164
 165     if (bXYdy || (nset == 1))
 166     {
 167         printf("Fitting data to a function f(x) = ax + b\n");
 168         printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
 169         printf("Error estimates will be given if w_i (sigma) values are given\n");
 170         printf("(use option -xydy).\n\n");
 171         if (bXYdy)
 172         {
 173             if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
 174             {
 175                 gmx_fatal(FARGS, "Error fitting the data: %s",
 176                           gmx_stats_message(ok));
 177             }
 178         }
 179         else
 180         {
 181             if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
 182             {
 183                 gmx_fatal(FARGS, "Error fitting the data: %s",
 184                           gmx_stats_message(ok));
 185             }
 186         }
 187         chi2 = sqr((n-2)*S);
 188         printf("Chi2                    = %g\n", chi2);
 189         printf("S (Sqrt(Chi2/(n-2))     = %g\n", S);
 190         printf("Correlation coefficient = %.1f%%\n", 100*r);
 191         printf("\n");
 192         if (bXYdy)
 193         {
 194             printf("a    = %g +/- %g\n", a, da);
 195             printf("b    = %g +/- %g\n", b, db);
 196         }
 197         else
 198         {
 199             printf("a    = %g\n", a);
 200             printf("b    = %g\n", b);
 201         }
 202     }
 203     else
 204     {
 205         double chi2, *a, **xx, *y;
 206         int    i, j;
 207
 208         snew(y, n);
 209         snew(xx, nset-1);
 210         for (j = 0; (j < nset-1); j++)
 211         {
 212             snew(xx[j], n);
 213         }
 214         for (i = 0; (i < n); i++)
 215         {
 216             y[i] = val[0][i];
 217             for (j = 1; (j < nset); j++)
 218             {
 219                 xx[j-1][i] = val[j][i];
 220             }
 221         }
 222         snew(a, nset-1);
 223         chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
 224         printf("Fitting %d data points in %d sets\n", n, nset-1);
 225         printf("chi2 = %g\n", chi2);
 226         printf("A =");
 227         for (i = 0; (i < nset-1); i++)
 228         {
 229             printf("  %g", a[i]);
 230             sfree(xx[i]);
 231         }
 232         printf("\n");
 233         sfree(xx);
 234         sfree(y);
 235         sfree(a);
 236     }
 237 }
 238
 239 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
 240                const output_env_t oenv)
 241 {
 242     FILE          *fp;
 243     int            i, s;
 244     double         min, max;
 245     int            nbin;
 246     gmx_int64_t   *histo;
 247
 248     min = val[0][0];
 249     max = val[0][0];
 250     for (s = 0; s < nset; s++)
 251     {
 252         for (i = 0; i < n; i++)
 253         {
 254             if (val[s][i] < min)
 255             {
 256                 min = val[s][i];
 257             }
 258             else if (val[s][i] > max)
 259             {
 260                 max = val[s][i];
 261             }
 262         }
 263     }
 264
 265     min = binwidth*floor(min/binwidth);
 266     max = binwidth*ceil(max/binwidth);
 267     if (min != 0)
 268     {
 269         min -= binwidth;
 270     }
 271     max += binwidth;
 272
 273     nbin = (int)((max - min)/binwidth + 0.5) + 1;
 274     fprintf(stderr, "Making distributions with %d bins\n", nbin);
 275     snew(histo, nbin);
 276     fp = xvgropen(distfile, "Distribution", "", "", oenv);
 277     for (s = 0; s < nset; s++)
 278     {
 279         for (i = 0; i < nbin; i++)
 280         {
 281             histo[i] = 0;
 282         }
 283         for (i = 0; i < n; i++)
 284         {
 285             histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
 286         }
 287         for (i = 0; i < nbin; i++)
 288         {
 289             fprintf(fp, " %g  %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
 290         }
 291         if (s < nset-1)
 292         {
 293             fprintf(fp, "&\n");
 294         }
 295     }
 296     ffclose(fp);
 297 }
 298
 299 static int real_comp(const void *a, const void *b)
 300 {
 301     real dif = *(real *)a - *(real *)b;
 302
 303     if (dif < 0)
 304     {
 305         return -1;
 306     }
 307     else if (dif > 0)
 308     {
 309         return 1;
 310     }
 311     else
 312     {
 313         return 0;
 314     }
 315 }
 316
 317 static void average(const char *avfile, int avbar_opt,
 318                     int n, int nset, real **val, real *t)
 319 {
 320     FILE   *fp;
 321     int     i, s, edge = 0;
 322     double  av, var, err;
 323     real   *tmp = NULL;
 324
 325     fp = ffopen(avfile, "w");
 326     if ((avbar_opt == avbarERROR) && (nset == 1))
 327     {
 328         avbar_opt = avbarNONE;
 329     }
 330     if (avbar_opt != avbarNONE)
 331     {
 332         if (avbar_opt == avbar90)
 333         {
 334             snew(tmp, nset);
 335             fprintf(fp, "@TYPE xydydy\n");
 336             edge = (int)(nset*0.05+0.5);
 337             fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
 338                     " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
 339         }
 340         else
 341         {
 342             fprintf(fp, "@TYPE xydy\n");
 343         }
 344     }
 345
 346     for (i = 0; i < n; i++)
 347     {
 348         av = 0;
 349         for (s = 0; s < nset; s++)
 350         {
 351             av += val[s][i];
 352         }
 353         av /= nset;
 354         fprintf(fp, " %g %g", t[i], av);
 355         var = 0;
 356         if (avbar_opt != avbarNONE)
 357         {
 358             if (avbar_opt == avbar90)
 359             {
 360                 for (s = 0; s < nset; s++)
 361                 {
 362                     tmp[s] = val[s][i];
 363                 }
 364                 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
 365                 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
 366             }
 367             else
 368             {
 369                 for (s = 0; s < nset; s++)
 370                 {
 371                     var += sqr(val[s][i]-av);
 372                 }
 373                 if (avbar_opt == avbarSTDDEV)
 374                 {
 375                     err = sqrt(var/nset);
 376                 }
 377                 else
 378                 {
 379                     err = sqrt(var/(nset*(nset-1)));
 380                 }
 381                 fprintf(fp, " %g", err);
 382             }
 383         }
 384         fprintf(fp, "\n");
 385     }
 386     ffclose(fp);
 387
 388     if (avbar_opt == avbar90)
 389     {
 390         sfree(tmp);
 391     }
 392 }
 393
 394 static real anal_ee_inf(real *parm, real T)
 395 {
 396     return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
 397 }
 398
 399 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
 400                            int nset, double *av, double *sig, real **val, real dt,
 401                            gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
 402                            const output_env_t oenv)
 403 {
 404     FILE    *fp;
 405     int      bs, prev_bs, nbs, nb;
 406     real     spacing, nbr;
 407     int      s, i, j;
 408     double   blav, var;
 409     char   **leg;
 410     real    *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
 411     real     fitparm[4];
 412     real     ee, a, tau1, tau2;
 413
 414     if (n < 4)
 415     {
 416         fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
 417
 418         return;
 419     }
 420
 421     fp = xvgropen(eefile, "Error estimates",
 422                   "Block size (time)", "Error estimate", oenv);
 423     if (output_env_get_print_xvgr_codes(oenv))
 424     {
 425         fprintf(fp,
 426                 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
 427                 (n-1)*dt, n);
 428     }
 429     snew(leg, 2*nset);
 430     xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
 431     sfree(leg);
 432
 433     spacing = pow(2, 1.0/resol);
 434     snew(tbs, n);
 435     snew(ybs, n);
 436     snew(fitsig, n);
 437     for (s = 0; s < nset; s++)
 438     {
 439         nbs     = 0;
 440         prev_bs = 0;
 441         nbr     = nb_min;
 442         while (nbr <= n)
 443         {
 444             bs = n/(int)nbr;
 445             if (bs != prev_bs)
 446             {
 447                 nb  = n/bs;
 448                 var = 0;
 449                 for (i = 0; i < nb; i++)
 450                 {
 451                     blav = 0;
 452                     for (j = 0; j < bs; j++)
 453                     {
 454                         blav += val[s][bs*i+j];
 455                     }
 456                     var += sqr(av[s] - blav/bs);
 457                 }
 458                 tbs[nbs] = bs*dt;
 459                 if (sig[s] == 0)
 460                 {
 461                     ybs[nbs] = 0;
 462                 }
 463                 else
 464                 {
 465                     ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
 466                 }
 467                 nbs++;
 468             }
 469             nbr    *= spacing;
 470             nb      = (int)(nbr+0.5);
 471             prev_bs = bs;
 472         }
 473         if (sig[s] == 0)
 474         {
 475             ee   = 0;
 476             a    = 1;
 477             tau1 = 0;
 478             tau2 = 0;
 479         }
 480         else
 481         {
 482             for (i = 0; i < nbs/2; i++)
 483             {
 484                 rtmp         = tbs[i];
 485                 tbs[i]       = tbs[nbs-1-i];
 486                 tbs[nbs-1-i] = rtmp;
 487                 rtmp         = ybs[i];
 488                 ybs[i]       = ybs[nbs-1-i];
 489                 ybs[nbs-1-i] = rtmp;
 490             }
 491             /* The initial slope of the normalized ybs^2 is 1.
 492              * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
 493              * From this we take our initial guess for tau1.
 494              */
 495             twooe = 2/exp(1);
 496             i     = -1;
 497             do
 498             {
 499                 i++;
 500                 tau1_est = tbs[i];
 501             }
 502             while (i < nbs - 1 &&
 503                    (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
 504
 505             if (ybs[0] > ybs[1])
 506             {
 507                 fprintf(stdout, "Data set %d has strange time correlations:\n"
 508                         "the std. error using single points is larger than that of blocks of 2 points\n"
 509                         "The error estimate might be inaccurate, check the fit\n",
 510                         s+1);
 511                 /* Use the total time as tau for the fitting weights */
 512                 tau_sig = (n - 1)*dt;
 513             }
 514             else
 515             {
 516                 tau_sig = tau1_est;
 517             }
 518
 519             if (debug)
 520             {
 521                 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
 522             }
 523
 524             /* Generate more or less appropriate sigma's,
 525              * also taking the density of points into account.
 526              */
 527             for (i = 0; i < nbs; i++)
 528             {
 529                 if (i == 0)
 530                 {
 531                     dens = tbs[1]/tbs[0] - 1;
 532                 }
 533                 else if (i == nbs-1)
 534                 {
 535                     dens = tbs[nbs-1]/tbs[nbs-2] - 1;
 536                 }
 537                 else
 538                 {
 539                     dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
 540                 }
 541                 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
 542             }
 543
 544             if (!bSingleExpFit)
 545             {
 546                 fitparm[0] = tau1_est;
 547                 fitparm[1] = 0.95;
 548                 /* We set the initial guess for tau2
 549                  * to halfway between tau1_est and the total time (on log scale).
 550                  */
 551                 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
 552                 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
 553                          bDebugMode(), effnERREST, fitparm, 0);
 554                 fitparm[3] = 1-fitparm[1];
 555             }
 556             if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
 557                 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
 558             {
 559                 if (!bSingleExpFit)
 560                 {
 561                     if (fitparm[2] > (n-1)*dt)
 562                     {
 563                         fprintf(stdout,
 564                                 "Warning: tau2 is longer than the length of the data (%g)\n"
 565                                 "         the statistics might be bad\n",
 566                                 (n-1)*dt);
 567                     }
 568                     else
 569                     {
 570                         fprintf(stdout, "a fitted parameter is negative\n");
 571                     }
 572                     fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 573                             sig[s]*anal_ee_inf(fitparm, n*dt),
 574                             fitparm[1], fitparm[0], fitparm[2]);
 575                     /* Do a fit with tau2 fixed at the total time.
 576                      * One could also choose any other large value for tau2.
 577                      */
 578                     fitparm[0] = tau1_est;
 579                     fitparm[1] = 0.95;
 580                     fitparm[2] = (n-1)*dt;
 581                     fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
 582                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 583                              effnERREST, fitparm, 4);
 584                     fitparm[3] = 1-fitparm[1];
 585                 }
 586                 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
 587                     || (fitparm[1] > 1 && !bAllowNegLTCorr))
 588                 {
 589                     if (!bSingleExpFit)
 590                     {
 591                         fprintf(stdout, "a fitted parameter is negative\n");
 592                         fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 593                                 sig[s]*anal_ee_inf(fitparm, n*dt),
 594                                 fitparm[1], fitparm[0], fitparm[2]);
 595                     }
 596                     /* Do a single exponential fit */
 597                     fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
 598                     fitparm[0] = tau1_est;
 599                     fitparm[1] = 1.0;
 600                     fitparm[2] = 0.0;
 601                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 602                              effnERREST, fitparm, 6);
 603                     fitparm[3] = 1-fitparm[1];
 604                 }
 605             }
 606             ee   = sig[s]*anal_ee_inf(fitparm, n*dt);
 607             a    = fitparm[1];
 608             tau1 = fitparm[0];
 609             tau2 = fitparm[2];
 610         }
 611         fprintf(stdout, "Set %3d:  err.est. %g  a %g  tau1 %g  tau2 %g\n",
 612                 s+1, ee, a, tau1, tau2);
 613         fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
 614         fprintf(fp, "@ legend string %d \"ee %6g\"\n",
 615                 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
 616         for (i = 0; i < nbs; i++)
 617         {
 618             fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
 619                     sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
 620         }
 621
 622         if (bFitAc)
 623         {
 624             int   fitlen;
 625             real *ac, acint, ac_fit[4];
 626
 627             snew(ac, n);
 628             for (i = 0; i < n; i++)
 629             {
 630                 ac[i] = val[s][i] - av[s];
 631                 if (i > 0)
 632                 {
 633                     fitsig[i] = sqrt(i);
 634                 }
 635                 else
 636                 {
 637                     fitsig[i] = 1;
 638                 }
 639             }
 640             low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
 641                             dt, eacNormal, 1, FALSE, TRUE,
 642                             FALSE, 0, 0, effnNONE);
 643
 644             fitlen = n/nb_min;
 645
 646             /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
 647             acint = 0.5*ac[0];
 648             for (i = 1; i <= fitlen/2; i++)
 649             {
 650                 acint += ac[i];
 651             }
 652             acint *= dt;
 653
 654             /* Generate more or less appropriate sigma's */
 655             for (i = 0; i <= fitlen; i++)
 656             {
 657                 fitsig[i] = sqrt(acint + dt*i);
 658             }
 659
 660             ac_fit[0] = 0.5*acint;
 661             ac_fit[1] = 0.95;
 662             ac_fit[2] = 10*acint;
 663             do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
 664                      bDebugMode(), effnEXP3, ac_fit, 0);
 665             ac_fit[3] = 1 - ac_fit[1];
 666
 667             fprintf(stdout, "Set %3d:  ac erest %g  a %g  tau1 %g  tau2 %g\n",
 668                     s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
 669                     ac_fit[1], ac_fit[0], ac_fit[2]);
 670
 671             fprintf(fp, "&\n");
 672             for (i = 0; i < nbs; i++)
 673             {
 674                 fprintf(fp, "%g %g\n", tbs[i],
 675                         sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
 676             }
 677
 678             sfree(ac);
 679         }
 680         if (s < nset-1)
 681         {
 682             fprintf(fp, "&\n");
 683         }
 684     }
 685     sfree(fitsig);
 686     sfree(ybs);
 687     sfree(tbs);
 688     ffclose(fp);
 689 }
 690
 691 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
 692                          gmx_bool bError, real fit_start, real smooth_tail_start,
 693                          const output_env_t oenv)
 694 {
 695     const real tol = 1e-8;
 696     real      *kt;
 697     real       weight, d2;
 698     int        j;
 699
 700     please_cite(stdout, "Spoel2006b");
 701
 702     /* Compute negative derivative k(t) = -dc(t)/dt */
 703     if (!bError)
 704     {
 705         snew(kt, nn);
 706         compute_derivative(nn, time, val[0], kt);
 707         for (j = 0; (j < nn); j++)
 708         {
 709             kt[j] = -kt[j];
 710         }
 711         if (debug)
 712         {
 713             d2 = 0;
 714             for (j = 0; (j < nn); j++)
 715             {
 716                 d2 += sqr(kt[j] - val[3][j]);
 717             }
 718             fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
 719         }
 720         analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
 721                      temp, smooth_tail_start, oenv);
 722         sfree(kt);
 723     }
 724     else if (nset == 6)
 725     {
 726         analyse_corr(nn, time, val[0], val[2], val[4],
 727                      val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
 728     }
 729     else
 730     {
 731         printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
 732         printf("Not doing anything. Sorry.\n");
 733     }
 734 }
 735
 736 static void filter(real flen, int n, int nset, real **val, real dt)
 737 {
 738     int     f, s, i, j;
 739     double *filt, sum, vf, fluc, fluctot;
 740
 741     f = (int)(flen/(2*dt));
 742     snew(filt, f+1);
 743     filt[0] = 1;
 744     sum     = 1;
 745     for (i = 1; i <= f; i++)
 746     {
 747         filt[i] = cos(M_PI*dt*i/flen);
 748         sum    += 2*filt[i];
 749     }
 750     for (i = 0; i <= f; i++)
 751     {
 752         filt[i] /= sum;
 753     }
 754     fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
 755     fprintf(stdout, "  using a filter of length %g of %d points\n", flen, 2*f+1);
 756     fluctot = 0;
 757     for (s = 0; s < nset; s++)
 758     {
 759         fluc = 0;
 760         for (i = f; i < n-f; i++)
 761         {
 762             vf = filt[0]*val[s][i];
 763             for (j = 1; j <= f; j++)
 764             {
 765                 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
 766             }
 767             fluc += sqr(val[s][i] - vf);
 768         }
 769         fluc    /= n - 2*f;
 770         fluctot += fluc;
 771         fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
 772     }
 773     fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
 774     fprintf(stdout, "\n");
 775
 776     sfree(filt);
 777 }
 778
 779 static void do_fit(FILE *out, int n, gmx_bool bYdy,
 780                    int ny, real *x0, real **val,
 781                    int npargs, t_pargs *ppa, const output_env_t oenv)
 782 {
 783     real *c1 = NULL, *sig = NULL, *fitparm;
 784     real  tendfit, tbeginfit;
 785     int   i, efitfn, nparm;
 786
 787     efitfn = get_acffitfn();
 788     nparm  = nfp_ffn[efitfn];
 789     fprintf(out, "Will fit to the following function:\n");
 790     fprintf(out, "%s\n", longs_ffn[efitfn]);
 791     c1 = val[n];
 792     if (bYdy)
 793     {
 794         c1  = val[n];
 795         sig = val[n+1];
 796         fprintf(out, "Using two columns as y and sigma values\n");
 797     }
 798     else
 799     {
 800         snew(sig, ny);
 801     }
 802     if (opt2parg_bSet("-beginfit", npargs, ppa))
 803     {
 804         tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
 805     }
 806     else
 807     {
 808         tbeginfit = x0[0];
 809     }
 810     if (opt2parg_bSet("-endfit", npargs, ppa))
 811     {
 812         tendfit   = opt2parg_real("-endfit", npargs, ppa);
 813     }
 814     else
 815     {
 816         tendfit   = x0[ny-1];
 817     }
 818
 819     snew(fitparm, nparm);
 820     switch (efitfn)
 821     {
 822         case effnEXP1:
 823             fitparm[0] = 0.5;
 824             break;
 825         case effnEXP2:
 826             fitparm[0] = 0.5;
 827             fitparm[1] = c1[0];
 828             break;
 829         case effnEXP3:
 830             fitparm[0] = 1.0;
 831             fitparm[1] = 0.5*c1[0];
 832             fitparm[2] = 10.0;
 833             break;
 834         case effnEXP5:
 835             fitparm[0] = fitparm[2] = 0.5*c1[0];
 836             fitparm[1] = 10;
 837             fitparm[3] = 40;
 838             fitparm[4] = 0;
 839             break;
 840         case effnEXP7:
 841             fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
 842             fitparm[1] = 1;
 843             fitparm[3] = 10;
 844             fitparm[5] = 100;
 845             fitparm[6] = 0;
 846             break;
 847         case effnEXP9:
 848             fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
 849             fitparm[1] = 0.1;
 850             fitparm[3] = 1;
 851             fitparm[5] = 10;
 852             fitparm[7] = 100;
 853             fitparm[8] = 0;
 854             break;
 855         default:
 856             fprintf(out, "Warning: don't know how to initialize the parameters\n");
 857             for (i = 0; (i < nparm); i++)
 858             {
 859                 fitparm[i] = 1;
 860             }
 861     }
 862     fprintf(out, "Starting parameters:\n");
 863     for (i = 0; (i < nparm); i++)
 864     {
 865         fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 866     }
 867     if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
 868                  oenv, bDebugMode(), efitfn, fitparm, 0))
 869     {
 870         for (i = 0; (i < nparm); i++)
 871         {
 872             fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 873         }
 874     }
 875     else
 876     {
 877         fprintf(out, "No solution was found\n");
 878     }
 879 }
 880
 881 static void do_ballistic(const char *balFile, int nData,
 882                          real *t, real **val, int nSet,
 883                          real balTime, int nBalExp,
 884                          const output_env_t oenv)
 885 {
 886     double     **ctd   = NULL, *td = NULL;
 887     t_gemParams *GP    = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
 888     static char *leg[] = {"Ac'(t)"};
 889     FILE        *fp;
 890     int          i, set;
 891
 892     if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
 893     {
 894         snew(ctd, nSet);
 895         snew(td,  nData);
 896
 897         fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 898         xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
 899
 900         for (set = 0; set < nSet; set++)
 901         {
 902             snew(ctd[set], nData);
 903             for (i = 0; i < nData; i++)
 904             {
 905                 ctd[set][i] = (double)val[set][i];
 906                 if (set == 0)
 907                 {
 908                     td[i] = (double)t[i];
 909                 }
 910             }
 911
 912             takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
 913         }
 914
 915         for (i = 0; i < nData; i++)
 916         {
 917             fprintf(fp, "  %g", t[i]);
 918             for (set = 0; set < nSet; set++)
 919             {
 920                 fprintf(fp, "  %g", ctd[set][i]);
 921             }
 922             fprintf(fp, "\n");
 923         }
 924
 925
 926         for (set = 0; set < nSet; set++)
 927         {
 928             sfree(ctd[set]);
 929         }
 930         sfree(ctd);
 931         sfree(td);
 932     }
 933     else
 934     {
 935         printf("Number of data points is less than the number of parameters to fit\n."
 936                "The system is underdetermined, hence no ballistic term can be found.\n\n");
 937     }
 938 }
 939
 940 static void do_geminate(const char *gemFile, int nData,
 941                         real *t, real **val, int nSet,
 942                         const real D, const real rcut, const real balTime,
 943                         const int nFitPoints, const real begFit, const real endFit,
 944                         const output_env_t oenv)
 945 {
 946     double     **ctd = NULL, **ctdGem = NULL, *td = NULL;
 947     t_gemParams *GP  = init_gemParams(rcut, D, t, nData, nFitPoints,
 948                                       begFit, endFit, balTime, 1);
 949     const char  *leg[] = {"Ac\\sgem\\N(t)"};
 950     FILE        *fp;
 951     int          i, set;
 952
 953     snew(ctd,    nSet);
 954     snew(ctdGem, nSet);
 955     snew(td,  nData);
 956
 957     fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 958     xvgr_legend(fp, asize(leg), leg, oenv);
 959
 960     for (set = 0; set < nSet; set++)
 961     {
 962         snew(ctd[set],    nData);
 963         snew(ctdGem[set], nData);
 964         for (i = 0; i < nData; i++)
 965         {
 966             ctd[set][i] = (double)val[set][i];
 967             if (set == 0)
 968             {
 969                 td[i] = (double)t[i];
 970             }
 971         }
 972         fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
 973     }
 974
 975     for (i = 0; i < nData; i++)
 976     {
 977         fprintf(fp, "  %g", t[i]);
 978         for (set = 0; set < nSet; set++)
 979         {
 980             fprintf(fp, "  %g", ctdGem[set][i]);
 981         }
 982         fprintf(fp, "\n");
 983     }
 984
 985     for (set = 0; set < nSet; set++)
 986     {
 987         sfree(ctd[set]);
 988         sfree(ctdGem[set]);
 989     }
 990     sfree(ctd);
 991     sfree(ctdGem);
 992     sfree(td);
 993 }
 994
 995 int gmx_analyze(int argc, char *argv[])
 996 {
 997     static const char *desc[] = {
 998         "[THISMODULE] reads an ASCII file and analyzes data sets.",
 999         "A line in the input file may start with a time",
1000         "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1001         "Multiple sets can also be",
1002         "read when they are separated by & (option [TT]-n[tt]);",
1003         "in this case only one [IT]y[it]-value is read from each line.",
1004         "All lines starting with # and @ are skipped.",
1005         "All analyses can also be done for the derivative of a set",
1006         "(option [TT]-d[tt]).[PAR]",
1007
1008         "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1009         "points are equidistant in time.[PAR]",
1010
1011         "[THISMODULE] always shows the average and standard deviation of each",
1012         "set, as well as the relative deviation of the third",
1013         "and fourth cumulant from those of a Gaussian distribution with the same",
1014         "standard deviation.[PAR]",
1015
1016         "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1017         "Be sure that the time interval between data points is",
1018         "much shorter than the time scale of the autocorrelation.[PAR]",
1019
1020         "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1021         "i/2 periods. The formula is:[BR]"
1022         "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1023         "This is useful for principal components obtained from covariance",
1024         "analysis, since the principal components of random diffusion are",
1025         "pure cosines.[PAR]",
1026
1027         "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1028
1029         "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1030
1031         "Option [TT]-av[tt] produces the average over the sets.",
1032         "Error bars can be added with the option [TT]-errbar[tt].",
1033         "The errorbars can represent the standard deviation, the error",
1034         "(assuming the points are independent) or the interval containing",
1035         "90% of the points, by discarding 5% of the points at the top and",
1036         "the bottom.[PAR]",
1037
1038         "Option [TT]-ee[tt] produces error estimates using block averaging.",
1039         "A set is divided in a number of blocks and averages are calculated for",
1040         "each block. The error for the total average is calculated from",
1041         "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1042         "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1043         "These errors are plotted as a function of the block size.",
1044         "Also an analytical block average curve is plotted, assuming",
1045         "that the autocorrelation is a sum of two exponentials.",
1046         "The analytical curve for the block average is:[BR]",
1047         "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T (  [GRK]alpha[grk]   ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1048         "                       (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1049         "where T is the total time.",
1050         "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1051         "When the actual block average is very close to the analytical curve,",
1052         "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1053         "The complete derivation is given in",
1054         "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1055
1056         "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1057         "component from a hydrogen bond autocorrelation function by the fitting",
1058         "of a sum of exponentials, as described in e.g.",
1059         "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1060         "is the one with the most negative coefficient in the exponential,",
1061         "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1062         "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1063
1064         "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1065         "(and optionally kD) to the hydrogen bond autocorrelation function",
1066         "according to the reversible geminate recombination model. Removal of",
1067         "the ballistic component first is strongly advised. The model is presented in",
1068         "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1069
1070         "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1071         "of each set and over all sets with respect to a filtered average.",
1072         "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1073         "to len/2. len is supplied with the option [TT]-filter[tt].",
1074         "This filter reduces oscillations with period len/2 and len by a factor",
1075         "of 0.79 and 0.33 respectively.[PAR]",
1076
1077         "Option [TT]-g[tt] fits the data to the function given with option",
1078         "[TT]-fitfn[tt].[PAR]",
1079
1080         "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1081         "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1082         "zero or with a negative value are ignored.[PAR]"
1083
1084         "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1085         "on output from [gmx-hbond]. The input file can be taken directly",
1086         "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1087     };
1088     static real        tb         = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1089     static gmx_bool    bHaveT     = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1090     static gmx_bool    bEESEF     = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1091     static gmx_bool    bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1092     static int         nsets_in   = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1093     static real        temp       = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1094
1095     /* must correspond to enum avbar* declared at beginning of file */
1096     static const char *avbar_opt[avbarNR+1] = {
1097         NULL, "none", "stddev", "error", "90", NULL
1098     };
1099
1100     t_pargs            pa[] = {
1101         { "-time",    FALSE, etBOOL, {&bHaveT},
1102           "Expect a time in the input" },
1103         { "-b",       FALSE, etREAL, {&tb},
1104           "First time to read from set" },
1105         { "-e",       FALSE, etREAL, {&te},
1106           "Last time to read from set" },
1107         { "-n",       FALSE, etINT, {&nsets_in},
1108           "Read this number of sets separated by &" },
1109         { "-d",       FALSE, etBOOL, {&bDer},
1110           "Use the derivative" },
1111         { "-dp",      FALSE, etINT, {&d},
1112           "HIDDENThe derivative is the difference over this number of points" },
1113         { "-bw",      FALSE, etREAL, {&binwidth},
1114           "Binwidth for the distribution" },
1115         { "-errbar",  FALSE, etENUM, {avbar_opt},
1116           "Error bars for [TT]-av[tt]" },
1117         { "-integrate", FALSE, etBOOL, {&bIntegrate},
1118           "Integrate data function(s) numerically using trapezium rule" },
1119         { "-aver_start", FALSE, etREAL, {&aver_start},
1120           "Start averaging the integral from here" },
1121         { "-xydy",    FALSE, etBOOL, {&bXYdy},
1122           "Interpret second data set as error in the y values for integrating" },
1123         { "-regression", FALSE, etBOOL, {&bRegression},
1124           "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1125         { "-luzar",   FALSE, etBOOL, {&bLuzar},
1126           "Do a Luzar and Chandler analysis on a correlation function and "
1127           "related as produced by [gmx-hbond]. When in addition the "
1128           "[TT]-xydy[tt] flag is given the second and fourth column will be "
1129           "interpreted as errors in c(t) and n(t)." },
1130         { "-temp",    FALSE, etREAL, {&temp},
1131           "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1132         { "-fitstart", FALSE, etREAL, {&fit_start},
1133           "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1134         { "-fitend", FALSE, etREAL, {&fit_end},
1135           "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1136         { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1137           "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1138         { "-nbmin",   FALSE, etINT, {&nb_min},
1139           "HIDDENMinimum number of blocks for block averaging" },
1140         { "-resol", FALSE, etINT, {&resol},
1141           "HIDDENResolution for the block averaging, block size increases with"
1142           " a factor 2^(1/resol)" },
1143         { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1144           "HIDDENAlways use a single exponential fit for the error estimate" },
1145         { "-eenlc", FALSE, etBOOL, {&bEENLC},
1146           "HIDDENAllow a negative long-time correlation" },
1147         { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1148           "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1149         { "-filter",  FALSE, etREAL, {&filtlen},
1150           "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1151         { "-power", FALSE, etBOOL, {&bPower},
1152           "Fit data to: b t^a" },
1153         { "-subav", FALSE, etBOOL, {&bSubAv},
1154           "Subtract the average before autocorrelating" },
1155         { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1156           "Calculate one ACF over all sets" },
1157         { "-nbalexp", FALSE, etINT, {&nBalExp},
1158           "HIDDENNumber of exponentials to fit to the ultrafast component" },
1159         { "-baltime", FALSE, etREAL, {&balTime},
1160           "HIDDENTime up to which the ballistic component will be fitted" },
1161 /*     { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1162 /*       "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1163 /*     { "-rcut", FALSE, etREAL, {&rcut}, */
1164 /*       "Cut-off for hydrogen bonds in geminate algorithms" }, */
1165 /*     { "-gemtype", FALSE, etENUM, {gemType}, */
1166 /*       "What type of gminate recombination to use"}, */
1167 /*     { "-D", FALSE, etREAL, {&diffusion}, */
1168 /*       "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1169     };
1170 #define NPA asize(pa)
1171
1172     FILE           *out, *out_fit;
1173     int             n, nlast, s, nset, i, j = 0;
1174     real          **val, *t, dt, tot, error;
1175     double         *av, *sig, cum1, cum2, cum3, cum4, db;
1176     const char     *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1177     output_env_t    oenv;
1178
1179     t_filenm        fnm[] = {
1180         { efXVG, "-f",    "graph",    ffREAD   },
1181         { efXVG, "-ac",   "autocorr", ffOPTWR  },
1182         { efXVG, "-msd",  "msd",      ffOPTWR  },
1183         { efXVG, "-cc",   "coscont",  ffOPTWR  },
1184         { efXVG, "-dist", "distr",    ffOPTWR  },
1185         { efXVG, "-av",   "average",  ffOPTWR  },
1186         { efXVG, "-ee",   "errest",   ffOPTWR  },
1187         { efXVG, "-bal",  "ballisitc", ffOPTWR  },
1188 /*     { efXVG, "-gem",  "geminate", ffOPTWR  }, */
1189         { efLOG, "-g",    "fitlog",   ffOPTWR  }
1190     };
1191 #define NFILE asize(fnm)
1192
1193     int      npargs;
1194     t_pargs *ppa;
1195
1196     npargs = asize(pa);
1197     ppa    = add_acf_pargs(&npargs, pa);
1198
1199     if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1200                            NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1201     {
1202         return 0;
1203     }
1204
1205     acfile   = opt2fn_null("-ac", NFILE, fnm);
1206     msdfile  = opt2fn_null("-msd", NFILE, fnm);
1207     ccfile   = opt2fn_null("-cc", NFILE, fnm);
1208     distfile = opt2fn_null("-dist", NFILE, fnm);
1209     avfile   = opt2fn_null("-av", NFILE, fnm);
1210     eefile   = opt2fn_null("-ee", NFILE, fnm);
1211     balfile  = opt2fn_null("-bal", NFILE, fnm);
1212 /*   gemfile  = opt2fn_null("-gem",NFILE,fnm); */
1213     /* When doing autocorrelation we don't want a fitlog for fitting
1214      * the function itself (not the acf) when the user did not ask for it.
1215      */
1216     if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1217     {
1218         fitfile  = opt2fn("-g", NFILE, fnm);
1219     }
1220     else
1221     {
1222         fitfile  = opt2fn_null("-g", NFILE, fnm);
1223     }
1224
1225     val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1226                         opt2parg_bSet("-b", npargs, ppa), tb,
1227                         opt2parg_bSet("-e", npargs, ppa), te,
1228                         nsets_in, &nset, &n, &dt, &t);
1229     printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1230
1231     if (bDer)
1232     {
1233         printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1234                d, d);
1235         n -= d;
1236         for (s = 0; s < nset; s++)
1237         {
1238             for (i = 0; (i < n); i++)
1239             {
1240                 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1241             }
1242         }
1243     }
1244
1245     if (bIntegrate)
1246     {
1247         real sum, stddev;
1248
1249         printf("Calculating the integral using the trapezium rule\n");
1250
1251         if (bXYdy)
1252         {
1253             sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1254             printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1255         }
1256         else
1257         {
1258             for (s = 0; s < nset; s++)
1259             {
1260                 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1261                 printf("Integral %d  %10.5f  +/- %10.5f\n", s+1, sum, stddev);
1262             }
1263         }
1264     }
1265
1266     if (fitfile != NULL)
1267     {
1268         out_fit = ffopen(fitfile, "w");
1269         if (bXYdy && nset >= 2)
1270         {
1271             do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1272         }
1273         else
1274         {
1275             for (s = 0; s < nset; s++)
1276             {
1277                 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1278             }
1279         }
1280         ffclose(out_fit);
1281     }
1282
1283     printf("                                      std. dev.    relative deviation of\n");
1284     printf("                       standard       ---------   cumulants from those of\n");
1285     printf("set      average       deviation      sqrt(n-1)   a Gaussian distribition\n");
1286     printf("                                                      cum. 3   cum. 4\n");
1287     snew(av, nset);
1288     snew(sig, nset);
1289     for (s = 0; (s < nset); s++)
1290     {
1291         cum1 = 0;
1292         cum2 = 0;
1293         cum3 = 0;
1294         cum4 = 0;
1295         for (i = 0; (i < n); i++)
1296         {
1297             cum1 += val[s][i];
1298         }
1299         cum1 /= n;
1300         for (i = 0; (i < n); i++)
1301         {
1302             db    = val[s][i]-cum1;
1303             cum2 += db*db;
1304             cum3 += db*db*db;
1305             cum4 += db*db*db*db;
1306         }
1307         cum2  /= n;
1308         cum3  /= n;
1309         cum4  /= n;
1310         av[s]  = cum1;
1311         sig[s] = sqrt(cum2);
1312         if (n > 1)
1313         {
1314             error = sqrt(cum2/(n-1));
1315         }
1316         else
1317         {
1318             error = 0;
1319         }
1320         printf("SS%d  %13.6e   %12.6e   %12.6e      %6.3f   %6.3f\n",
1321                s+1, av[s], sig[s], error,
1322                sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1323                sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1324     }
1325     printf("\n");
1326
1327     if (filtlen)
1328     {
1329         filter(filtlen, n, nset, val, dt);
1330     }
1331
1332     if (msdfile)
1333     {
1334         out = xvgropen(msdfile, "Mean square displacement",
1335                        "time", "MSD (nm\\S2\\N)", oenv);
1336         nlast = (int)(n*frac);
1337         for (s = 0; s < nset; s++)
1338         {
1339             for (j = 0; j <= nlast; j++)
1340             {
1341                 if (j % 100 == 0)
1342                 {
1343                     fprintf(stderr, "\r%d", j);
1344                 }
1345                 tot = 0;
1346                 for (i = 0; i < n-j; i++)
1347                 {
1348                     tot += sqr(val[s][i]-val[s][i+j]);
1349                 }
1350                 tot /= (real)(n-j);
1351                 fprintf(out, " %g %8g\n", dt*j, tot);
1352             }
1353             if (s < nset-1)
1354             {
1355                 fprintf(out, "&\n");
1356             }
1357         }
1358         ffclose(out);
1359         fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1360     }
1361     if (ccfile)
1362     {
1363         plot_coscont(ccfile, n, nset, val, oenv);
1364     }
1365
1366     if (distfile)
1367     {
1368         histogram(distfile, binwidth, n, nset, val, oenv);
1369     }
1370     if (avfile)
1371     {
1372         average(avfile, nenum(avbar_opt), n, nset, val, t);
1373     }
1374     if (eefile)
1375     {
1376         estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1377                        bEeFitAc, bEESEF, bEENLC, oenv);
1378     }
1379     if (balfile)
1380     {
1381         do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1382     }
1383 /*   if (gemfile) */
1384 /*       do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1385 /*                   nFitPoints, fit_start, fit_end, oenv); */
1386     if (bPower)
1387     {
1388         power_fit(n, nset, val, t);
1389     }
1390
1391     if (acfile != NULL)
1392     {
1393         if (bSubAv)
1394         {
1395             for (s = 0; s < nset; s++)
1396             {
1397                 for (i = 0; i < n; i++)
1398                 {
1399                     val[s][i] -= av[s];
1400                 }
1401             }
1402         }
1403         do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1404                     eacNormal, bAverCorr);
1405     }
1406
1407     if (bRegression)
1408     {
1409         regression_analysis(n, bXYdy, t, nset, val);
1410     }
1411
1412     if (bLuzar)
1413     {
1414         luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1415     }
1416
1417     view_all(oenv, NFILE, fnm);
1418
1419     return 0;
1420 }