src/gromacs/gmxana/gmx_analyze.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include <stdlib.h>
  43 #include <string.h>
  44
  45 #include "gromacs/commandline/pargs.h"
  46 #include "typedefs.h"
  47 #include "gromacs/utility/smalloc.h"
  48 #include "macros.h"
  49 #include "gromacs/utility/fatalerror.h"
  50 #include "gromacs/math/vec.h"
  51 #include "copyrite.h"
  52 #include "gromacs/utility/futil.h"
  53 #include "readinp.h"
  54 #include "txtdump.h"
  55 #include "gstat.h"
  56 #include "gromacs/statistics/statistics.h"
  57 #include "gromacs/fileio/xvgr.h"
  58 #include "viewit.h"
  59 #include "gmx_ana.h"
  60 #include "geminate.h"
  61
  62 #include "gromacs/linearalgebra/matrix.h"
  63
  64 /* must correspond to char *avbar_opt[] declared in main() */
  65 enum {
  66     avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
  67 };
  68
  69 static void power_fit(int n, int nset, real **val, real *t)
  70 {
  71     real *x, *y, quality, a, b, r;
  72     int   s, i;
  73
  74     snew(x, n);
  75     snew(y, n);
  76
  77     if (t[0] > 0)
  78     {
  79         for (i = 0; i < n; i++)
  80         {
  81             if (t[0] > 0)
  82             {
  83                 x[i] = log(t[i]);
  84             }
  85         }
  86     }
  87     else
  88     {
  89         fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
  90         for (i = 0; i < n; i++)
  91         {
  92             x[i] = log(i+1);
  93         }
  94     }
  95
  96     for (s = 0; s < nset; s++)
  97     {
  98         i = 0;
  99         for (i = 0; i < n && val[s][i] >= 0; i++)
 100         {
 101             y[i] = log(val[s][i]);
 102         }
 103         if (i < n)
 104         {
 105             fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
 106         }
 107         lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
 108         fprintf(stdout, "Power fit set %3d:  error %.3f  a %g  b %g\n",
 109                 s+1, quality, a, exp(b));
 110     }
 111
 112     sfree(y);
 113     sfree(x);
 114 }
 115
 116 static real cosine_content(int nhp, int n, real *y)
 117 /* Assumes n equidistant points */
 118 {
 119     double fac, cosyint, yyint;
 120     int    i;
 121
 122     if (n < 2)
 123     {
 124         return 0;
 125     }
 126
 127     fac = M_PI*nhp/(n-1);
 128
 129     cosyint = 0;
 130     yyint   = 0;
 131     for (i = 0; i < n; i++)
 132     {
 133         cosyint += cos(fac*i)*y[i];
 134         yyint   += y[i]*y[i];
 135     }
 136
 137     return 2*cosyint*cosyint/(n*yyint);
 138 }
 139
 140 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
 141                          const output_env_t oenv)
 142 {
 143     FILE *fp;
 144     int   s;
 145     real  cc;
 146
 147     fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
 148                   oenv);
 149
 150     for (s = 0; s < nset; s++)
 151     {
 152         cc = cosine_content(s+1, n, val[s]);
 153         fprintf(fp, " %d %g\n", s+1, cc);
 154         fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
 155                 s+1, 0.5*(s+1), cc);
 156     }
 157     fprintf(stdout, "\n");
 158
 159     gmx_ffclose(fp);
 160 }
 161
 162 static void regression_analysis(int n, gmx_bool bXYdy,
 163                                 real *x, int nset, real **val)
 164 {
 165     real S, chi2, a, b, da, db, r = 0;
 166     int  ok;
 167
 168     if (bXYdy || (nset == 1))
 169     {
 170         printf("Fitting data to a function f(x) = ax + b\n");
 171         printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
 172         printf("Error estimates will be given if w_i (sigma) values are given\n");
 173         printf("(use option -xydy).\n\n");
 174         if (bXYdy)
 175         {
 176             if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
 177             {
 178                 gmx_fatal(FARGS, "Error fitting the data: %s",
 179                           gmx_stats_message(ok));
 180             }
 181         }
 182         else
 183         {
 184             if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
 185             {
 186                 gmx_fatal(FARGS, "Error fitting the data: %s",
 187                           gmx_stats_message(ok));
 188             }
 189         }
 190         chi2 = sqr((n-2)*S);
 191         printf("Chi2                    = %g\n", chi2);
 192         printf("S (Sqrt(Chi2/(n-2))     = %g\n", S);
 193         printf("Correlation coefficient = %.1f%%\n", 100*r);
 194         printf("\n");
 195         if (bXYdy)
 196         {
 197             printf("a    = %g +/- %g\n", a, da);
 198             printf("b    = %g +/- %g\n", b, db);
 199         }
 200         else
 201         {
 202             printf("a    = %g\n", a);
 203             printf("b    = %g\n", b);
 204         }
 205     }
 206     else
 207     {
 208         double chi2, *a, **xx, *y;
 209         int    i, j;
 210
 211         snew(y, n);
 212         snew(xx, nset-1);
 213         for (j = 0; (j < nset-1); j++)
 214         {
 215             snew(xx[j], n);
 216         }
 217         for (i = 0; (i < n); i++)
 218         {
 219             y[i] = val[0][i];
 220             for (j = 1; (j < nset); j++)
 221             {
 222                 xx[j-1][i] = val[j][i];
 223             }
 224         }
 225         snew(a, nset-1);
 226         chi2 = multi_regression(NULL, n, y, nset-1, xx, a);
 227         printf("Fitting %d data points in %d sets\n", n, nset-1);
 228         printf("chi2 = %g\n", chi2);
 229         printf("A =");
 230         for (i = 0; (i < nset-1); i++)
 231         {
 232             printf("  %g", a[i]);
 233             sfree(xx[i]);
 234         }
 235         printf("\n");
 236         sfree(xx);
 237         sfree(y);
 238         sfree(a);
 239     }
 240 }
 241
 242 void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
 243                const output_env_t oenv)
 244 {
 245     FILE          *fp;
 246     int            i, s;
 247     double         min, max;
 248     int            nbin;
 249     gmx_int64_t   *histo;
 250
 251     min = val[0][0];
 252     max = val[0][0];
 253     for (s = 0; s < nset; s++)
 254     {
 255         for (i = 0; i < n; i++)
 256         {
 257             if (val[s][i] < min)
 258             {
 259                 min = val[s][i];
 260             }
 261             else if (val[s][i] > max)
 262             {
 263                 max = val[s][i];
 264             }
 265         }
 266     }
 267
 268     min = binwidth*floor(min/binwidth);
 269     max = binwidth*ceil(max/binwidth);
 270     if (min != 0)
 271     {
 272         min -= binwidth;
 273     }
 274     max += binwidth;
 275
 276     nbin = (int)((max - min)/binwidth + 0.5) + 1;
 277     fprintf(stderr, "Making distributions with %d bins\n", nbin);
 278     snew(histo, nbin);
 279     fp = xvgropen(distfile, "Distribution", "", "", oenv);
 280     for (s = 0; s < nset; s++)
 281     {
 282         for (i = 0; i < nbin; i++)
 283         {
 284             histo[i] = 0;
 285         }
 286         for (i = 0; i < n; i++)
 287         {
 288             histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
 289         }
 290         for (i = 0; i < nbin; i++)
 291         {
 292             fprintf(fp, " %g  %g\n", min+i*binwidth, (double)histo[i]/(n*binwidth));
 293         }
 294         if (s < nset-1)
 295         {
 296             fprintf(fp, "&\n");
 297         }
 298     }
 299     gmx_ffclose(fp);
 300 }
 301
 302 static int real_comp(const void *a, const void *b)
 303 {
 304     real dif = *(real *)a - *(real *)b;
 305
 306     if (dif < 0)
 307     {
 308         return -1;
 309     }
 310     else if (dif > 0)
 311     {
 312         return 1;
 313     }
 314     else
 315     {
 316         return 0;
 317     }
 318 }
 319
 320 static void average(const char *avfile, int avbar_opt,
 321                     int n, int nset, real **val, real *t)
 322 {
 323     FILE   *fp;
 324     int     i, s, edge = 0;
 325     double  av, var, err;
 326     real   *tmp = NULL;
 327
 328     fp = gmx_ffopen(avfile, "w");
 329     if ((avbar_opt == avbarERROR) && (nset == 1))
 330     {
 331         avbar_opt = avbarNONE;
 332     }
 333     if (avbar_opt != avbarNONE)
 334     {
 335         if (avbar_opt == avbar90)
 336         {
 337             snew(tmp, nset);
 338             fprintf(fp, "@TYPE xydydy\n");
 339             edge = (int)(nset*0.05+0.5);
 340             fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
 341                     " interval\n", edge, (int)(100*(nset-2*edge)/nset+0.5));
 342         }
 343         else
 344         {
 345             fprintf(fp, "@TYPE xydy\n");
 346         }
 347     }
 348
 349     for (i = 0; i < n; i++)
 350     {
 351         av = 0;
 352         for (s = 0; s < nset; s++)
 353         {
 354             av += val[s][i];
 355         }
 356         av /= nset;
 357         fprintf(fp, " %g %g", t[i], av);
 358         var = 0;
 359         if (avbar_opt != avbarNONE)
 360         {
 361             if (avbar_opt == avbar90)
 362             {
 363                 for (s = 0; s < nset; s++)
 364                 {
 365                     tmp[s] = val[s][i];
 366                 }
 367                 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
 368                 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
 369             }
 370             else
 371             {
 372                 for (s = 0; s < nset; s++)
 373                 {
 374                     var += sqr(val[s][i]-av);
 375                 }
 376                 if (avbar_opt == avbarSTDDEV)
 377                 {
 378                     err = sqrt(var/nset);
 379                 }
 380                 else
 381                 {
 382                     err = sqrt(var/(nset*(nset-1)));
 383                 }
 384                 fprintf(fp, " %g", err);
 385             }
 386         }
 387         fprintf(fp, "\n");
 388     }
 389     gmx_ffclose(fp);
 390
 391     if (avbar_opt == avbar90)
 392     {
 393         sfree(tmp);
 394     }
 395 }
 396
 397 static real anal_ee_inf(real *parm, real T)
 398 {
 399     return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
 400 }
 401
 402 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
 403                            int nset, double *av, double *sig, real **val, real dt,
 404                            gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
 405                            const output_env_t oenv)
 406 {
 407     FILE    *fp;
 408     int      bs, prev_bs, nbs, nb;
 409     real     spacing, nbr;
 410     int      s, i, j;
 411     double   blav, var;
 412     char   **leg;
 413     real    *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
 414     real     fitparm[4];
 415     real     ee, a, tau1, tau2;
 416
 417     if (n < 4)
 418     {
 419         fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
 420
 421         return;
 422     }
 423
 424     fp = xvgropen(eefile, "Error estimates",
 425                   "Block size (time)", "Error estimate", oenv);
 426     if (output_env_get_print_xvgr_codes(oenv))
 427     {
 428         fprintf(fp,
 429                 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
 430                 (n-1)*dt, n);
 431     }
 432     snew(leg, 2*nset);
 433     xvgr_legend(fp, 2*nset, (const char**)leg, oenv);
 434     sfree(leg);
 435
 436     spacing = pow(2, 1.0/resol);
 437     snew(tbs, n);
 438     snew(ybs, n);
 439     snew(fitsig, n);
 440     for (s = 0; s < nset; s++)
 441     {
 442         nbs     = 0;
 443         prev_bs = 0;
 444         nbr     = nb_min;
 445         while (nbr <= n)
 446         {
 447             bs = n/(int)nbr;
 448             if (bs != prev_bs)
 449             {
 450                 nb  = n/bs;
 451                 var = 0;
 452                 for (i = 0; i < nb; i++)
 453                 {
 454                     blav = 0;
 455                     for (j = 0; j < bs; j++)
 456                     {
 457                         blav += val[s][bs*i+j];
 458                     }
 459                     var += sqr(av[s] - blav/bs);
 460                 }
 461                 tbs[nbs] = bs*dt;
 462                 if (sig[s] == 0)
 463                 {
 464                     ybs[nbs] = 0;
 465                 }
 466                 else
 467                 {
 468                     ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
 469                 }
 470                 nbs++;
 471             }
 472             nbr    *= spacing;
 473             nb      = (int)(nbr+0.5);
 474             prev_bs = bs;
 475         }
 476         if (sig[s] == 0)
 477         {
 478             ee   = 0;
 479             a    = 1;
 480             tau1 = 0;
 481             tau2 = 0;
 482         }
 483         else
 484         {
 485             for (i = 0; i < nbs/2; i++)
 486             {
 487                 rtmp         = tbs[i];
 488                 tbs[i]       = tbs[nbs-1-i];
 489                 tbs[nbs-1-i] = rtmp;
 490                 rtmp         = ybs[i];
 491                 ybs[i]       = ybs[nbs-1-i];
 492                 ybs[nbs-1-i] = rtmp;
 493             }
 494             /* The initial slope of the normalized ybs^2 is 1.
 495              * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
 496              * From this we take our initial guess for tau1.
 497              */
 498             twooe = 2/exp(1);
 499             i     = -1;
 500             do
 501             {
 502                 i++;
 503                 tau1_est = tbs[i];
 504             }
 505             while (i < nbs - 1 &&
 506                    (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
 507
 508             if (ybs[0] > ybs[1])
 509             {
 510                 fprintf(stdout, "Data set %d has strange time correlations:\n"
 511                         "the std. error using single points is larger than that of blocks of 2 points\n"
 512                         "The error estimate might be inaccurate, check the fit\n",
 513                         s+1);
 514                 /* Use the total time as tau for the fitting weights */
 515                 tau_sig = (n - 1)*dt;
 516             }
 517             else
 518             {
 519                 tau_sig = tau1_est;
 520             }
 521
 522             if (debug)
 523             {
 524                 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
 525             }
 526
 527             /* Generate more or less appropriate sigma's,
 528              * also taking the density of points into account.
 529              */
 530             for (i = 0; i < nbs; i++)
 531             {
 532                 if (i == 0)
 533                 {
 534                     dens = tbs[1]/tbs[0] - 1;
 535                 }
 536                 else if (i == nbs-1)
 537                 {
 538                     dens = tbs[nbs-1]/tbs[nbs-2] - 1;
 539                 }
 540                 else
 541                 {
 542                     dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
 543                 }
 544                 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
 545             }
 546
 547             if (!bSingleExpFit)
 548             {
 549                 fitparm[0] = tau1_est;
 550                 fitparm[1] = 0.95;
 551                 /* We set the initial guess for tau2
 552                  * to halfway between tau1_est and the total time (on log scale).
 553                  */
 554                 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
 555                 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
 556                          bDebugMode(), effnERREST, fitparm, 0);
 557                 fitparm[3] = 1-fitparm[1];
 558             }
 559             if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
 560                 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
 561             {
 562                 if (!bSingleExpFit)
 563                 {
 564                     if (fitparm[2] > (n-1)*dt)
 565                     {
 566                         fprintf(stdout,
 567                                 "Warning: tau2 is longer than the length of the data (%g)\n"
 568                                 "         the statistics might be bad\n",
 569                                 (n-1)*dt);
 570                     }
 571                     else
 572                     {
 573                         fprintf(stdout, "a fitted parameter is negative\n");
 574                     }
 575                     fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 576                             sig[s]*anal_ee_inf(fitparm, n*dt),
 577                             fitparm[1], fitparm[0], fitparm[2]);
 578                     /* Do a fit with tau2 fixed at the total time.
 579                      * One could also choose any other large value for tau2.
 580                      */
 581                     fitparm[0] = tau1_est;
 582                     fitparm[1] = 0.95;
 583                     fitparm[2] = (n-1)*dt;
 584                     fprintf(stderr, "Will fix tau2 at the total time: %g\n", fitparm[2]);
 585                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 586                              effnERREST, fitparm, 4);
 587                     fitparm[3] = 1-fitparm[1];
 588                 }
 589                 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
 590                     || (fitparm[1] > 1 && !bAllowNegLTCorr))
 591                 {
 592                     if (!bSingleExpFit)
 593                     {
 594                         fprintf(stdout, "a fitted parameter is negative\n");
 595                         fprintf(stdout, "invalid fit:  e.e. %g  a %g  tau1 %g  tau2 %g\n",
 596                                 sig[s]*anal_ee_inf(fitparm, n*dt),
 597                                 fitparm[1], fitparm[0], fitparm[2]);
 598                     }
 599                     /* Do a single exponential fit */
 600                     fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
 601                     fitparm[0] = tau1_est;
 602                     fitparm[1] = 1.0;
 603                     fitparm[2] = 0.0;
 604                     do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
 605                              effnERREST, fitparm, 6);
 606                     fitparm[3] = 1-fitparm[1];
 607                 }
 608             }
 609             ee   = sig[s]*anal_ee_inf(fitparm, n*dt);
 610             a    = fitparm[1];
 611             tau1 = fitparm[0];
 612             tau2 = fitparm[2];
 613         }
 614         fprintf(stdout, "Set %3d:  err.est. %g  a %g  tau1 %g  tau2 %g\n",
 615                 s+1, ee, a, tau1, tau2);
 616         fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
 617         fprintf(fp, "@ legend string %d \"ee %6g\"\n",
 618                 2*s+1, sig[s]*anal_ee_inf(fitparm, n*dt));
 619         for (i = 0; i < nbs; i++)
 620         {
 621             fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*sqrt(ybs[i]/(n*dt)),
 622                     sig[s]*sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
 623         }
 624
 625         if (bFitAc)
 626         {
 627             int   fitlen;
 628             real *ac, acint, ac_fit[4];
 629
 630             snew(ac, n);
 631             for (i = 0; i < n; i++)
 632             {
 633                 ac[i] = val[s][i] - av[s];
 634                 if (i > 0)
 635                 {
 636                     fitsig[i] = sqrt(i);
 637                 }
 638                 else
 639                 {
 640                     fitsig[i] = 1;
 641                 }
 642             }
 643             low_do_autocorr(NULL, oenv, NULL, n, 1, -1, &ac,
 644                             dt, eacNormal, 1, FALSE, TRUE,
 645                             FALSE, 0, 0, effnNONE);
 646
 647             fitlen = n/nb_min;
 648
 649             /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
 650             acint = 0.5*ac[0];
 651             for (i = 1; i <= fitlen/2; i++)
 652             {
 653                 acint += ac[i];
 654             }
 655             acint *= dt;
 656
 657             /* Generate more or less appropriate sigma's */
 658             for (i = 0; i <= fitlen; i++)
 659             {
 660                 fitsig[i] = sqrt(acint + dt*i);
 661             }
 662
 663             ac_fit[0] = 0.5*acint;
 664             ac_fit[1] = 0.95;
 665             ac_fit[2] = 10*acint;
 666             do_lmfit(n/nb_min, ac, fitsig, dt, 0, 0, fitlen*dt, oenv,
 667                      bDebugMode(), effnEXP3, ac_fit, 0);
 668             ac_fit[3] = 1 - ac_fit[1];
 669
 670             fprintf(stdout, "Set %3d:  ac erest %g  a %g  tau1 %g  tau2 %g\n",
 671                     s+1, sig[s]*anal_ee_inf(ac_fit, n*dt),
 672                     ac_fit[1], ac_fit[0], ac_fit[2]);
 673
 674             fprintf(fp, "&\n");
 675             for (i = 0; i < nbs; i++)
 676             {
 677                 fprintf(fp, "%g %g\n", tbs[i],
 678                         sig[s]*sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
 679             }
 680
 681             sfree(ac);
 682         }
 683         if (s < nset-1)
 684         {
 685             fprintf(fp, "&\n");
 686         }
 687     }
 688     sfree(fitsig);
 689     sfree(ybs);
 690     sfree(tbs);
 691     gmx_ffclose(fp);
 692 }
 693
 694 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
 695                          gmx_bool bError, real fit_start, real smooth_tail_start,
 696                          const output_env_t oenv)
 697 {
 698     const real tol = 1e-8;
 699     real      *kt;
 700     real       weight, d2;
 701     int        j;
 702
 703     please_cite(stdout, "Spoel2006b");
 704
 705     /* Compute negative derivative k(t) = -dc(t)/dt */
 706     if (!bError)
 707     {
 708         snew(kt, nn);
 709         compute_derivative(nn, time, val[0], kt);
 710         for (j = 0; (j < nn); j++)
 711         {
 712             kt[j] = -kt[j];
 713         }
 714         if (debug)
 715         {
 716             d2 = 0;
 717             for (j = 0; (j < nn); j++)
 718             {
 719                 d2 += sqr(kt[j] - val[3][j]);
 720             }
 721             fprintf(debug, "RMS difference in derivatives is %g\n", sqrt(d2/nn));
 722         }
 723         analyse_corr(nn, time, val[0], val[2], kt, NULL, NULL, NULL, fit_start,
 724                      temp, smooth_tail_start, oenv);
 725         sfree(kt);
 726     }
 727     else if (nset == 6)
 728     {
 729         analyse_corr(nn, time, val[0], val[2], val[4],
 730                      val[1], val[3], val[5], fit_start, temp, smooth_tail_start, oenv);
 731     }
 732     else
 733     {
 734         printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
 735         printf("Not doing anything. Sorry.\n");
 736     }
 737 }
 738
 739 static void filter(real flen, int n, int nset, real **val, real dt)
 740 {
 741     int     f, s, i, j;
 742     double *filt, sum, vf, fluc, fluctot;
 743
 744     f = (int)(flen/(2*dt));
 745     snew(filt, f+1);
 746     filt[0] = 1;
 747     sum     = 1;
 748     for (i = 1; i <= f; i++)
 749     {
 750         filt[i] = cos(M_PI*dt*i/flen);
 751         sum    += 2*filt[i];
 752     }
 753     for (i = 0; i <= f; i++)
 754     {
 755         filt[i] /= sum;
 756     }
 757     fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
 758     fprintf(stdout, "  using a filter of length %g of %d points\n", flen, 2*f+1);
 759     fluctot = 0;
 760     for (s = 0; s < nset; s++)
 761     {
 762         fluc = 0;
 763         for (i = f; i < n-f; i++)
 764         {
 765             vf = filt[0]*val[s][i];
 766             for (j = 1; j <= f; j++)
 767             {
 768                 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
 769             }
 770             fluc += sqr(val[s][i] - vf);
 771         }
 772         fluc    /= n - 2*f;
 773         fluctot += fluc;
 774         fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, sqrt(fluc));
 775     }
 776     fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", sqrt(fluctot/nset));
 777     fprintf(stdout, "\n");
 778
 779     sfree(filt);
 780 }
 781
 782 static void do_fit(FILE *out, int n, gmx_bool bYdy,
 783                    int ny, real *x0, real **val,
 784                    int npargs, t_pargs *ppa, const output_env_t oenv)
 785 {
 786     real *c1 = NULL, *sig = NULL, *fitparm;
 787     real  tendfit, tbeginfit;
 788     int   i, efitfn, nparm;
 789
 790     efitfn = get_acffitfn();
 791     nparm  = nfp_ffn[efitfn];
 792     fprintf(out, "Will fit to the following function:\n");
 793     fprintf(out, "%s\n", longs_ffn[efitfn]);
 794     c1 = val[n];
 795     if (bYdy)
 796     {
 797         c1  = val[n];
 798         sig = val[n+1];
 799         fprintf(out, "Using two columns as y and sigma values\n");
 800     }
 801     else
 802     {
 803         snew(sig, ny);
 804     }
 805     if (opt2parg_bSet("-beginfit", npargs, ppa))
 806     {
 807         tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
 808     }
 809     else
 810     {
 811         tbeginfit = x0[0];
 812     }
 813     if (opt2parg_bSet("-endfit", npargs, ppa))
 814     {
 815         tendfit   = opt2parg_real("-endfit", npargs, ppa);
 816     }
 817     else
 818     {
 819         tendfit   = x0[ny-1];
 820     }
 821
 822     snew(fitparm, nparm);
 823     switch (efitfn)
 824     {
 825         case effnEXP1:
 826             fitparm[0] = 0.5;
 827             break;
 828         case effnEXP2:
 829             fitparm[0] = 0.5;
 830             fitparm[1] = c1[0];
 831             break;
 832         case effnEXP3:
 833             fitparm[0] = 1.0;
 834             fitparm[1] = 0.5*c1[0];
 835             fitparm[2] = 10.0;
 836             break;
 837         case effnEXP5:
 838             fitparm[0] = fitparm[2] = 0.5*c1[0];
 839             fitparm[1] = 10;
 840             fitparm[3] = 40;
 841             fitparm[4] = 0;
 842             break;
 843         case effnEXP7:
 844             fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
 845             fitparm[1] = 1;
 846             fitparm[3] = 10;
 847             fitparm[5] = 100;
 848             fitparm[6] = 0;
 849             break;
 850         case effnEXP9:
 851             fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
 852             fitparm[1] = 0.1;
 853             fitparm[3] = 1;
 854             fitparm[5] = 10;
 855             fitparm[7] = 100;
 856             fitparm[8] = 0;
 857             break;
 858         default:
 859             fprintf(out, "Warning: don't know how to initialize the parameters\n");
 860             for (i = 0; (i < nparm); i++)
 861             {
 862                 fitparm[i] = 1;
 863             }
 864     }
 865     fprintf(out, "Starting parameters:\n");
 866     for (i = 0; (i < nparm); i++)
 867     {
 868         fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 869     }
 870     if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
 871                  oenv, bDebugMode(), efitfn, fitparm, 0))
 872     {
 873         for (i = 0; (i < nparm); i++)
 874         {
 875             fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
 876         }
 877     }
 878     else
 879     {
 880         fprintf(out, "No solution was found\n");
 881     }
 882 }
 883
 884 static void do_ballistic(const char *balFile, int nData,
 885                          real *t, real **val, int nSet,
 886                          real balTime, int nBalExp,
 887                          const output_env_t oenv)
 888 {
 889     double     **ctd   = NULL, *td = NULL;
 890     t_gemParams *GP    = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp);
 891     static char *leg[] = {"Ac'(t)"};
 892     FILE        *fp;
 893     int          i, set;
 894
 895     if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
 896     {
 897         snew(ctd, nSet);
 898         snew(td,  nData);
 899
 900         fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 901         xvgr_legend(fp, asize(leg), (const char**)leg, oenv);
 902
 903         for (set = 0; set < nSet; set++)
 904         {
 905             snew(ctd[set], nData);
 906             for (i = 0; i < nData; i++)
 907             {
 908                 ctd[set][i] = (double)val[set][i];
 909                 if (set == 0)
 910                 {
 911                     td[i] = (double)t[i];
 912                 }
 913             }
 914
 915             takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
 916         }
 917
 918         for (i = 0; i < nData; i++)
 919         {
 920             fprintf(fp, "  %g", t[i]);
 921             for (set = 0; set < nSet; set++)
 922             {
 923                 fprintf(fp, "  %g", ctd[set][i]);
 924             }
 925             fprintf(fp, "\n");
 926         }
 927
 928
 929         for (set = 0; set < nSet; set++)
 930         {
 931             sfree(ctd[set]);
 932         }
 933         sfree(ctd);
 934         sfree(td);
 935     }
 936     else
 937     {
 938         printf("Number of data points is less than the number of parameters to fit\n."
 939                "The system is underdetermined, hence no ballistic term can be found.\n\n");
 940     }
 941 }
 942
 943 static void do_geminate(const char *gemFile, int nData,
 944                         real *t, real **val, int nSet,
 945                         const real D, const real rcut, const real balTime,
 946                         const int nFitPoints, const real begFit, const real endFit,
 947                         const output_env_t oenv)
 948 {
 949     double     **ctd = NULL, **ctdGem = NULL, *td = NULL;
 950     t_gemParams *GP  = init_gemParams(rcut, D, t, nData, nFitPoints,
 951                                       begFit, endFit, balTime, 1);
 952     const char  *leg[] = {"Ac\\sgem\\N(t)"};
 953     FILE        *fp;
 954     int          i, set;
 955
 956     snew(ctd,    nSet);
 957     snew(ctdGem, nSet);
 958     snew(td,  nData);
 959
 960     fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation", "Time (ps)", "C'(t)", oenv);
 961     xvgr_legend(fp, asize(leg), leg, oenv);
 962
 963     for (set = 0; set < nSet; set++)
 964     {
 965         snew(ctd[set],    nData);
 966         snew(ctdGem[set], nData);
 967         for (i = 0; i < nData; i++)
 968         {
 969             ctd[set][i] = (double)val[set][i];
 970             if (set == 0)
 971             {
 972                 td[i] = (double)t[i];
 973             }
 974         }
 975         fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
 976     }
 977
 978     for (i = 0; i < nData; i++)
 979     {
 980         fprintf(fp, "  %g", t[i]);
 981         for (set = 0; set < nSet; set++)
 982         {
 983             fprintf(fp, "  %g", ctdGem[set][i]);
 984         }
 985         fprintf(fp, "\n");
 986     }
 987
 988     for (set = 0; set < nSet; set++)
 989     {
 990         sfree(ctd[set]);
 991         sfree(ctdGem[set]);
 992     }
 993     sfree(ctd);
 994     sfree(ctdGem);
 995     sfree(td);
 996 }
 997
 998 int gmx_analyze(int argc, char *argv[])
 999 {
1000     static const char *desc[] = {
1001         "[THISMODULE] reads an ASCII file and analyzes data sets.",
1002         "A line in the input file may start with a time",
1003         "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
1004         "Multiple sets can also be",
1005         "read when they are separated by & (option [TT]-n[tt]);",
1006         "in this case only one [IT]y[it]-value is read from each line.",
1007         "All lines starting with # and @ are skipped.",
1008         "All analyses can also be done for the derivative of a set",
1009         "(option [TT]-d[tt]).[PAR]",
1010
1011         "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
1012         "points are equidistant in time.[PAR]",
1013
1014         "[THISMODULE] always shows the average and standard deviation of each",
1015         "set, as well as the relative deviation of the third",
1016         "and fourth cumulant from those of a Gaussian distribution with the same",
1017         "standard deviation.[PAR]",
1018
1019         "Option [TT]-ac[tt] produces the autocorrelation function(s).",
1020         "Be sure that the time interval between data points is",
1021         "much shorter than the time scale of the autocorrelation.[PAR]",
1022
1023         "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
1024         "i/2 periods. The formula is:[BR]"
1025         "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
1026         "This is useful for principal components obtained from covariance",
1027         "analysis, since the principal components of random diffusion are",
1028         "pure cosines.[PAR]",
1029
1030         "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
1031
1032         "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
1033
1034         "Option [TT]-av[tt] produces the average over the sets.",
1035         "Error bars can be added with the option [TT]-errbar[tt].",
1036         "The errorbars can represent the standard deviation, the error",
1037         "(assuming the points are independent) or the interval containing",
1038         "90% of the points, by discarding 5% of the points at the top and",
1039         "the bottom.[PAR]",
1040
1041         "Option [TT]-ee[tt] produces error estimates using block averaging.",
1042         "A set is divided in a number of blocks and averages are calculated for",
1043         "each block. The error for the total average is calculated from",
1044         "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1045         "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1046         "These errors are plotted as a function of the block size.",
1047         "Also an analytical block average curve is plotted, assuming",
1048         "that the autocorrelation is a sum of two exponentials.",
1049         "The analytical curve for the block average is:[BR]",
1050         "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T (  [GRK]alpha[grk]   ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
1051         "                       (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
1052         "where T is the total time.",
1053         "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1054         "When the actual block average is very close to the analytical curve,",
1055         "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1056         "The complete derivation is given in",
1057         "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1058
1059         "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
1060         "component from a hydrogen bond autocorrelation function by the fitting",
1061         "of a sum of exponentials, as described in e.g.",
1062         "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
1063         "is the one with the most negative coefficient in the exponential,",
1064         "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
1065         "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
1066
1067         "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
1068         "(and optionally kD) to the hydrogen bond autocorrelation function",
1069         "according to the reversible geminate recombination model. Removal of",
1070         "the ballistic component first is strongly advised. The model is presented in",
1071         "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
1072
1073         "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1074         "of each set and over all sets with respect to a filtered average.",
1075         "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1076         "to len/2. len is supplied with the option [TT]-filter[tt].",
1077         "This filter reduces oscillations with period len/2 and len by a factor",
1078         "of 0.79 and 0.33 respectively.[PAR]",
1079
1080         "Option [TT]-g[tt] fits the data to the function given with option",
1081         "[TT]-fitfn[tt].[PAR]",
1082
1083         "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1084         "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1085         "zero or with a negative value are ignored.[PAR]"
1086
1087         "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1088         "on output from [gmx-hbond]. The input file can be taken directly",
1089         "from [TT]gmx hbond -ac[tt], and then the same result should be produced."
1090     };
1091     static real        tb         = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1092     static gmx_bool    bHaveT     = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1093     static gmx_bool    bEESEF     = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1094     static gmx_bool    bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE, bLuzarError = FALSE;
1095     static int         nsets_in   = 1, d = 1, nb_min = 4, resol = 10, nBalExp = 4, nFitPoints = 100;
1096     static real        temp       = 298.15, fit_start = 1, fit_end = 60, smooth_tail_start = -1, balTime = 0.2, diffusion = 5e-5, rcut = 0.35;
1097
1098     /* must correspond to enum avbar* declared at beginning of file */
1099     static const char *avbar_opt[avbarNR+1] = {
1100         NULL, "none", "stddev", "error", "90", NULL
1101     };
1102
1103     t_pargs            pa[] = {
1104         { "-time",    FALSE, etBOOL, {&bHaveT},
1105           "Expect a time in the input" },
1106         { "-b",       FALSE, etREAL, {&tb},
1107           "First time to read from set" },
1108         { "-e",       FALSE, etREAL, {&te},
1109           "Last time to read from set" },
1110         { "-n",       FALSE, etINT, {&nsets_in},
1111           "Read this number of sets separated by &" },
1112         { "-d",       FALSE, etBOOL, {&bDer},
1113           "Use the derivative" },
1114         { "-dp",      FALSE, etINT, {&d},
1115           "HIDDENThe derivative is the difference over this number of points" },
1116         { "-bw",      FALSE, etREAL, {&binwidth},
1117           "Binwidth for the distribution" },
1118         { "-errbar",  FALSE, etENUM, {avbar_opt},
1119           "Error bars for [TT]-av[tt]" },
1120         { "-integrate", FALSE, etBOOL, {&bIntegrate},
1121           "Integrate data function(s) numerically using trapezium rule" },
1122         { "-aver_start", FALSE, etREAL, {&aver_start},
1123           "Start averaging the integral from here" },
1124         { "-xydy",    FALSE, etBOOL, {&bXYdy},
1125           "Interpret second data set as error in the y values for integrating" },
1126         { "-regression", FALSE, etBOOL, {&bRegression},
1127           "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1128         { "-luzar",   FALSE, etBOOL, {&bLuzar},
1129           "Do a Luzar and Chandler analysis on a correlation function and "
1130           "related as produced by [gmx-hbond]. When in addition the "
1131           "[TT]-xydy[tt] flag is given the second and fourth column will be "
1132           "interpreted as errors in c(t) and n(t)." },
1133         { "-temp",    FALSE, etREAL, {&temp},
1134           "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1135         { "-fitstart", FALSE, etREAL, {&fit_start},
1136           "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1137         { "-fitend", FALSE, etREAL, {&fit_end},
1138           "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1139         { "-smooth", FALSE, etREAL, {&smooth_tail_start},
1140           "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1141         { "-nbmin",   FALSE, etINT, {&nb_min},
1142           "HIDDENMinimum number of blocks for block averaging" },
1143         { "-resol", FALSE, etINT, {&resol},
1144           "HIDDENResolution for the block averaging, block size increases with"
1145           " a factor 2^(1/resol)" },
1146         { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1147           "HIDDENAlways use a single exponential fit for the error estimate" },
1148         { "-eenlc", FALSE, etBOOL, {&bEENLC},
1149           "HIDDENAllow a negative long-time correlation" },
1150         { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1151           "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1152         { "-filter",  FALSE, etREAL, {&filtlen},
1153           "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1154         { "-power", FALSE, etBOOL, {&bPower},
1155           "Fit data to: b t^a" },
1156         { "-subav", FALSE, etBOOL, {&bSubAv},
1157           "Subtract the average before autocorrelating" },
1158         { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1159           "Calculate one ACF over all sets" },
1160         { "-nbalexp", FALSE, etINT, {&nBalExp},
1161           "HIDDENNumber of exponentials to fit to the ultrafast component" },
1162         { "-baltime", FALSE, etREAL, {&balTime},
1163           "HIDDENTime up to which the ballistic component will be fitted" },
1164 /*     { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1165 /*       "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1166 /*     { "-rcut", FALSE, etREAL, {&rcut}, */
1167 /*       "Cut-off for hydrogen bonds in geminate algorithms" }, */
1168 /*     { "-gemtype", FALSE, etENUM, {gemType}, */
1169 /*       "What type of gminate recombination to use"}, */
1170 /*     { "-D", FALSE, etREAL, {&diffusion}, */
1171 /*       "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1172     };
1173 #define NPA asize(pa)
1174
1175     FILE           *out, *out_fit;
1176     int             n, nlast, s, nset, i, j = 0;
1177     real          **val, *t, dt, tot, error;
1178     double         *av, *sig, cum1, cum2, cum3, cum4, db;
1179     const char     *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *balfile, *gemfile, *fitfile;
1180     output_env_t    oenv;
1181
1182     t_filenm        fnm[] = {
1183         { efXVG, "-f",    "graph",    ffREAD   },
1184         { efXVG, "-ac",   "autocorr", ffOPTWR  },
1185         { efXVG, "-msd",  "msd",      ffOPTWR  },
1186         { efXVG, "-cc",   "coscont",  ffOPTWR  },
1187         { efXVG, "-dist", "distr",    ffOPTWR  },
1188         { efXVG, "-av",   "average",  ffOPTWR  },
1189         { efXVG, "-ee",   "errest",   ffOPTWR  },
1190         { efXVG, "-bal",  "ballisitc", ffOPTWR  },
1191 /*     { efXVG, "-gem",  "geminate", ffOPTWR  }, */
1192         { efLOG, "-g",    "fitlog",   ffOPTWR  }
1193     };
1194 #define NFILE asize(fnm)
1195
1196     int      npargs;
1197     t_pargs *ppa;
1198
1199     npargs = asize(pa);
1200     ppa    = add_acf_pargs(&npargs, pa);
1201
1202     if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1203                            NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
1204     {
1205         return 0;
1206     }
1207
1208     acfile   = opt2fn_null("-ac", NFILE, fnm);
1209     msdfile  = opt2fn_null("-msd", NFILE, fnm);
1210     ccfile   = opt2fn_null("-cc", NFILE, fnm);
1211     distfile = opt2fn_null("-dist", NFILE, fnm);
1212     avfile   = opt2fn_null("-av", NFILE, fnm);
1213     eefile   = opt2fn_null("-ee", NFILE, fnm);
1214     balfile  = opt2fn_null("-bal", NFILE, fnm);
1215 /*   gemfile  = opt2fn_null("-gem",NFILE,fnm); */
1216     /* When doing autocorrelation we don't want a fitlog for fitting
1217      * the function itself (not the acf) when the user did not ask for it.
1218      */
1219     if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == NULL)
1220     {
1221         fitfile  = opt2fn("-g", NFILE, fnm);
1222     }
1223     else
1224     {
1225         fitfile  = opt2fn_null("-g", NFILE, fnm);
1226     }
1227
1228     val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1229                         opt2parg_bSet("-b", npargs, ppa), tb,
1230                         opt2parg_bSet("-e", npargs, ppa), te,
1231                         nsets_in, &nset, &n, &dt, &t);
1232     printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1233
1234     if (bDer)
1235     {
1236         printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1237                d, d);
1238         n -= d;
1239         for (s = 0; s < nset; s++)
1240         {
1241             for (i = 0; (i < n); i++)
1242             {
1243                 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1244             }
1245         }
1246     }
1247
1248     if (bIntegrate)
1249     {
1250         real sum, stddev;
1251
1252         printf("Calculating the integral using the trapezium rule\n");
1253
1254         if (bXYdy)
1255         {
1256             sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1257             printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1258         }
1259         else
1260         {
1261             for (s = 0; s < nset; s++)
1262             {
1263                 sum = evaluate_integral(n, t, val[s], NULL, aver_start, &stddev);
1264                 printf("Integral %d  %10.5f  +/- %10.5f\n", s+1, sum, stddev);
1265             }
1266         }
1267     }
1268
1269     if (fitfile != NULL)
1270     {
1271         out_fit = gmx_ffopen(fitfile, "w");
1272         if (bXYdy && nset >= 2)
1273         {
1274             do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv);
1275         }
1276         else
1277         {
1278             for (s = 0; s < nset; s++)
1279             {
1280                 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv);
1281             }
1282         }
1283         gmx_ffclose(out_fit);
1284     }
1285
1286     printf("                                      std. dev.    relative deviation of\n");
1287     printf("                       standard       ---------   cumulants from those of\n");
1288     printf("set      average       deviation      sqrt(n-1)   a Gaussian distribition\n");
1289     printf("                                                      cum. 3   cum. 4\n");
1290     snew(av, nset);
1291     snew(sig, nset);
1292     for (s = 0; (s < nset); s++)
1293     {
1294         cum1 = 0;
1295         cum2 = 0;
1296         cum3 = 0;
1297         cum4 = 0;
1298         for (i = 0; (i < n); i++)
1299         {
1300             cum1 += val[s][i];
1301         }
1302         cum1 /= n;
1303         for (i = 0; (i < n); i++)
1304         {
1305             db    = val[s][i]-cum1;
1306             cum2 += db*db;
1307             cum3 += db*db*db;
1308             cum4 += db*db*db*db;
1309         }
1310         cum2  /= n;
1311         cum3  /= n;
1312         cum4  /= n;
1313         av[s]  = cum1;
1314         sig[s] = sqrt(cum2);
1315         if (n > 1)
1316         {
1317             error = sqrt(cum2/(n-1));
1318         }
1319         else
1320         {
1321             error = 0;
1322         }
1323         printf("SS%d  %13.6e   %12.6e   %12.6e      %6.3f   %6.3f\n",
1324                s+1, av[s], sig[s], error,
1325                sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1326                sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1327     }
1328     printf("\n");
1329
1330     if (filtlen)
1331     {
1332         filter(filtlen, n, nset, val, dt);
1333     }
1334
1335     if (msdfile)
1336     {
1337         out = xvgropen(msdfile, "Mean square displacement",
1338                        "time", "MSD (nm\\S2\\N)", oenv);
1339         nlast = (int)(n*frac);
1340         for (s = 0; s < nset; s++)
1341         {
1342             for (j = 0; j <= nlast; j++)
1343             {
1344                 if (j % 100 == 0)
1345                 {
1346                     fprintf(stderr, "\r%d", j);
1347                 }
1348                 tot = 0;
1349                 for (i = 0; i < n-j; i++)
1350                 {
1351                     tot += sqr(val[s][i]-val[s][i+j]);
1352                 }
1353                 tot /= (real)(n-j);
1354                 fprintf(out, " %g %8g\n", dt*j, tot);
1355             }
1356             if (s < nset-1)
1357             {
1358                 fprintf(out, "&\n");
1359             }
1360         }
1361         gmx_ffclose(out);
1362         fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1363     }
1364     if (ccfile)
1365     {
1366         plot_coscont(ccfile, n, nset, val, oenv);
1367     }
1368
1369     if (distfile)
1370     {
1371         histogram(distfile, binwidth, n, nset, val, oenv);
1372     }
1373     if (avfile)
1374     {
1375         average(avfile, nenum(avbar_opt), n, nset, val, t);
1376     }
1377     if (eefile)
1378     {
1379         estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1380                        bEeFitAc, bEESEF, bEENLC, oenv);
1381     }
1382     if (balfile)
1383     {
1384         do_ballistic(balfile, n, t, val, nset, balTime, nBalExp, oenv);
1385     }
1386 /*   if (gemfile) */
1387 /*       do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1388 /*                   nFitPoints, fit_start, fit_end, oenv); */
1389     if (bPower)
1390     {
1391         power_fit(n, nset, val, t);
1392     }
1393
1394     if (acfile != NULL)
1395     {
1396         if (bSubAv)
1397         {
1398             for (s = 0; s < nset; s++)
1399             {
1400                 for (i = 0; i < n; i++)
1401                 {
1402                     val[s][i] -= av[s];
1403                 }
1404             }
1405         }
1406         do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1407                     eacNormal, bAverCorr);
1408     }
1409
1410     if (bRegression)
1411     {
1412         regression_analysis(n, bXYdy, t, nset, val);
1413     }
1414
1415     if (bLuzar)
1416     {
1417         luzar_correl(n, t, nset, val, temp, bXYdy, fit_start, smooth_tail_start, oenv);
1418     }
1419
1420     view_all(oenv, NFILE, fnm);
1421
1422     return 0;
1423 }