src/gromacs/gmxana/gmx_bar.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * Green Red Orange Magenta Azure Cyan Skyblue
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41 #include <string.h>
  42 #include <ctype.h>
  43 #include <math.h>
  44 #include <float.h>
  45
  46 #include "sysstuff.h"
  47 #include "typedefs.h"
  48 #include "smalloc.h"
  49 #include "futil.h"
  50 #include "statutil.h"
  51 #include "copyrite.h"
  52 #include "macros.h"
  53 #include "enxio.h"
  54 #include "physics.h"
  55 #include "gmx_fatal.h"
  56 #include "xvgr.h"
  57 #include "gmx_ana.h"
  58 #include "maths.h"
  59 #include "string2.h"
  60 #include "names.h"
  61 #include "mdebin.h"
  62
  63
  64 /* Structure for the names of lambda vector components */
  65 typedef struct lambda_components_t
  66 {
  67     char **names;  /* Array of strings with names for the lambda vector
  68                       components */
  69     int    N;      /* The number of components */
  70     int    Nalloc; /* The number of allocated components */
  71 } lambda_components_t;
  72
  73 /* Structure for a lambda vector or a dhdl derivative direction */
  74 typedef struct lambda_vec_t
  75 {
  76     double                    *val;   /* The lambda vector component values. Only valid if
  77                                          dhdl == -1 */
  78     int                        dhdl;  /* The coordinate index for the derivative described by this
  79                                          structure, or -1 */
  80     const lambda_components_t *lc;    /* the associated lambda_components
  81                                          structure */
  82     int                        index; /* The state number (init-lambda-state) of this lambda
  83                                          vector, if known. If not, it is set to -1 */
  84 } lambda_vec_t;
  85
  86 /* the dhdl.xvg data from a simulation */
  87 typedef struct xvg_t
  88 {
  89     const char   *filename;
  90     int           ftp;           /* file type */
  91     int           nset;          /* number of lambdas, including dhdl */
  92     int          *np;            /* number of data points (du or hists) per lambda */
  93     int           np_alloc;      /* number of points (du or hists) allocated */
  94     double        temp;          /* temperature */
  95     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  96     double       *t;             /* the times (of second index for y) */
  97     double      **y;             /* the dU values. y[0] holds the derivative, while
  98                                     further ones contain the energy differences between
  99                                     the native lambda and the 'foreign' lambdas. */
 100     lambda_vec_t  native_lambda; /* the native lambda */
 101
 102     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 103 } xvg_t;
 104
 105
 106 typedef struct hist_t
 107 {
 108     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 109     double          dx[2];                  /* the histogram spacing. The reverse
 110                                                dx is the negative of the forward dx.*/
 111     gmx_large_int_t x0[2];                  /* the (forward + reverse) histogram start
 112                                                point(s) as int */
 113
 114     int             nbin[2];                /* the (forward+reverse) number of bins */
 115     gmx_large_int_t sum;                    /* the total number of counts. Must be
 116                                                the same for forward + reverse.  */
 117     int             nhist;                  /* number of hist datas (forward or reverse) */
 118
 119     double          start_time, delta_time; /* start time, end time of histogram */
 120 } hist_t;
 121
 122
 123 /* an aggregate of samples for partial free energy calculation */
 124 typedef struct samples_t
 125 {
 126     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 127     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 128     double        temp;           /* the temperature */
 129     gmx_bool      derivative;     /* whether this sample is a derivative */
 130
 131     /* The samples come either as either delta U lists: */
 132     int     ndu;                    /* the number of delta U samples */
 133     double *du;                     /* the delta u's */
 134     double *t;                      /* the times associated with those samples, or: */
 135     double  start_time, delta_time; /*start time and delta time for linear time*/
 136
 137     /* or as histograms: */
 138     hist_t *hist; /* a histogram */
 139
 140     /* allocation data: (not NULL for data 'owned' by this struct) */
 141     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 142     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 143     hist_t         *hist_alloc;          /* allocated hist */
 144
 145     gmx_large_int_t ntot;                /* total number of samples */
 146     const char     *filename;            /* the file name this sample comes from */
 147 } samples_t;
 148
 149 /* a sample range (start to end for du-style data, or boolean
 150     for both du-style data and histograms */
 151 typedef struct sample_range_t
 152 {
 153     int        start, end; /* start and end index for du style data */
 154     gmx_bool   use;        /* whether to use this sample */
 155
 156     samples_t *s;          /* the samples this range belongs to */
 157 } sample_range_t;
 158
 159
 160 /* a collection of samples for a partial free energy calculation
 161     (i.e. the collection of samples from one native lambda to one
 162     foreign lambda) */
 163 typedef struct sample_coll_t
 164 {
 165     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 166                                           in the histogram */
 167     lambda_vec_t   *foreign_lambda;    /* collection */
 168     double          temp;              /* the temperature */
 169
 170     int             nsamples;          /* the number of samples */
 171     samples_t     **s;                 /* the samples themselves */
 172     sample_range_t *r;                 /* the sample ranges */
 173     int             nsamples_alloc;    /* number of allocated samples */
 174
 175     gmx_large_int_t ntot;              /* total number of samples in the ranges of
 176                                           this collection */
 177
 178     struct sample_coll_t *next, *prev; /* next and previous in the list */
 179 } sample_coll_t;
 180
 181 /* all the samples associated with a lambda point */
 182 typedef struct lambda_data_t
 183 {
 184     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 185     double                temp;        /* temperature */
 186
 187     sample_coll_t        *sc;          /* the samples */
 188
 189     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 190
 191     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 192 } lambda_data_t;
 193
 194 /* Top-level data structure of simulation data */
 195 typedef struct sim_data_t
 196 {
 197     lambda_data_t      *lb;      /* a lambda data linked list */
 198     lambda_data_t       lb_head; /* The head element of the linked list */
 199
 200     lambda_components_t lc;      /* the allowed components of the lambda
 201                                     vectors */
 202 } sim_data_t;
 203
 204 /* Top-level data structure with calculated values. */
 205 typedef struct {
 206     sample_coll_t *a, *b;            /* the simulation data */
 207
 208     double         dg;               /* the free energy difference */
 209     double         dg_err;           /* the free energy difference */
 210
 211     double         dg_disc_err;      /* discretization error */
 212     double         dg_histrange_err; /* histogram range error */
 213
 214     double         sa;               /* relative entropy of b in state a */
 215     double         sa_err;           /* error in sa */
 216     double         sb;               /* relative entropy of a in state b */
 217     double         sb_err;           /* error in sb */
 218
 219     double         dg_stddev;        /* expected dg stddev per sample */
 220     double         dg_stddev_err;    /* error in dg_stddev */
 221 } barres_t;
 222
 223
 224 /* Initialize a lambda_components structure */
 225 static void lambda_components_init(lambda_components_t *lc)
 226 {
 227     lc->N      = 0;
 228     lc->Nalloc = 2;
 229     snew(lc->names, lc->Nalloc);
 230 }
 231
 232 /* Add a component to a lambda_components structure */
 233 static void lambda_components_add(lambda_components_t *lc,
 234                                   const char *name, size_t name_length)
 235 {
 236     while (lc->N + 1 > lc->Nalloc)
 237     {
 238         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 239         srealloc( lc->names, lc->Nalloc );
 240     }
 241     snew(lc->names[lc->N], name_length+1);
 242     strncpy(lc->names[lc->N], name, name_length);
 243     lc->N++;
 244 }
 245
 246 /* check whether a component with index 'index' matches the given name, or
 247    is also NULL. Returns TRUE if this is the case.
 248    the string name does not need to end */
 249 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 250                                         int                        index,
 251                                         const char                *name,
 252                                         size_t                     name_length)
 253 {
 254     size_t len;
 255     if (index >= lc->N)
 256     {
 257         return FALSE;
 258     }
 259     if (name == NULL && lc->names[index] == NULL)
 260     {
 261         return TRUE;
 262     }
 263     if ((name == NULL) != (lc->names[index] == NULL))
 264     {
 265         return FALSE;
 266     }
 267     len = strlen(lc->names[index]);
 268     if (len != name_length)
 269     {
 270         return FALSE;
 271     }
 272     if (strncmp(lc->names[index], name, name_length) == 0)
 273     {
 274         return TRUE;
 275     }
 276     return FALSE;
 277 }
 278
 279 /* Find the index of a given lambda component name, or -1 if not found */
 280 static int lambda_components_find(const lambda_components_t *lc,
 281                                   const char                *name,
 282                                   size_t                     name_length)
 283 {
 284     int i;
 285
 286     for (i = 0; i < lc->N; i++)
 287     {
 288         if (strncmp(lc->names[i], name, name_length) == 0)
 289         {
 290             return i;
 291         }
 292     }
 293     return -1;
 294 }
 295
 296
 297
 298 /* initialize a lambda vector */
 299 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 300 {
 301     snew(lv->val, lc->N);
 302     lv->index = -1;
 303     lv->dhdl  = -1;
 304     lv->lc    = lc;
 305 }
 306
 307 static void lambda_vec_destroy(lambda_vec_t *lv)
 308 {
 309     sfree(lv->val);
 310 }
 311
 312 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 313 {
 314     int i;
 315
 316     lambda_vec_init(lv, orig->lc);
 317     lv->dhdl  = orig->dhdl;
 318     lv->index = orig->index;
 319     for (i = 0; i < lv->lc->N; i++)
 320     {
 321         lv->val[i] = orig->val[i];
 322     }
 323 }
 324
 325 /* write a lambda vec to a preallocated string */
 326 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 327 {
 328     int    i;
 329     size_t np;
 330
 331     str[0] = 0; /* reset the string */
 332     if (lv->dhdl < 0)
 333     {
 334         if (named)
 335         {
 336             str += sprintf(str, "delta H to ");
 337         }
 338         if (lv->lc->N > 1)
 339         {
 340             str += sprintf(str, "(");
 341         }
 342         for (i = 0; i < lv->lc->N; i++)
 343         {
 344             str += sprintf(str, "%g", lv->val[i]);
 345             if (i < lv->lc->N-1)
 346             {
 347                 str += sprintf(str, ", ");
 348             }
 349         }
 350         if (lv->lc->N > 1)
 351         {
 352             str += sprintf(str, ")");
 353         }
 354     }
 355     else
 356     {
 357         /* this lambda vector describes a derivative */
 358         str += sprintf(str, "dH/dl");
 359         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 360         {
 361             str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 362         }
 363     }
 364 }
 365
 366 /* write a shortened version of the lambda vec to a preallocated string */
 367 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 368 {
 369     int    i;
 370     size_t np;
 371
 372     if (lv->index >= 0)
 373     {
 374         sprintf(str, "%6d", lv->index);
 375     }
 376     else
 377     {
 378         if (lv->dhdl < 0)
 379         {
 380             sprintf(str, "%6.3f", lv->val[0]);
 381         }
 382         else
 383         {
 384             sprintf(str, "dH/dl[%d]", lv->dhdl);
 385         }
 386     }
 387 }
 388
 389 /* write an intermediate version of two lambda vecs to a preallocated string */
 390 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 391                                           const lambda_vec_t *b, char *str)
 392 {
 393     int    i;
 394     size_t np;
 395
 396     str[0] = 0;
 397     if ( (a->index >= 0) && (b->index >= 0) )
 398     {
 399         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 400     }
 401     else
 402     {
 403         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 404         {
 405             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 406         }
 407     }
 408 }
 409
 410
 411
 412 /* calculate the difference in lambda vectors: c = a-b.
 413    c must be initialized already, and a and b must describe non-derivative
 414    lambda points */
 415 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 416                             lambda_vec_t *c)
 417 {
 418     int i;
 419
 420     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 421     {
 422         gmx_fatal(FARGS,
 423                   "Trying to calculate the difference between derivatives instead of lambda points");
 424     }
 425     if ((a->lc != b->lc) || (a->lc != c->lc) )
 426     {
 427         gmx_fatal(FARGS,
 428                   "Trying to calculate the difference lambdas with differing basis set");
 429     }
 430     for (i = 0; i < a->lc->N; i++)
 431     {
 432         c->val[i] = a->val[i] - b->val[i];
 433     }
 434 }
 435
 436 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 437    a and b must describe non-derivative lambda points */
 438 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 439 {
 440     int    i;
 441     double ret = 0.;
 442
 443     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 444     {
 445         gmx_fatal(FARGS,
 446                   "Trying to calculate the difference between derivatives instead of lambda points");
 447     }
 448     if (a->lc != b->lc)
 449     {
 450         gmx_fatal(FARGS,
 451                   "Trying to calculate the difference lambdas with differing basis set");
 452     }
 453     for (i = 0; i < a->lc->N; i++)
 454     {
 455         double df = a->val[i] - b->val[i];
 456         ret += df*df;
 457     }
 458     return sqrt(ret);
 459 }
 460
 461
 462 /* check whether two lambda vectors are the same */
 463 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 464 {
 465     int i;
 466
 467     if (a->lc != b->lc)
 468     {
 469         return FALSE;
 470     }
 471     if (a->dhdl < 0)
 472     {
 473         for (i = 0; i < a->lc->N; i++)
 474         {
 475             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 476             {
 477                 return FALSE;
 478             }
 479         }
 480         return TRUE;
 481     }
 482     else
 483     {
 484         /* they're derivatives, so we check whether the indices match */
 485         return (a->dhdl == b->dhdl);
 486     }
 487 }
 488
 489 /* Compare the sort order of two foreign lambda vectors
 490
 491     returns 1 if a is 'bigger' than b,
 492     returns 0 if they're the same,
 493     returns -1 if a is 'smaller' than b.*/
 494 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 495                                        const lambda_vec_t *b)
 496 {
 497     int      i;
 498     double   norm_a    = 0, norm_b = 0;
 499     gmx_bool different = FALSE;
 500
 501     if (a->lc != b->lc)
 502     {
 503         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 504     }
 505     /* if either one has an index we sort based on that */
 506     if ((a->index >= 0) || (b->index >= 0))
 507     {
 508         if (a->index == b->index)
 509         {
 510             return 0;
 511         }
 512         return (a->index > b->index) ? 1 : -1;
 513     }
 514     if (a->dhdl >= 0 || b->dhdl >= 0)
 515     {
 516         /* lambda vectors that are derivatives always sort higher than those
 517            without derivatives */
 518         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 519         {
 520             return (a->dhdl >= 0) ? 1 : -1;
 521         }
 522         return a->dhdl > b->dhdl;
 523     }
 524
 525     /* neither has an index, so we can only sort on the lambda components,
 526        which is only valid if there is one component */
 527     for (i = 0; i < a->lc->N; i++)
 528     {
 529         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 530         {
 531             different = TRUE;
 532         }
 533         norm_a += a->val[i]*a->val[i];
 534         norm_b += b->val[i]*b->val[i];
 535     }
 536     if (!different)
 537     {
 538         return 0;
 539     }
 540     return norm_a > norm_b;
 541 }
 542
 543 /* Compare the sort order of two native lambda vectors
 544
 545     returns 1 if a is 'bigger' than b,
 546     returns 0 if they're the same,
 547     returns -1 if a is 'smaller' than b.*/
 548 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 549                                       const lambda_vec_t *b)
 550 {
 551     int i;
 552
 553     if (a->lc != b->lc)
 554     {
 555         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 556     }
 557     /* if either one has an index we sort based on that */
 558     if ((a->index >= 0) || (b->index >= 0))
 559     {
 560         if (a->index == b->index)
 561         {
 562             return 0;
 563         }
 564         return (a->index > b->index) ? 1 : -1;
 565     }
 566     /* neither has an index, so we can only sort on the lambda components,
 567        which is only valid if there is one component */
 568     if (a->lc->N > 1)
 569     {
 570         gmx_fatal(FARGS,
 571                   "Can't compare lambdas with no index and > 1 component");
 572     }
 573     if (a->dhdl >= 0 || b->dhdl >= 0)
 574     {
 575         gmx_fatal(FARGS,
 576                   "Can't compare native lambdas that are derivatives");
 577     }
 578     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 579     {
 580         return 0;
 581     }
 582     return a->val[0] > b->val[0] ? 1 : -1;
 583 }
 584
 585
 586
 587
 588 static void hist_init(hist_t *h, int nhist, int *nbin)
 589 {
 590     int i;
 591     if (nhist > 2)
 592     {
 593         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 594     }
 595     for (i = 0; i < nhist; i++)
 596     {
 597         snew(h->bin[i], nbin[i]);
 598         h->x0[i]      = 0;
 599         h->nbin[i]    = nbin[i];
 600         h->start_time = h->delta_time = 0;
 601         h->dx[i]      = 0;
 602     }
 603     h->sum   = 0;
 604     h->nhist = nhist;
 605 }
 606
 607 static void hist_destroy(hist_t *h)
 608 {
 609     sfree(h->bin);
 610 }
 611
 612
 613 static void xvg_init(xvg_t *ba)
 614 {
 615     ba->filename = NULL;
 616     ba->nset     = 0;
 617     ba->np_alloc = 0;
 618     ba->np       = NULL;
 619     ba->y        = NULL;
 620 }
 621
 622 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 623                          lambda_vec_t *foreign_lambda, double temp,
 624                          gmx_bool derivative, const char *filename)
 625 {
 626     s->native_lambda  = native_lambda;
 627     s->foreign_lambda = foreign_lambda;
 628     s->temp           = temp;
 629     s->derivative     = derivative;
 630
 631     s->ndu        = 0;
 632     s->du         = NULL;
 633     s->t          = NULL;
 634     s->start_time = s->delta_time = 0;
 635     s->hist       = NULL;
 636     s->du_alloc   = NULL;
 637     s->t_alloc    = NULL;
 638     s->hist_alloc = NULL;
 639     s->ndu_alloc  = 0;
 640     s->nt_alloc   = 0;
 641
 642     s->ntot     = 0;
 643     s->filename = filename;
 644 }
 645
 646 static void sample_range_init(sample_range_t *r, samples_t *s)
 647 {
 648     r->start = 0;
 649     r->end   = s->ndu;
 650     r->use   = TRUE;
 651     r->s     = NULL;
 652 }
 653
 654 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 655                              lambda_vec_t *foreign_lambda, double temp)
 656 {
 657     sc->native_lambda  = native_lambda;
 658     sc->foreign_lambda = foreign_lambda;
 659     sc->temp           = temp;
 660
 661     sc->nsamples       = 0;
 662     sc->s              = NULL;
 663     sc->r              = NULL;
 664     sc->nsamples_alloc = 0;
 665
 666     sc->ntot = 0;
 667     sc->next = sc->prev = NULL;
 668 }
 669
 670 static void sample_coll_destroy(sample_coll_t *sc)
 671 {
 672     /* don't free the samples themselves */
 673     sfree(sc->r);
 674     sfree(sc->s);
 675 }
 676
 677
 678 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 679                              double temp)
 680 {
 681     l->lambda = native_lambda;
 682     l->temp   = temp;
 683
 684     l->next = NULL;
 685     l->prev = NULL;
 686
 687     l->sc = &(l->sc_head);
 688
 689     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 690     l->sc->next = l->sc;
 691     l->sc->prev = l->sc;
 692 }
 693
 694 static void barres_init(barres_t *br)
 695 {
 696     br->dg            = 0;
 697     br->dg_err        = 0;
 698     br->sa            = 0;
 699     br->sa_err        = 0;
 700     br->sb            = 0;
 701     br->sb_err        = 0;
 702     br->dg_stddev     = 0;
 703     br->dg_stddev_err = 0;
 704
 705     br->a = NULL;
 706     br->b = NULL;
 707 }
 708
 709
 710 /* calculate the total number of samples in a sample collection */
 711 static void sample_coll_calc_ntot(sample_coll_t *sc)
 712 {
 713     int i;
 714
 715     sc->ntot = 0;
 716     for (i = 0; i < sc->nsamples; i++)
 717     {
 718         if (sc->r[i].use)
 719         {
 720             if (sc->s[i]->hist)
 721             {
 722                 sc->ntot += sc->s[i]->ntot;
 723             }
 724             else
 725             {
 726                 sc->ntot += sc->r[i].end - sc->r[i].start;
 727             }
 728         }
 729     }
 730 }
 731
 732
 733 /* find the barsamples_t associated with a lambda that corresponds to
 734    a specific foreign lambda */
 735 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 736                                                    lambda_vec_t  *foreign_lambda)
 737 {
 738     sample_coll_t *sc = l->sc->next;
 739
 740     while (sc != l->sc)
 741     {
 742         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 743         {
 744             return sc;
 745         }
 746         sc = sc->next;
 747     }
 748
 749     return NULL;
 750 }
 751
 752 /* insert li into an ordered list of lambda_colls */
 753 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 754 {
 755     sample_coll_t *scn = l->sc->next;
 756     while ( (scn != l->sc) )
 757     {
 758         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 759         {
 760             break;
 761         }
 762         scn = scn->next;
 763     }
 764     /* now insert it before the found scn */
 765     sc->next        = scn;
 766     sc->prev        = scn->prev;
 767     scn->prev->next = sc;
 768     scn->prev       = sc;
 769 }
 770
 771 /* insert li into an ordered list of lambdas */
 772 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 773 {
 774     lambda_data_t *lc = head->next;
 775     while (lc != head)
 776     {
 777         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 778         {
 779             break;
 780         }
 781         lc = lc->next;
 782     }
 783     /* now insert ourselves before the found lc */
 784     li->next       = lc;
 785     li->prev       = lc->prev;
 786     lc->prev->next = li;
 787     lc->prev       = li;
 788 }
 789
 790 /* insert a sample and a sample_range into a sample_coll. The
 791     samples are stored as a pointer, the range is copied. */
 792 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 793                                       sample_range_t *r)
 794 {
 795     /* first check if it belongs here */
 796     if (sc->temp != s->temp)
 797     {
 798         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 799                   s->filename, sc->next->s[0]->filename);
 800     }
 801     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 802     {
 803         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 804                   s->filename, sc->next->s[0]->filename);
 805     }
 806     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 807     {
 808         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 809                   s->filename, sc->next->s[0]->filename);
 810     }
 811
 812     /* check if there's room */
 813     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 814     {
 815         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 816         srenew(sc->s, sc->nsamples_alloc);
 817         srenew(sc->r, sc->nsamples_alloc);
 818     }
 819     sc->s[sc->nsamples] = s;
 820     sc->r[sc->nsamples] = *r;
 821     sc->nsamples++;
 822
 823     sample_coll_calc_ntot(sc);
 824 }
 825
 826 /* insert a sample into a lambda_list, creating the right sample_coll if
 827    neccesary */
 828 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 829 {
 830     gmx_bool       found = FALSE;
 831     sample_coll_t *sc;
 832     sample_range_t r;
 833
 834     lambda_data_t *l = head->next;
 835
 836     /* first search for the right lambda_data_t */
 837     while (l != head)
 838     {
 839         if (lambda_vec_same(l->lambda, s->native_lambda) )
 840         {
 841             found = TRUE;
 842             break;
 843         }
 844         l = l->next;
 845     }
 846
 847     if (!found)
 848     {
 849         snew(l, 1);                                     /* allocate a new one */
 850         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 851         lambda_data_insert_lambda(head, l);             /* add it to the list */
 852     }
 853
 854     /* now look for a sample collection */
 855     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 856     if (!sc)
 857     {
 858         snew(sc, 1); /* allocate a new one */
 859         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 860         lambda_data_insert_sample_coll(l, sc);
 861     }
 862
 863     /* now insert the samples into the sample coll */
 864     sample_range_init(&r, s);
 865     sample_coll_insert_sample(sc, s, &r);
 866 }
 867
 868
 869 /* make a histogram out of a sample collection */
 870 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 871                                   int *nbin_alloc, int *nbin,
 872                                   double *dx, double *xmin, int nbin_default)
 873 {
 874     int      i, j, k;
 875     gmx_bool dx_set   = FALSE;
 876     gmx_bool xmin_set = FALSE;
 877
 878     gmx_bool xmax_set      = FALSE;
 879     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 880                                        limits of a histogram */
 881     double   xmax          = -1;
 882
 883     /* first determine dx and xmin; try the histograms */
 884     for (i = 0; i < sc->nsamples; i++)
 885     {
 886         if (sc->s[i]->hist)
 887         {
 888             hist_t *hist = sc->s[i]->hist;
 889             for (k = 0; k < hist->nhist; k++)
 890             {
 891                 double hdx      = hist->dx[k];
 892                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 893
 894                 /* we use the biggest dx*/
 895                 if ( (!dx_set) || hist->dx[0] > *dx)
 896                 {
 897                     dx_set = TRUE;
 898                     *dx    = hist->dx[0];
 899                 }
 900                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 901                 {
 902                     xmin_set = TRUE;
 903                     *xmin    = (hist->x0[k]*hdx);
 904                 }
 905
 906                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 907                 {
 908                     xmax_set = TRUE;
 909                     xmax     = xmax_now;
 910                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 911                     {
 912                         xmax_set_hard = TRUE;
 913                     }
 914                 }
 915                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 916                 {
 917                     xmax_set_hard = TRUE;
 918                     xmax          = xmax_now;
 919                 }
 920             }
 921         }
 922     }
 923     /* and the delta us */
 924     for (i = 0; i < sc->nsamples; i++)
 925     {
 926         if (sc->s[i]->ndu > 0)
 927         {
 928             /* determine min and max */
 929             int    starti  = sc->r[i].start;
 930             int    endi    = sc->r[i].end;
 931             double du_xmin = sc->s[i]->du[starti];
 932             double du_xmax = sc->s[i]->du[starti];
 933             for (j = starti+1; j < endi; j++)
 934             {
 935                 if (sc->s[i]->du[j] < du_xmin)
 936                 {
 937                     du_xmin = sc->s[i]->du[j];
 938                 }
 939                 if (sc->s[i]->du[j] > du_xmax)
 940                 {
 941                     du_xmax = sc->s[i]->du[j];
 942                 }
 943             }
 944
 945             /* and now change the limits */
 946             if ( (!xmin_set) || (du_xmin < *xmin) )
 947             {
 948                 xmin_set = TRUE;
 949                 *xmin    = du_xmin;
 950             }
 951             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 952             {
 953                 xmax_set = TRUE;
 954                 xmax     = du_xmax;
 955             }
 956         }
 957     }
 958
 959     if (!xmax_set || !xmin_set)
 960     {
 961         *nbin = 0;
 962         return;
 963     }
 964
 965
 966     if (!dx_set)
 967     {
 968         *nbin = nbin_default;
 969         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 970                                                be 0, and we count from 0 */
 971     }
 972     else
 973     {
 974         *nbin = (xmax-(*xmin))/(*dx);
 975     }
 976
 977     if (*nbin > *nbin_alloc)
 978     {
 979         *nbin_alloc = *nbin;
 980         srenew(*bin, *nbin_alloc);
 981     }
 982
 983     /* reset the histogram */
 984     for (i = 0; i < (*nbin); i++)
 985     {
 986         (*bin)[i] = 0;
 987     }
 988
 989     /* now add the actual data */
 990     for (i = 0; i < sc->nsamples; i++)
 991     {
 992         if (sc->s[i]->hist)
 993         {
 994             hist_t *hist = sc->s[i]->hist;
 995             for (k = 0; k < hist->nhist; k++)
 996             {
 997                 double hdx       = hist->dx[k];
 998                 double xmin_hist = hist->x0[k]*hdx;
 999                 for (j = 0; j < hist->nbin[k]; j++)
1000                 {
1001                     /* calculate the bin corresponding to the middle of the
1002                        original bin */
1003                     double x     = hdx*(j+0.5) + xmin_hist;
1004                     int    binnr = (int)((x-(*xmin))/(*dx));
1005
1006                     if (binnr >= *nbin || binnr < 0)
1007                     {
1008                         binnr = (*nbin)-1;
1009                     }
1010
1011                     (*bin)[binnr] += hist->bin[k][j];
1012                 }
1013             }
1014         }
1015         else
1016         {
1017             int starti = sc->r[i].start;
1018             int endi   = sc->r[i].end;
1019             for (j = starti; j < endi; j++)
1020             {
1021                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1022                 if (binnr >= *nbin || binnr < 0)
1023                 {
1024                     binnr = (*nbin)-1;
1025                 }
1026
1027                 (*bin)[binnr]++;
1028             }
1029         }
1030     }
1031 }
1032
1033 /* write a collection of histograms to a file */
1034 void sim_data_histogram(sim_data_t *sd, const char *filename,
1035                         int nbin_default, const output_env_t oenv)
1036 {
1037     char           label_x[STRLEN];
1038     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1039     const char    *title   = "N(\\DeltaH)";
1040     const char    *label_y = "Samples";
1041     FILE          *fp;
1042     lambda_data_t *bl;
1043     int            nsets     = 0;
1044     char         **setnames  = NULL;
1045     gmx_bool       first_set = FALSE;
1046     /* histogram data: */
1047     int           *hist       = NULL;
1048     int            nbin       = 0;
1049     int            nbin_alloc = 0;
1050     double         dx         = 0;
1051     double         min        = 0;
1052     int            i;
1053     lambda_data_t *bl_head = sd->lb;
1054
1055     printf("\nWriting histogram to %s\n", filename);
1056     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1057
1058     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1059
1060     /* first get all the set names */
1061     bl = bl_head->next;
1062     /* iterate over all lambdas */
1063     while (bl != bl_head)
1064     {
1065         sample_coll_t *sc = bl->sc->next;
1066
1067         /* iterate over all samples */
1068         while (sc != bl->sc)
1069         {
1070             char buf[STRLEN], buf2[STRLEN];
1071
1072             nsets++;
1073             srenew(setnames, nsets);
1074             snew(setnames[nsets-1], STRLEN);
1075             if (sc->foreign_lambda->dhdl < 0)
1076             {
1077                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1078                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1079                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1080                         deltag, lambda, buf2, lambda, buf);
1081             }
1082             else
1083             {
1084                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1085                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1086                         dhdl, lambda, buf);
1087             }
1088             sc = sc->next;
1089         }
1090
1091         bl = bl->next;
1092     }
1093     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1094
1095
1096     /* now make the histograms */
1097     bl = bl_head->next;
1098     /* iterate over all lambdas */
1099     while (bl != bl_head)
1100     {
1101         sample_coll_t *sc = bl->sc->next;
1102
1103         /* iterate over all samples */
1104         while (sc != bl->sc)
1105         {
1106             if (!first_set)
1107             {
1108                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1109             }
1110
1111             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1112                                   nbin_default);
1113
1114             for (i = 0; i < nbin; i++)
1115             {
1116                 double xmin = i*dx + min;
1117                 double xmax = (i+1)*dx + min;
1118
1119                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1120             }
1121
1122             first_set = FALSE;
1123             sc        = sc->next;
1124         }
1125
1126         bl = bl->next;
1127     }
1128
1129     if (hist)
1130     {
1131         sfree(hist);
1132     }
1133
1134     xvgrclose(fp);
1135 }
1136
1137 /* create a collection (array) of barres_t object given a ordered linked list
1138    of barlamda_t sample collections */
1139 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1140                                     gmx_bool use_dhdl)
1141 {
1142     lambda_data_t *bl;
1143     int            nlambda = 0;
1144     barres_t      *res;
1145     int            i;
1146     gmx_bool       dhdl    = FALSE;
1147     gmx_bool       first   = TRUE;
1148     lambda_data_t *bl_head = sd->lb;
1149
1150     /* first count the lambdas */
1151     bl = bl_head->next;
1152     while (bl != bl_head)
1153     {
1154         nlambda++;
1155         bl = bl->next;
1156     }
1157     snew(res, nlambda-1);
1158
1159     /* next put the right samples in the res */
1160     *nres = 0;
1161     bl    = bl_head->next->next; /* we start with the second one. */
1162     while (bl != bl_head)
1163     {
1164         sample_coll_t *sc, *scprev;
1165         barres_t      *br = &(res[*nres]);
1166         /* there is always a previous one. we search for that as a foreign
1167            lambda: */
1168         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1169         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1170
1171         barres_init(br);
1172
1173         if (use_dhdl)
1174         {
1175             /* we use dhdl */
1176
1177             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1178             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1179
1180             if (first)
1181             {
1182                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1183                 dhdl = TRUE;
1184             }
1185             if (!dhdl)
1186             {
1187                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1188             }
1189         }
1190         else if (!scprev && !sc)
1191         {
1192             gmx_fatal(FARGS, "There is no path from lambda=%f -> %f that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1193         }
1194
1195         /* normal delta H */
1196         if (!scprev)
1197         {
1198             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->lambda, bl->prev->lambda);
1199         }
1200         if (!sc)
1201         {
1202             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->prev->lambda, bl->lambda);
1203         }
1204         br->a = scprev;
1205         br->b = sc;
1206
1207         first = FALSE;
1208         (*nres)++;
1209         bl = bl->next;
1210     }
1211     return res;
1212 }
1213
1214 /* estimate the maximum discretization error */
1215 static double barres_list_max_disc_err(barres_t *res, int nres)
1216 {
1217     int    i, j;
1218     double disc_err = 0.;
1219     double delta_lambda;
1220
1221     for (i = 0; i < nres; i++)
1222     {
1223         barres_t *br = &(res[i]);
1224
1225         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1226                                            br->a->native_lambda);
1227
1228         for (j = 0; j < br->a->nsamples; j++)
1229         {
1230             if (br->a->s[j]->hist)
1231             {
1232                 double Wfac = 1.;
1233                 if (br->a->s[j]->derivative)
1234                 {
1235                     Wfac =  delta_lambda;
1236                 }
1237
1238                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1239             }
1240         }
1241         for (j = 0; j < br->b->nsamples; j++)
1242         {
1243             if (br->b->s[j]->hist)
1244             {
1245                 double Wfac = 1.;
1246                 if (br->b->s[j]->derivative)
1247                 {
1248                     Wfac =  delta_lambda;
1249                 }
1250                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1251             }
1252         }
1253     }
1254     return disc_err;
1255 }
1256
1257
1258 /* impose start and end times on a sample collection, updating sample_ranges */
1259 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1260                                      double end_t)
1261 {
1262     int i;
1263     for (i = 0; i < sc->nsamples; i++)
1264     {
1265         samples_t      *s = sc->s[i];
1266         sample_range_t *r = &(sc->r[i]);
1267         if (s->hist)
1268         {
1269             double end_time = s->hist->delta_time*s->hist->sum +
1270                 s->hist->start_time;
1271             if (s->hist->start_time < begin_t || end_time > end_t)
1272             {
1273                 r->use = FALSE;
1274             }
1275         }
1276         else
1277         {
1278             if (!s->t)
1279             {
1280                 double end_time;
1281                 if (s->start_time < begin_t)
1282                 {
1283                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1284                 }
1285                 end_time = s->delta_time*s->ndu + s->start_time;
1286                 if (end_time > end_t)
1287                 {
1288                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1289                 }
1290             }
1291             else
1292             {
1293                 int j;
1294                 for (j = 0; j < s->ndu; j++)
1295                 {
1296                     if (s->t[j] < begin_t)
1297                     {
1298                         r->start = j;
1299                     }
1300
1301                     if (s->t[j] >= end_t)
1302                     {
1303                         r->end = j;
1304                         break;
1305                     }
1306                 }
1307             }
1308             if (r->start > r->end)
1309             {
1310                 r->use = FALSE;
1311             }
1312         }
1313     }
1314     sample_coll_calc_ntot(sc);
1315 }
1316
1317 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1318 {
1319     double         first_t, last_t;
1320     double         begin_t, end_t;
1321     lambda_data_t *lc;
1322     lambda_data_t *head = sd->lb;
1323     int            j;
1324
1325     if (begin <= 0 && end < 0)
1326     {
1327         return;
1328     }
1329
1330     /* first determine the global start and end times */
1331     first_t = -1;
1332     last_t  = -1;
1333     lc      = head->next;
1334     while (lc != head)
1335     {
1336         sample_coll_t *sc = lc->sc->next;
1337         while (sc != lc->sc)
1338         {
1339             for (j = 0; j < sc->nsamples; j++)
1340             {
1341                 double start_t, end_t;
1342
1343                 start_t = sc->s[j]->start_time;
1344                 end_t   =   sc->s[j]->start_time;
1345                 if (sc->s[j]->hist)
1346                 {
1347                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1348                 }
1349                 else
1350                 {
1351                     if (sc->s[j]->t)
1352                     {
1353                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1354                     }
1355                     else
1356                     {
1357                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1358                     }
1359                 }
1360
1361                 if (start_t < first_t || first_t < 0)
1362                 {
1363                     first_t = start_t;
1364                 }
1365                 if (end_t > last_t)
1366                 {
1367                     last_t = end_t;
1368                 }
1369             }
1370             sc = sc->next;
1371         }
1372         lc = lc->next;
1373     }
1374
1375     /* calculate the actual times */
1376     if (begin > 0)
1377     {
1378         begin_t = begin;
1379     }
1380     else
1381     {
1382         begin_t = first_t;
1383     }
1384
1385     if (end > 0)
1386     {
1387         end_t = end;
1388     }
1389     else
1390     {
1391         end_t = last_t;
1392     }
1393     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1394
1395     if (begin_t > end_t)
1396     {
1397         return;
1398     }
1399     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1400
1401     /* then impose them */
1402     lc = head->next;
1403     while (lc != head)
1404     {
1405         sample_coll_t *sc = lc->sc->next;
1406         while (sc != lc->sc)
1407         {
1408             sample_coll_impose_times(sc, begin_t, end_t);
1409             sc = sc->next;
1410         }
1411         lc = lc->next;
1412     }
1413 }
1414
1415
1416 /* create subsample i out of ni from an existing sample_coll */
1417 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1418                                              sample_coll_t *sc_orig,
1419                                              int i, int ni)
1420 {
1421     int             j;
1422     int             hist_start, hist_end;
1423
1424     gmx_large_int_t ntot_start;
1425     gmx_large_int_t ntot_end;
1426     gmx_large_int_t ntot_so_far;
1427
1428     *sc = *sc_orig; /* just copy all fields */
1429
1430     /* allocate proprietary memory */
1431     snew(sc->s, sc_orig->nsamples);
1432     snew(sc->r, sc_orig->nsamples);
1433
1434     /* copy the samples */
1435     for (j = 0; j < sc_orig->nsamples; j++)
1436     {
1437         sc->s[j] = sc_orig->s[j];
1438         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1439     }
1440
1441     /* now fix start and end fields */
1442     /* the casts avoid possible overflows */
1443     ntot_start  = (gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1444     ntot_end    = (gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1445     ntot_so_far = 0;
1446     for (j = 0; j < sc->nsamples; j++)
1447     {
1448         gmx_large_int_t ntot_add;
1449         gmx_large_int_t new_start, new_end;
1450
1451         if (sc->r[j].use)
1452         {
1453             if (sc->s[j]->hist)
1454             {
1455                 ntot_add = sc->s[j]->hist->sum;
1456             }
1457             else
1458             {
1459                 ntot_add = sc->r[j].end - sc->r[j].start;
1460             }
1461         }
1462         else
1463         {
1464             ntot_add = 0;
1465         }
1466
1467         if (!sc->s[j]->hist)
1468         {
1469             if (ntot_so_far < ntot_start)
1470             {
1471                 /* adjust starting point */
1472                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1473             }
1474             else
1475             {
1476                 new_start = sc->r[j].start;
1477             }
1478             /* adjust end point */
1479             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1480             if (new_end > sc->r[j].end)
1481             {
1482                 new_end = sc->r[j].end;
1483             }
1484
1485             /* check if we're in range at all */
1486             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1487             {
1488                 new_start = 0;
1489                 new_end   = 0;
1490             }
1491             /* and write the new range */
1492             sc->r[j].start = (int)new_start;
1493             sc->r[j].end   = (int)new_end;
1494         }
1495         else
1496         {
1497             if (sc->r[j].use)
1498             {
1499                 double overlap;
1500                 double ntot_start_norm, ntot_end_norm;
1501                 /* calculate the amount of overlap of the
1502                    desired range (ntot_start -- ntot_end) onto
1503                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1504
1505                 /* first calculate normalized bounds
1506                    (where 0 is the start of the hist range, and 1 the end) */
1507                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1508                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1509
1510                 /* now fix the boundaries */
1511                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1512                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1513
1514                 /* and calculate the overlap */
1515                 overlap = ntot_end_norm - ntot_start_norm;
1516
1517                 if (overlap > 0.95) /* we allow for 5% slack */
1518                 {
1519                     sc->r[j].use = TRUE;
1520                 }
1521                 else if (overlap < 0.05)
1522                 {
1523                     sc->r[j].use = FALSE;
1524                 }
1525                 else
1526                 {
1527                     return FALSE;
1528                 }
1529             }
1530         }
1531         ntot_so_far += ntot_add;
1532     }
1533     sample_coll_calc_ntot(sc);
1534
1535     return TRUE;
1536 }
1537
1538 /* calculate minimum and maximum work values in sample collection */
1539 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1540                                 double *Wmin, double *Wmax)
1541 {
1542     int i, j;
1543
1544     *Wmin = FLT_MAX;
1545     *Wmax = -FLT_MAX;
1546
1547     for (i = 0; i < sc->nsamples; i++)
1548     {
1549         samples_t      *s = sc->s[i];
1550         sample_range_t *r = &(sc->r[i]);
1551         if (r->use)
1552         {
1553             if (!s->hist)
1554             {
1555                 for (j = r->start; j < r->end; j++)
1556                 {
1557                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1558                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1559                 }
1560             }
1561             else
1562             {
1563                 int    hd = 0; /* determine the histogram direction: */
1564                 double dx;
1565                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1566                 {
1567                     hd = 1;
1568                 }
1569                 dx = s->hist->dx[hd];
1570
1571                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1572                 {
1573                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1574                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1575                     /* look for the highest value bin with values */
1576                     if (s->hist->bin[hd][j] > 0)
1577                     {
1578                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1579                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1580                         break;
1581                     }
1582                 }
1583             }
1584         }
1585     }
1586 }
1587
1588 /* Initialize a sim_data structure */
1589 static void sim_data_init(sim_data_t *sd)
1590 {
1591     /* make linked list */
1592     sd->lb       = &(sd->lb_head);
1593     sd->lb->next = sd->lb;
1594     sd->lb->prev = sd->lb;
1595
1596     lambda_components_init(&(sd->lc));
1597 }
1598
1599
1600 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1601 {
1602     int    i;
1603     double sum;
1604
1605     sum = 0;
1606
1607     for (i = 0; i < n; i++)
1608     {
1609         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1610     }
1611
1612     return sum;
1613 }
1614
1615 /* calculate the BAR average given a histogram
1616
1617     if type== 0, calculate the best estimate for the average,
1618     if type==-1, calculate the minimum possible value given the histogram
1619     if type== 1, calculate the maximum possible value given the histogram */
1620 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1621                                 int type)
1622 {
1623     double sum = 0.;
1624     int    i;
1625     int    max;
1626     /* normalization factor multiplied with bin width and
1627        number of samples (we normalize through M): */
1628     double normdx = 1.;
1629     int    hd     = 0; /* determine the histogram direction: */
1630     double dx;
1631
1632     if ( (hist->nhist > 1) && (Wfac < 0) )
1633     {
1634         hd = 1;
1635     }
1636     dx  = hist->dx[hd];
1637     max = hist->nbin[hd]-1;
1638     if (type == 1)
1639     {
1640         max = hist->nbin[hd]; /* we also add whatever was out of range */
1641     }
1642
1643     for (i = 0; i < max; i++)
1644     {
1645         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1646         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1647
1648         sum += pxdx/(1. + exp(x + sbMmDG));
1649     }
1650
1651     return sum;
1652 }
1653
1654 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1655                                 double temp, double tol, int type)
1656 {
1657     double kT, beta, M;
1658     double DG;
1659     int    i, j;
1660     double Wfac1, Wfac2, Wmin, Wmax;
1661     double DG0, DG1, DG2, dDG1;
1662     double sum1, sum2;
1663     double n1, n2; /* numbers of samples as doubles */
1664
1665     kT   = BOLTZ*temp;
1666     beta = 1/kT;
1667
1668     /* count the numbers of samples */
1669     n1 = ca->ntot;
1670     n2 = cb->ntot;
1671
1672     M = log(n1/n2);
1673
1674     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1675     if (ca->foreign_lambda->dhdl < 0)
1676     {
1677         /* this is the case when the delta U were calculated directly
1678            (i.e. we're not scaling dhdl) */
1679         Wfac1 = beta;
1680         Wfac2 = beta;
1681     }
1682     else
1683     {
1684         /* we're using dhdl, so delta_lambda needs to be a
1685            multiplication factor.  */
1686         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1687         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1688                                                   ca->native_lambda);
1689         if (cb->native_lambda->lc->N > 1)
1690         {
1691             gmx_fatal(FARGS,
1692                       "Can't (yet) do multi-component dhdl interpolation");
1693         }
1694
1695         Wfac1 =  beta*delta_lambda;
1696         Wfac2 = -beta*delta_lambda;
1697     }
1698
1699     if (beta < 1)
1700     {
1701         /* We print the output both in kT and kJ/mol.
1702          * Here we determine DG in kT, so when beta < 1
1703          * the precision has to be increased.
1704          */
1705         tol *= beta;
1706     }
1707
1708     /* Calculate minimum and maximum work to give an initial estimate of
1709      * delta G  as their average.
1710      */
1711     {
1712         double Wmin1, Wmin2, Wmax1, Wmax2;
1713         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1714         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1715
1716         Wmin = min(Wmin1, Wmin2);
1717         Wmax = max(Wmax1, Wmax2);
1718     }
1719
1720     DG0 = Wmin;
1721     DG2 = Wmax;
1722
1723     if (debug)
1724     {
1725         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1726     }
1727     /* We approximate by bisection: given our initial estimates
1728        we keep checking whether the halfway point is greater or
1729        smaller than what we get out of the BAR averages.
1730
1731        For the comparison we can use twice the tolerance. */
1732     while (DG2 - DG0 > 2*tol)
1733     {
1734         DG1 = 0.5*(DG0 + DG2);
1735
1736         /* calculate the BAR averages */
1737         dDG1 = 0.;
1738
1739         for (i = 0; i < ca->nsamples; i++)
1740         {
1741             samples_t      *s = ca->s[i];
1742             sample_range_t *r = &(ca->r[i]);
1743             if (r->use)
1744             {
1745                 if (s->hist)
1746                 {
1747                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1748                 }
1749                 else
1750                 {
1751                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1752                                          Wfac1, (M-DG1));
1753                 }
1754             }
1755         }
1756         for (i = 0; i < cb->nsamples; i++)
1757         {
1758             samples_t      *s = cb->s[i];
1759             sample_range_t *r = &(cb->r[i]);
1760             if (r->use)
1761             {
1762                 if (s->hist)
1763                 {
1764                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1765                 }
1766                 else
1767                 {
1768                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1769                                          Wfac2, -(M-DG1));
1770                 }
1771             }
1772         }
1773
1774         if (dDG1 < 0)
1775         {
1776             DG0 = DG1;
1777         }
1778         else
1779         {
1780             DG2 = DG1;
1781         }
1782         if (debug)
1783         {
1784             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1785         }
1786     }
1787
1788     return 0.5*(DG0 + DG2);
1789 }
1790
1791 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1792                              double temp, double dg, double *sa, double *sb)
1793 {
1794     int    i, j;
1795     double W_ab = 0.;
1796     double W_ba = 0.;
1797     double kT, beta;
1798     double Wfac1, Wfac2;
1799     double n1, n2;
1800
1801     kT   = BOLTZ*temp;
1802     beta = 1/kT;
1803
1804     /* count the numbers of samples */
1805     n1 = ca->ntot;
1806     n2 = cb->ntot;
1807
1808     /* to ensure the work values are the same as during the delta_G */
1809     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1810     if (ca->foreign_lambda->dhdl < 0)
1811     {
1812         /* this is the case when the delta U were calculated directly
1813            (i.e. we're not scaling dhdl) */
1814         Wfac1 = beta;
1815         Wfac2 = beta;
1816     }
1817     else
1818     {
1819         /* we're using dhdl, so delta_lambda needs to be a
1820            multiplication factor.  */
1821         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1822                                                   ca->native_lambda);
1823         Wfac1 =  beta*delta_lambda;
1824         Wfac2 = -beta*delta_lambda;
1825     }
1826
1827     /* first calculate the average work in both directions */
1828     for (i = 0; i < ca->nsamples; i++)
1829     {
1830         samples_t      *s = ca->s[i];
1831         sample_range_t *r = &(ca->r[i]);
1832         if (r->use)
1833         {
1834             if (!s->hist)
1835             {
1836                 for (j = r->start; j < r->end; j++)
1837                 {
1838                     W_ab += Wfac1*s->du[j];
1839                 }
1840             }
1841             else
1842             {
1843                 /* normalization factor multiplied with bin width and
1844                    number of samples (we normalize through M): */
1845                 double normdx = 1.;
1846                 double dx;
1847                 int    hd = 0; /* histogram direction */
1848                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1849                 {
1850                     hd = 1;
1851                 }
1852                 dx = s->hist->dx[hd];
1853
1854                 for (j = 0; j < s->hist->nbin[0]; j++)
1855                 {
1856                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1857                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1858                     W_ab += pxdx*x;
1859                 }
1860             }
1861         }
1862     }
1863     W_ab /= n1;
1864
1865     for (i = 0; i < cb->nsamples; i++)
1866     {
1867         samples_t      *s = cb->s[i];
1868         sample_range_t *r = &(cb->r[i]);
1869         if (r->use)
1870         {
1871             if (!s->hist)
1872             {
1873                 for (j = r->start; j < r->end; j++)
1874                 {
1875                     W_ba += Wfac1*s->du[j];
1876                 }
1877             }
1878             else
1879             {
1880                 /* normalization factor multiplied with bin width and
1881                    number of samples (we normalize through M): */
1882                 double normdx = 1.;
1883                 double dx;
1884                 int    hd = 0; /* histogram direction */
1885                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1886                 {
1887                     hd = 1;
1888                 }
1889                 dx = s->hist->dx[hd];
1890
1891                 for (j = 0; j < s->hist->nbin[0]; j++)
1892                 {
1893                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1894                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1895                     W_ba += pxdx*x;
1896                 }
1897             }
1898         }
1899     }
1900     W_ba /= n2;
1901
1902     /* then calculate the relative entropies */
1903     *sa = (W_ab - dg);
1904     *sb = (W_ba + dg);
1905 }
1906
1907 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1908                            double temp, double dg, double *stddev)
1909 {
1910     int    i, j;
1911     double M;
1912     double sigmafact = 0.;
1913     double kT, beta;
1914     double Wfac1, Wfac2;
1915     double n1, n2;
1916
1917     kT   = BOLTZ*temp;
1918     beta = 1/kT;
1919
1920     /* count the numbers of samples */
1921     n1 = ca->ntot;
1922     n2 = cb->ntot;
1923
1924     /* to ensure the work values are the same as during the delta_G */
1925     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1926     if (ca->foreign_lambda->dhdl < 0)
1927     {
1928         /* this is the case when the delta U were calculated directly
1929            (i.e. we're not scaling dhdl) */
1930         Wfac1 = beta;
1931         Wfac2 = beta;
1932     }
1933     else
1934     {
1935         /* we're using dhdl, so delta_lambda needs to be a
1936            multiplication factor.  */
1937         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1938                                                   ca->native_lambda);
1939         Wfac1 =  beta*delta_lambda;
1940         Wfac2 = -beta*delta_lambda;
1941     }
1942
1943     M = log(n1/n2);
1944
1945
1946     /* calculate average in both directions */
1947     for (i = 0; i < ca->nsamples; i++)
1948     {
1949         samples_t      *s = ca->s[i];
1950         sample_range_t *r = &(ca->r[i]);
1951         if (r->use)
1952         {
1953             if (!s->hist)
1954             {
1955                 for (j = r->start; j < r->end; j++)
1956                 {
1957                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1958                 }
1959             }
1960             else
1961             {
1962                 /* normalization factor multiplied with bin width and
1963                    number of samples (we normalize through M): */
1964                 double normdx = 1.;
1965                 double dx;
1966                 int    hd = 0; /* histogram direction */
1967                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1968                 {
1969                     hd = 1;
1970                 }
1971                 dx = s->hist->dx[hd];
1972
1973                 for (j = 0; j < s->hist->nbin[0]; j++)
1974                 {
1975                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1976                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1977
1978                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1979                 }
1980             }
1981         }
1982     }
1983     for (i = 0; i < cb->nsamples; i++)
1984     {
1985         samples_t      *s = cb->s[i];
1986         sample_range_t *r = &(cb->r[i]);
1987         if (r->use)
1988         {
1989             if (!s->hist)
1990             {
1991                 for (j = r->start; j < r->end; j++)
1992                 {
1993                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1994                 }
1995             }
1996             else
1997             {
1998                 /* normalization factor multiplied with bin width and
1999                    number of samples (we normalize through M): */
2000                 double normdx = 1.;
2001                 double dx;
2002                 int    hd = 0; /* histogram direction */
2003                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2004                 {
2005                     hd = 1;
2006                 }
2007                 dx = s->hist->dx[hd];
2008
2009                 for (j = 0; j < s->hist->nbin[0]; j++)
2010                 {
2011                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2012                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2013
2014                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2015                 }
2016             }
2017         }
2018     }
2019
2020     sigmafact /= (n1 + n2);
2021
2022
2023     /* Eq. 10 from
2024        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2025     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2026 }
2027
2028
2029
2030 static void calc_bar(barres_t *br, double tol,
2031                      int npee_min, int npee_max, gmx_bool *bEE,
2032                      double *partsum)
2033 {
2034     int      npee, p;
2035     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2036                                                         for calculated quantities */
2037     int      nsample1, nsample2;
2038     double   temp = br->a->temp;
2039     int      i, j;
2040     double   dg_min, dg_max;
2041     gmx_bool have_hist = FALSE;
2042
2043     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2044
2045     br->dg_disc_err      = 0.;
2046     br->dg_histrange_err = 0.;
2047
2048     /* check if there are histograms */
2049     for (i = 0; i < br->a->nsamples; i++)
2050     {
2051         if (br->a->r[i].use && br->a->s[i]->hist)
2052         {
2053             have_hist = TRUE;
2054             break;
2055         }
2056     }
2057     if (!have_hist)
2058     {
2059         for (i = 0; i < br->b->nsamples; i++)
2060         {
2061             if (br->b->r[i].use && br->b->s[i]->hist)
2062             {
2063                 have_hist = TRUE;
2064                 break;
2065             }
2066         }
2067     }
2068
2069     /* calculate histogram-specific errors */
2070     if (have_hist)
2071     {
2072         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2073         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2074
2075         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2076         {
2077             /* the histogram range  error is the biggest of the differences
2078                between the best estimate and the extremes */
2079             br->dg_histrange_err = fabs(dg_max - dg_min);
2080         }
2081         br->dg_disc_err = 0.;
2082         for (i = 0; i < br->a->nsamples; i++)
2083         {
2084             if (br->a->s[i]->hist)
2085             {
2086                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2087             }
2088         }
2089         for (i = 0; i < br->b->nsamples; i++)
2090         {
2091             if (br->b->s[i]->hist)
2092             {
2093                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2094             }
2095         }
2096     }
2097     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2098
2099     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2100
2101     dg_sig2     = 0;
2102     sa_sig2     = 0;
2103     sb_sig2     = 0;
2104     stddev_sig2 = 0;
2105
2106     *bEE = TRUE;
2107     {
2108         sample_coll_t ca, cb;
2109
2110         /* initialize the samples */
2111         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2112                          br->a->temp);
2113         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2114                          br->b->temp);
2115
2116         for (npee = npee_min; npee <= npee_max; npee++)
2117         {
2118             double dgs      = 0;
2119             double dgs2     = 0;
2120             double dsa      = 0;
2121             double dsb      = 0;
2122             double dsa2     = 0;
2123             double dsb2     = 0;
2124             double dstddev  = 0;
2125             double dstddev2 = 0;
2126
2127
2128             for (p = 0; p < npee; p++)
2129             {
2130                 double   dgp;
2131                 double   stddevc;
2132                 double   sac, sbc;
2133                 gmx_bool cac, cbc;
2134
2135                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2136                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2137
2138                 if (!cac || !cbc)
2139                 {
2140                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2141                     *bEE = FALSE;
2142                     if (cac)
2143                     {
2144                         sample_coll_destroy(&ca);
2145                     }
2146                     if (cbc)
2147                     {
2148                         sample_coll_destroy(&cb);
2149                     }
2150                     return;
2151                 }
2152
2153                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2154                 dgs  += dgp;
2155                 dgs2 += dgp*dgp;
2156
2157                 partsum[npee*(npee_max+1)+p] += dgp;
2158
2159                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2160                 dsa  += sac;
2161                 dsa2 += sac*sac;
2162                 dsb  += sbc;
2163                 dsb2 += sbc*sbc;
2164                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2165
2166                 dstddev  += stddevc;
2167                 dstddev2 += stddevc*stddevc;
2168
2169                 sample_coll_destroy(&ca);
2170                 sample_coll_destroy(&cb);
2171             }
2172             dgs     /= npee;
2173             dgs2    /= npee;
2174             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2175
2176             dsa     /= npee;
2177             dsa2    /= npee;
2178             dsb     /= npee;
2179             dsb2    /= npee;
2180             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2181             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2182
2183             dstddev     /= npee;
2184             dstddev2    /= npee;
2185             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2186         }
2187         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2188         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2189         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2190         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2191     }
2192 }
2193
2194
2195 static double bar_err(int nbmin, int nbmax, const double *partsum)
2196 {
2197     int    nb, b;
2198     double svar, s, s2, dg;
2199
2200     svar = 0;
2201     for (nb = nbmin; nb <= nbmax; nb++)
2202     {
2203         s  = 0;
2204         s2 = 0;
2205         for (b = 0; b < nb; b++)
2206         {
2207             dg  = partsum[nb*(nbmax+1)+b];
2208             s  += dg;
2209             s2 += dg*dg;
2210         }
2211         s    /= nb;
2212         s2   /= nb;
2213         svar += (s2 - s*s)/(nb - 1);
2214     }
2215
2216     return sqrt(svar/(nbmax + 1 - nbmin));
2217 }
2218
2219
2220 /* Seek the end of an identifier (consecutive non-spaces), followed by
2221    an optional number of spaces or '='-signs. Returns a pointer to the
2222    first non-space value found after that. Returns NULL if the string
2223    ends before that.
2224  */
2225 static const char *find_value(const char *str)
2226 {
2227     gmx_bool name_end_found = FALSE;
2228
2229     /* if the string is a NULL pointer, return a NULL pointer. */
2230     if (str == NULL)
2231     {
2232         return NULL;
2233     }
2234     while (*str != '\0')
2235     {
2236         /* first find the end of the name */
2237         if (!name_end_found)
2238         {
2239             if (isspace(*str) || (*str == '=') )
2240             {
2241                 name_end_found = TRUE;
2242             }
2243         }
2244         else
2245         {
2246             if (!( isspace(*str) || (*str == '=') ))
2247             {
2248                 return str;
2249             }
2250         }
2251         str++;
2252     }
2253     return NULL;
2254 }
2255
2256
2257
2258 /* read a vector-notation description of a lambda vector */
2259 static gmx_bool read_lambda_compvec(const char                *str,
2260                                     lambda_vec_t              *lv,
2261                                     const lambda_components_t *lc_in,
2262                                     lambda_components_t       *lc_out,
2263                                     const char               **end,
2264                                     const char                *fn)
2265 {
2266     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2267                                           components, or to check them */
2268     gmx_bool    start_reached = FALSE; /* whether the start of component names
2269                                           has been reached */
2270     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2271     int         n             = 0;     /* current component number */
2272     const char *val_start     = NULL;  /* start of the component name, or NULL
2273                                           if not in a value */
2274     char       *strtod_end;
2275     gmx_bool    OK = TRUE;
2276
2277     if (end)
2278     {
2279         *end = str;
2280     }
2281
2282
2283     if (lc_out && lc_out->N == 0)
2284     {
2285         initialize_lc = TRUE;
2286     }
2287
2288     if (lc_in == NULL)
2289     {
2290         lc_in = lc_out;
2291     }
2292
2293     while (1)
2294     {
2295         if (!start_reached)
2296         {
2297             if (isalnum(*str))
2298             {
2299                 vector        = FALSE;
2300                 start_reached = TRUE;
2301                 val_start     = str;
2302             }
2303             else if (*str == '(')
2304             {
2305                 vector        = TRUE;
2306                 start_reached = TRUE;
2307             }
2308             else if (!isspace(*str))
2309             {
2310                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2311             }
2312         }
2313         else
2314         {
2315             if (val_start)
2316             {
2317                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2318                 {
2319                     /* end of value */
2320                     if (lv == NULL)
2321                     {
2322                         if (initialize_lc)
2323                         {
2324                             lambda_components_add(lc_out, val_start,
2325                                                   (str-val_start));
2326                         }
2327                         else
2328                         {
2329                             if (!lambda_components_check(lc_out, n, val_start,
2330                                                          (str-val_start)))
2331                             {
2332                                 return FALSE;
2333                             }
2334                         }
2335                     }
2336                     else
2337                     {
2338                         /* add a vector component to lv */
2339                         lv->val[n] = strtod(val_start, &strtod_end);
2340                         if (val_start == strtod_end)
2341                         {
2342                             gmx_fatal(FARGS,
2343                                       "Error reading lambda vector in %s", fn);
2344                         }
2345                     }
2346                     /* reset for the next identifier */
2347                     val_start = NULL;
2348                     n++;
2349                     if (!vector)
2350                     {
2351                         return OK;
2352                     }
2353                 }
2354             }
2355             else if (isalnum(*str))
2356             {
2357                 val_start = str;
2358             }
2359             if (*str == ')')
2360             {
2361                 str++;
2362                 if (end)
2363                 {
2364                     *end = str;
2365                 }
2366                 if (!vector)
2367                 {
2368                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2369                 }
2370                 else
2371                 {
2372                     if (n == lc_in->N)
2373                     {
2374                         return OK;
2375                     }
2376                     else if (lv == NULL)
2377                     {
2378                         return FALSE;
2379                     }
2380                     else
2381                     {
2382                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2383                                   fn);
2384                         return FALSE;
2385                     }
2386
2387                 }
2388             }
2389         }
2390         if (*str == '\0')
2391         {
2392             break;
2393         }
2394         str++;
2395         if (end)
2396         {
2397             *end = str;
2398         }
2399     }
2400     if (vector)
2401     {
2402         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2403         return FALSE;
2404     }
2405     return OK;
2406 }
2407
2408 /* read and check the component names from a string */
2409 static gmx_bool read_lambda_components(const char          *str,
2410                                        lambda_components_t *lc,
2411                                        const char         **end,
2412                                        const char          *fn)
2413 {
2414     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2415 }
2416
2417 /* read an initialized lambda vector from a string */
2418 static gmx_bool read_lambda_vector(const char   *str,
2419                                    lambda_vec_t *lv,
2420                                    const char  **end,
2421                                    const char   *fn)
2422 {
2423     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2424 }
2425
2426
2427
2428 /* deduce lambda value from legend.
2429     fn = the file name
2430     legend = the legend string
2431     ba = the xvg data
2432     lam = the initialized lambda vector
2433    returns whether to use the data in this set.
2434  */
2435 static gmx_bool legend2lambda(const char   *fn,
2436                               const char   *legend,
2437                               xvg_t        *ba,
2438                               lambda_vec_t *lam)
2439 {
2440     double        lambda = 0;
2441     const char   *ptr    = NULL, *ptr2 = NULL;
2442     gmx_bool      ok     = FALSE;
2443     gmx_bool      bdhdl  = FALSE;
2444     const char   *tostr  = " to ";
2445
2446     if (legend == NULL)
2447     {
2448         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2449     }
2450
2451     /* look for the last 'to': */
2452     ptr2 = legend;
2453     do
2454     {
2455         ptr2 = strstr(ptr2, tostr);
2456         if (ptr2 != NULL)
2457         {
2458             ptr = ptr2;
2459             ptr2++;
2460         }
2461     }
2462     while (ptr2 != NULL && *ptr2 != '\0');
2463
2464     if (ptr)
2465     {
2466         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2467     }
2468     else
2469     {
2470         /* look for the = sign */
2471         ptr = strrchr(legend, '=');
2472         if (!ptr)
2473         {
2474             /* otherwise look for the last space */
2475             ptr = strrchr(legend, ' ');
2476         }
2477     }
2478
2479     if (strstr(legend, "dH"))
2480     {
2481         ok    = TRUE;
2482         bdhdl = TRUE;
2483     }
2484     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2485     {
2486         ok    = TRUE;
2487         bdhdl = FALSE;
2488     }
2489     else /*if (strstr(legend, "pV"))*/
2490     {
2491         return FALSE;
2492     }
2493     if (!ptr)
2494     {
2495         ok = FALSE;
2496     }
2497
2498     if (!ok)
2499     {
2500         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2501     }
2502     if (!bdhdl)
2503     {
2504         ptr = find_value(ptr);
2505         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2506         {
2507             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2508         }
2509     }
2510     else
2511     {
2512         int         dhdl_index;
2513         const char *end;
2514         char        buf[STRLEN];
2515
2516         ptr = strrchr(legend, '=');
2517         end = ptr;
2518         if (ptr)
2519         {
2520             /* there must be a component name */
2521             ptr--;
2522             if (ptr < legend)
2523             {
2524                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2525             }
2526             /* now backtrack to the start of the identifier */
2527             while (isspace(*ptr))
2528             {
2529                 end = ptr;
2530                 ptr--;
2531                 if (ptr < legend)
2532                 {
2533                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2534                 }
2535             }
2536             while (!isspace(*ptr))
2537             {
2538                 ptr--;
2539                 if (ptr < legend)
2540                 {
2541                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2542                 }
2543             }
2544             ptr++;
2545             strncpy(buf, ptr, (end-ptr));
2546             buf[(end-ptr)] = '\0';
2547             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2548             if (dhdl_index < 0)
2549             {
2550                 char buf[STRLEN];
2551                 strncpy(buf, ptr, (end-ptr));
2552                 buf[(end-ptr)] = '\0';
2553                 gmx_fatal(FARGS,
2554                           "Did not find lambda component for '%s' in %s",
2555                           buf, fn);
2556             }
2557         }
2558         else
2559         {
2560             if (lam->lc->N > 1)
2561             {
2562                 gmx_fatal(FARGS,
2563                           "dhdl without component name with >1 lambda component in %s",
2564                           fn);
2565             }
2566             dhdl_index = 0;
2567         }
2568         lam->dhdl = dhdl_index;
2569     }
2570     return TRUE;
2571 }
2572
2573 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2574                                 lambda_components_t *lc)
2575 {
2576     gmx_bool    bFound;
2577     const char *ptr;
2578     char       *end;
2579     double      native_lambda;
2580
2581     bFound = FALSE;
2582
2583     /* first check for a state string */
2584     ptr = strstr(subtitle, "state");
2585     if (ptr)
2586     {
2587         int         index = -1;
2588         const char *val_end;
2589
2590         /* the new 4.6 style lambda vectors */
2591         ptr = find_value(ptr);
2592         if (ptr)
2593         {
2594             index = strtol(ptr, &end, 10);
2595             if (ptr == end)
2596             {
2597                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2598                 return FALSE;
2599             }
2600             ptr = end;
2601         }
2602         else
2603         {
2604             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2605             return FALSE;
2606         }
2607         /* now find the lambda vector component names */
2608         while (*ptr != '(' && !isalnum(*ptr))
2609         {
2610             ptr++;
2611             if (*ptr == '\0')
2612             {
2613                 gmx_fatal(FARGS,
2614                           "Incomplete lambda vector component data in %s", fn);
2615                 return FALSE;
2616             }
2617         }
2618         val_end = ptr;
2619         if (!read_lambda_components(ptr, lc, &val_end, fn))
2620         {
2621             gmx_fatal(FARGS,
2622                       "lambda vector components in %s don't match those previously read",
2623                       fn);
2624         }
2625         ptr = find_value(val_end);
2626         if (!ptr)
2627         {
2628             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2629             return FALSE;
2630         }
2631         lambda_vec_init(&(ba->native_lambda), lc);
2632         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2633         {
2634             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2635         }
2636         ba->native_lambda.index = index;
2637         bFound                  = TRUE;
2638     }
2639     else
2640     {
2641         /* compatibility mode: check for lambda in other ways. */
2642         /* plain text lambda string */
2643         ptr = strstr(subtitle, "lambda");
2644         if (ptr == NULL)
2645         {
2646             /* xmgrace formatted lambda string */
2647             ptr = strstr(subtitle, "\\xl\\f{}");
2648         }
2649         if (ptr == NULL)
2650         {
2651             /* xmgr formatted lambda string */
2652             ptr = strstr(subtitle, "\\8l\\4");
2653         }
2654         if (ptr != NULL)
2655         {
2656             ptr = strstr(ptr, "=");
2657         }
2658         if (ptr != NULL)
2659         {
2660             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2661             /* add the lambda component name as an empty string */
2662             if (lc->N > 0)
2663             {
2664                 if (!lambda_components_check(lc, 0, "", 0))
2665                 {
2666                     gmx_fatal(FARGS,
2667                               "lambda vector components in %s don't match those previously read",
2668                               fn);
2669                 }
2670             }
2671             else
2672             {
2673                 lambda_components_add(lc, "", 0);
2674             }
2675             lambda_vec_init(&(ba->native_lambda), lc);
2676             ba->native_lambda.val[0] = native_lambda;
2677         }
2678     }
2679
2680     return bFound;
2681 }
2682
2683 static void filename2lambda(const char *fn, xvg_t *ba)
2684 {
2685     double        lambda;
2686     const char   *ptr, *digitptr;
2687     char         *endptr;
2688     int           dirsep;
2689     ptr = fn;
2690     /* go to the end of the path string and search backward to find the last
2691        directory in the path which has to contain the value of lambda
2692      */
2693     while (ptr[1] != '\0')
2694     {
2695         ptr++;
2696     }
2697     /* searching backward to find the second directory separator */
2698     dirsep   = 0;
2699     digitptr = NULL;
2700     while (ptr >= fn)
2701     {
2702         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2703         {
2704             if (dirsep == 1)
2705             {
2706                 break;
2707             }
2708             dirsep++;
2709         }
2710         /* save the last position of a digit between the last two
2711            separators = in the last dirname */
2712         if (dirsep > 0 && isdigit(*ptr))
2713         {
2714             digitptr = ptr;
2715         }
2716         ptr--;
2717     }
2718     if (!digitptr)
2719     {
2720         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2721                   " last directory in the path '%s' does not contain a number", fn);
2722     }
2723     if (digitptr[-1] == '-')
2724     {
2725         digitptr--;
2726     }
2727     lambda = strtod(digitptr, &endptr);
2728     if (endptr == digitptr)
2729     {
2730         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2731     }
2732 }
2733
2734 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2735                                   lambda_components_t *lc)
2736 {
2737     int          i;
2738     char        *subtitle, **legend, *ptr;
2739     int          np;
2740     gmx_bool     native_lambda_read = FALSE;
2741     char         buf[STRLEN];
2742     lambda_vec_t lv;
2743
2744     xvg_init(ba);
2745
2746     ba->filename = fn;
2747
2748     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2749     if (!ba->y)
2750     {
2751         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2752     }
2753     /* Reorder the data */
2754     ba->t  = ba->y[0];
2755     for (i = 1; i < ba->nset; i++)
2756     {
2757         ba->y[i-1] = ba->y[i];
2758     }
2759     ba->nset--;
2760
2761     snew(ba->np, ba->nset);
2762     for (i = 0; i < ba->nset; i++)
2763     {
2764         ba->np[i] = np;
2765     }
2766
2767     ba->temp = -1;
2768     if (subtitle != NULL)
2769     {
2770         /* try to extract temperature */
2771         ptr = strstr(subtitle, "T =");
2772         if (ptr != NULL)
2773         {
2774             ptr += 3;
2775             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2776             {
2777                 if (ba->temp <= 0)
2778                 {
2779                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2780                               ba->temp, fn);
2781                 }
2782             }
2783         }
2784     }
2785     if (ba->temp < 0)
2786     {
2787         if (*temp <= 0)
2788         {
2789             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]", fn);
2790         }
2791         ba->temp = *temp;
2792     }
2793
2794     /* Try to deduce lambda from the subtitle */
2795     if (subtitle)
2796     {
2797         if (subtitle2lambda(subtitle, ba, fn, lc))
2798         {
2799             native_lambda_read = TRUE;
2800         }
2801     }
2802     snew(ba->lambda, ba->nset);
2803     if (legend == NULL)
2804     {
2805         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2806         if (ba->nset == 1)
2807         {
2808             if (!native_lambda_read)
2809             {
2810                 /* Deduce lambda from the file name */
2811                 filename2lambda(fn, ba);
2812                 native_lambda_read = TRUE;
2813             }
2814             ba->lambda[0] = ba->native_lambda;
2815         }
2816         else
2817         {
2818             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2819         }
2820     }
2821     else
2822     {
2823         for (i = 0; i < ba->nset; )
2824         {
2825             gmx_bool use = FALSE;
2826             /* Read lambda from the legend */
2827             lambda_vec_init( &(ba->lambda[i]), lc );
2828             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2829             use = legend2lambda(fn, legend[i], ba, &(ba->lambda[i]));
2830             if (use)
2831             {
2832                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2833                 i++;
2834             }
2835             else
2836             {
2837                 int j;
2838                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2839                 for (j = i+1; j < ba->nset; j++)
2840                 {
2841                     ba->y[j-1]  = ba->y[j];
2842                     legend[j-1] = legend[j];
2843                 }
2844                 ba->nset--;
2845             }
2846         }
2847     }
2848
2849     if (!native_lambda_read)
2850     {
2851         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2852     }
2853
2854     if (legend != NULL)
2855     {
2856         for (i = 0; i < ba->nset-1; i++)
2857         {
2858             sfree(legend[i]);
2859         }
2860         sfree(legend);
2861     }
2862 }
2863
2864 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2865 {
2866     xvg_t     *barsim;
2867     samples_t *s;
2868     int        i;
2869     double    *lambda;
2870
2871     snew(barsim, 1);
2872
2873     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2874
2875     if (barsim->nset < 1)
2876     {
2877         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2878     }
2879
2880     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2881     {
2882         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2883     }
2884     *temp = barsim->temp;
2885
2886     /* now create a series of samples_t */
2887     snew(s, barsim->nset);
2888     for (i = 0; i < barsim->nset; i++)
2889     {
2890         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2891                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2892                                                    &(barsim->lambda[i])),
2893                      fn);
2894         s[i].du  = barsim->y[i];
2895         s[i].ndu = barsim->np[i];
2896         s[i].t   = barsim->t;
2897
2898         lambda_data_list_insert_sample(sd->lb, s+i);
2899     }
2900     {
2901         char buf[STRLEN];
2902
2903         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2904         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2905                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2906         for (i = 0; i < barsim->nset; i++)
2907         {
2908             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2909             printf("        %s (%d pts)\n", buf, s[i].ndu);
2910         }
2911     }
2912     printf("\n\n");
2913 }
2914
2915 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2916                                  double start_time, double delta_time,
2917                                  lambda_vec_t *native_lambda, double temp,
2918                                  double *last_t, const char *filename)
2919 {
2920     int           i, j;
2921     gmx_bool      allocated;
2922     double        old_foreign_lambda;
2923     lambda_vec_t *foreign_lambda;
2924     int           type;
2925     samples_t    *s; /* convenience pointer */
2926     int           startj;
2927
2928     /* check the block types etc. */
2929     if ( (blk->nsub < 3) ||
2930          (blk->sub[0].type != xdr_datatype_int) ||
2931          (blk->sub[1].type != xdr_datatype_double) ||
2932          (
2933              (blk->sub[2].type != xdr_datatype_float) &&
2934              (blk->sub[2].type != xdr_datatype_double)
2935          ) ||
2936          (blk->sub[0].nr < 1) ||
2937          (blk->sub[1].nr < 1) )
2938     {
2939         gmx_fatal(FARGS,
2940                   "Unexpected/corrupted block data in file %s around time %f.",
2941                   filename, start_time);
2942     }
2943
2944     snew(foreign_lambda, 1);
2945     lambda_vec_init(foreign_lambda, native_lambda->lc);
2946     lambda_vec_copy(foreign_lambda, native_lambda);
2947     type = blk->sub[0].ival[0];
2948     if (type == dhbtDH)
2949     {
2950         for (i = 0; i < native_lambda->lc->N; i++)
2951         {
2952             foreign_lambda->val[i] = blk->sub[1].dval[i];
2953         }
2954     }
2955     else
2956     {
2957         if (blk->sub[0].nr > 1)
2958         {
2959             foreign_lambda->dhdl = blk->sub[0].ival[1];
2960         }
2961         else
2962         {
2963             foreign_lambda->dhdl = 0;
2964         }
2965     }
2966
2967     if (!*smp)
2968     {
2969         /* initialize the samples structure if it's empty. */
2970         snew(*smp, 1);
2971         samples_init(*smp, native_lambda, foreign_lambda, temp,
2972                      type == dhbtDHDL, filename);
2973         (*smp)->start_time = start_time;
2974         (*smp)->delta_time = delta_time;
2975     }
2976
2977     /* set convenience pointer */
2978     s = *smp;
2979
2980     /* now double check */
2981     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2982     {
2983         char buf[STRLEN], buf2[STRLEN];
2984         lambda_vec_print(foreign_lambda, buf, FALSE);
2985         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2986         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2987         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2988                   filename, start_time);
2989     }
2990
2991     /* make room for the data */
2992     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2993     {
2994         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2995             blk->sub[2].nr*2 : s->ndu_alloc;
2996         srenew(s->du_alloc, s->ndu_alloc);
2997         s->du = s->du_alloc;
2998     }
2999     startj   = s->ndu;
3000     s->ndu  += blk->sub[2].nr;
3001     s->ntot += blk->sub[2].nr;
3002     *ndu     = blk->sub[2].nr;
3003
3004     /* and copy the data*/
3005     for (j = 0; j < blk->sub[2].nr; j++)
3006     {
3007         if (blk->sub[2].type == xdr_datatype_float)
3008         {
3009             s->du[startj+j] = blk->sub[2].fval[j];
3010         }
3011         else
3012         {
3013             s->du[startj+j] = blk->sub[2].dval[j];
3014         }
3015     }
3016     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3017     {
3018         *last_t = start_time + blk->sub[2].nr*delta_time;
3019     }
3020 }
3021
3022 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3023                                       double start_time, double delta_time,
3024                                       lambda_vec_t *native_lambda, double temp,
3025                                       double *last_t, const char *filename)
3026 {
3027     int           i, j;
3028     samples_t    *s;
3029     int           nhist;
3030     double        old_foreign_lambda;
3031     lambda_vec_t *foreign_lambda;
3032     int           type;
3033     int           nbins[2];
3034
3035     /* check the block types etc. */
3036     if ( (blk->nsub < 2) ||
3037          (blk->sub[0].type != xdr_datatype_double) ||
3038          (blk->sub[1].type != xdr_datatype_large_int) ||
3039          (blk->sub[0].nr < 2)  ||
3040          (blk->sub[1].nr < 2) )
3041     {
3042         gmx_fatal(FARGS,
3043                   "Unexpected/corrupted block data in file %s around time %f",
3044                   filename, start_time);
3045     }
3046
3047     nhist = blk->nsub-2;
3048     if (nhist == 0)
3049     {
3050         return NULL;
3051     }
3052     if (nhist > 2)
3053     {
3054         gmx_fatal(FARGS,
3055                   "Unexpected/corrupted block data in file %s around time %f",
3056                   filename, start_time);
3057     }
3058
3059     snew(s, 1);
3060     *nsamples = 1;
3061
3062     snew(foreign_lambda, 1);
3063     lambda_vec_init(foreign_lambda, native_lambda->lc);
3064     lambda_vec_copy(foreign_lambda, native_lambda);
3065     type = (int)(blk->sub[1].lval[1]);
3066     if (type == dhbtDH)
3067     {
3068         double old_foreign_lambda;
3069
3070         old_foreign_lambda = blk->sub[0].dval[0];
3071         if (old_foreign_lambda >= 0)
3072         {
3073             foreign_lambda->val[0] = old_foreign_lambda;
3074             if (foreign_lambda->lc->N > 1)
3075             {
3076                 gmx_fatal(FARGS,
3077                           "Single-component lambda in multi-component file %s",
3078                           filename);
3079             }
3080         }
3081         else
3082         {
3083             for (i = 0; i < native_lambda->lc->N; i++)
3084             {
3085                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3086             }
3087         }
3088     }
3089     else
3090     {
3091         if (foreign_lambda->lc->N > 1)
3092         {
3093             if (blk->sub[1].nr < 3 + nhist)
3094             {
3095                 gmx_fatal(FARGS,
3096                           "Missing derivative coord in multi-component file %s",
3097                           filename);
3098             }
3099             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3100         }
3101         else
3102         {
3103             foreign_lambda->dhdl = 0;
3104         }
3105     }
3106
3107     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3108                  filename);
3109     snew(s->hist, 1);
3110
3111     for (i = 0; i < nhist; i++)
3112     {
3113         nbins[i] = blk->sub[i+2].nr;
3114     }
3115
3116     hist_init(s->hist, nhist, nbins);
3117
3118     for (i = 0; i < nhist; i++)
3119     {
3120         s->hist->x0[i] = blk->sub[1].lval[2+i];
3121         s->hist->dx[i] = blk->sub[0].dval[1];
3122         if (i == 1)
3123         {
3124             s->hist->dx[i] = -s->hist->dx[i];
3125         }
3126     }
3127
3128     s->hist->start_time = start_time;
3129     s->hist->delta_time = delta_time;
3130     s->start_time       = start_time;
3131     s->delta_time       = delta_time;
3132
3133     for (i = 0; i < nhist; i++)
3134     {
3135         int             nbin;
3136         gmx_large_int_t sum = 0;
3137
3138         for (j = 0; j < s->hist->nbin[i]; j++)
3139         {
3140             int binv = (int)(blk->sub[i+2].ival[j]);
3141
3142             s->hist->bin[i][j] = binv;
3143             sum               += binv;
3144
3145         }
3146         if (i == 0)
3147         {
3148             s->ntot      = sum;
3149             s->hist->sum = sum;
3150         }
3151         else
3152         {
3153             if (s->ntot != sum)
3154             {
3155                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3156                           filename);
3157             }
3158         }
3159     }
3160
3161     if (start_time + s->hist->sum*delta_time > *last_t)
3162     {
3163         *last_t = start_time + s->hist->sum*delta_time;
3164     }
3165     return s;
3166 }
3167
3168
3169 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3170 {
3171     int            i, j;
3172     ener_file_t    fp;
3173     t_enxframe    *fr;
3174     int            nre;
3175     gmx_enxnm_t   *enm           = NULL;
3176     double         first_t       = -1;
3177     double         last_t        = -1;
3178     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3179     int           *nhists        = NULL; /* array to keep count & print at end */
3180     int           *npts          = NULL; /* array to keep count & print at end */
3181     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3182     lambda_vec_t  *native_lambda;
3183     double         end_time;             /* the end time of the last batch of samples */
3184     int            nsamples = 0;
3185     lambda_vec_t   start_lambda;
3186
3187     fp = open_enx(fn, "r");
3188     do_enxnms(fp, &nre, &enm);
3189     snew(fr, 1);
3190
3191     snew(native_lambda, 1);
3192     start_lambda.lc = NULL;
3193
3194     while (do_enx(fp, fr))
3195     {
3196         /* count the data blocks */
3197         int    nblocks_raw  = 0;
3198         int    nblocks_hist = 0;
3199         int    nlam         = 0;
3200         int    k;
3201         /* DHCOLL block information: */
3202         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3203         double rtemp      = 0;
3204
3205         /* count the blocks and handle collection information: */
3206         for (i = 0; i < fr->nblock; i++)
3207         {
3208             if (fr->block[i].id == enxDHHIST)
3209             {
3210                 nblocks_hist++;
3211             }
3212             if (fr->block[i].id == enxDH)
3213             {
3214                 nblocks_raw++;
3215             }
3216             if (fr->block[i].id == enxDHCOLL)
3217             {
3218                 nlam++;
3219                 if ( (fr->block[i].nsub < 1) ||
3220                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3221                      (fr->block[i].sub[0].nr < 5))
3222                 {
3223                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3224                 }
3225
3226                 /* read the data from the DHCOLL block */
3227                 rtemp            =        fr->block[i].sub[0].dval[0];
3228                 start_time       =   fr->block[i].sub[0].dval[1];
3229                 delta_time       =   fr->block[i].sub[0].dval[2];
3230                 old_start_lambda = fr->block[i].sub[0].dval[3];
3231                 delta_lambda     = fr->block[i].sub[0].dval[4];
3232
3233                 if (delta_lambda > 0)
3234                 {
3235                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3236                 }
3237                 if ( ( *temp != rtemp) && (*temp > 0) )
3238                 {
3239                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3240                 }
3241                 *temp = rtemp;
3242
3243                 if (old_start_lambda >= 0)
3244                 {
3245                     if (sd->lc.N > 0)
3246                     {
3247                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3248                         {
3249                             gmx_fatal(FARGS,
3250                                       "lambda vector components in %s don't match those previously read",
3251                                       fn);
3252                         }
3253                     }
3254                     else
3255                     {
3256                         lambda_components_add(&(sd->lc), "", 0);
3257                     }
3258                     if (!start_lambda.lc)
3259                     {
3260                         lambda_vec_init(&start_lambda, &(sd->lc));
3261                     }
3262                     start_lambda.val[0] = old_start_lambda;
3263                 }
3264                 else
3265                 {
3266                     /* read lambda vector */
3267                     int      n_lambda_vec;
3268                     gmx_bool check = (sd->lc.N > 0);
3269                     if (fr->block[i].nsub < 2)
3270                     {
3271                         gmx_fatal(FARGS,
3272                                   "No lambda vector, but start_lambda=%f\n",
3273                                   old_start_lambda);
3274                     }
3275                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3276                     for (j = 0; j < n_lambda_vec; j++)
3277                     {
3278                         const char *name =
3279                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3280                         if (check)
3281                         {
3282                             /* check the components */
3283                             lambda_components_check(&(sd->lc), j, name,
3284                                                     strlen(name));
3285                         }
3286                         else
3287                         {
3288                             lambda_components_add(&(sd->lc), name,
3289                                                   strlen(name));
3290                         }
3291                     }
3292                     lambda_vec_init(&start_lambda, &(sd->lc));
3293                     start_lambda.index = fr->block[i].sub[1].ival[0];
3294                     for (j = 0; j < n_lambda_vec; j++)
3295                     {
3296                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3297                     }
3298                 }
3299                 if (first_t < 0)
3300                 {
3301                     first_t = start_time;
3302                 }
3303             }
3304         }
3305
3306         if (nlam != 1)
3307         {
3308             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3309         }
3310         if (nblocks_raw > 0 && nblocks_hist > 0)
3311         {
3312             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3313         }
3314
3315         if (nsamples > 0)
3316         {
3317             /* check the native lambda */
3318             if (!lambda_vec_same(&start_lambda, native_lambda) )
3319             {
3320                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3321                           fn, native_lambda, start_lambda, start_time);
3322             }
3323             /* check the number of samples against the previous number */
3324             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3325             {
3326                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3327                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3328             }
3329             /* check whether last iterations's end time matches with
3330                the currrent start time */
3331             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3332             {
3333                 /* it didn't. We need to store our samples and reallocate */
3334                 for (i = 0; i < nsamples; i++)
3335                 {
3336                     if (samples_rawdh[i])
3337                     {
3338                         /* insert it into the existing list */
3339                         lambda_data_list_insert_sample(sd->lb,
3340                                                        samples_rawdh[i]);
3341                         /* and make sure we'll allocate a new one this time
3342                            around */
3343                         samples_rawdh[i] = NULL;
3344                     }
3345                 }
3346             }
3347         }
3348         else
3349         {
3350             /* this is the first round; allocate the associated data
3351                structures */
3352             /*native_lambda=start_lambda;*/
3353             lambda_vec_init(native_lambda, &(sd->lc));
3354             lambda_vec_copy(native_lambda, &start_lambda);
3355             nsamples = nblocks_raw+nblocks_hist;
3356             snew(nhists, nsamples);
3357             snew(npts, nsamples);
3358             snew(lambdas, nsamples);
3359             snew(samples_rawdh, nsamples);
3360             for (i = 0; i < nsamples; i++)
3361             {
3362                 nhists[i]        = 0;
3363                 npts[i]          = 0;
3364                 lambdas[i]       = NULL;
3365                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3366                                             ones contain values */
3367             }
3368         }
3369
3370         /* and read them */
3371         k = 0; /* counter for the lambdas, etc. arrays */
3372         for (i = 0; i < fr->nblock; i++)
3373         {
3374             if (fr->block[i].id == enxDH)
3375             {
3376                 int type = (fr->block[i].sub[0].ival[0]);
3377                 if (type == dhbtDH || type == dhbtDHDL)
3378                 {
3379                     int ndu;
3380                     read_edr_rawdh_block(&(samples_rawdh[k]),
3381                                          &ndu,
3382                                          &(fr->block[i]),
3383                                          start_time, delta_time,
3384                                          native_lambda, rtemp,
3385                                          &last_t, fn);
3386                     npts[k] += ndu;
3387                     if (samples_rawdh[k])
3388                     {
3389                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3390                     }
3391                     k++;
3392                 }
3393             }
3394             else if (fr->block[i].id == enxDHHIST)
3395             {
3396                 int type = (int)(fr->block[i].sub[1].lval[1]);
3397                 if (type == dhbtDH || type == dhbtDHDL)
3398                 {
3399                     int        j;
3400                     int        nb = 0;
3401                     samples_t *s; /* this is where the data will go */
3402                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3403                                             start_time, delta_time,
3404                                             native_lambda, rtemp,
3405                                             &last_t, fn);
3406                     nhists[k] += nb;
3407                     if (nb > 0)
3408                     {
3409                         lambdas[k] = s->foreign_lambda;
3410                     }
3411                     k++;
3412                     /* and insert the new sample immediately */
3413                     for (j = 0; j < nb; j++)
3414                     {
3415                         lambda_data_list_insert_sample(sd->lb, s+j);
3416                     }
3417                 }
3418             }
3419         }
3420     }
3421     /* Now store all our extant sample collections */
3422     for (i = 0; i < nsamples; i++)
3423     {
3424         if (samples_rawdh[i])
3425         {
3426             /* insert it into the existing list */
3427             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3428         }
3429     }
3430
3431
3432     {
3433         char buf[STRLEN];
3434         printf("\n");
3435         lambda_vec_print(native_lambda, buf, FALSE);
3436         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3437                fn, first_t, last_t, buf);
3438         for (i = 0; i < nsamples; i++)
3439         {
3440             if (lambdas[i])
3441             {
3442                 lambda_vec_print(lambdas[i], buf, TRUE);
3443                 if (nhists[i] > 0)
3444                 {
3445                     printf("        %s (%d hists)\n", buf, nhists[i]);
3446                 }
3447                 else
3448                 {
3449                     printf("        %s (%d pts)\n", buf, npts[i]);
3450                 }
3451             }
3452         }
3453     }
3454     printf("\n\n");
3455     sfree(npts);
3456     sfree(nhists);
3457     sfree(lambdas);
3458 }
3459
3460
3461 int gmx_bar(int argc, char *argv[])
3462 {
3463     static const char *desc[] = {
3464         "[TT]g_bar[tt] calculates free energy difference estimates through ",
3465         "Bennett's acceptance ratio method (BAR). It also automatically",
3466         "adds series of individual free energies obtained with BAR into",
3467         "a combined free energy estimate.[PAR]",
3468
3469         "Every individual BAR free energy difference relies on two ",
3470         "simulations at different states: say state A and state B, as",
3471         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3472         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3473         "average of the Hamiltonian difference of state B given state A and",
3474         "vice versa.",
3475         "The energy differences to the other state must be calculated",
3476         "explicitly during the simulation. This can be done with",
3477         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3478
3479         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3480         "Two types of input files are supported:[BR]",
3481         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3482         "The files should have columns ",
3483         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3484         "The [GRK]lambda[grk] values are inferred ",
3485         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3486         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3487         "legends of Delta H",
3488         "[BR]",
3489         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3490         "[TT]-extp[tt] option for these files, it is assumed",
3491         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3492         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3493         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3494         "subtitle (if present), otherwise from a number in the subdirectory ",
3495         "in the file name.[PAR]",
3496
3497         "The [GRK]lambda[grk] of the simulation is parsed from ",
3498         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3499         "foreign [GRK]lambda[grk] values from the legend containing the ",
3500         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3501         "the legend line containing 'T ='.[PAR]",
3502
3503         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3504         "These can contain either lists of energy differences (see the ",
3505         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3506         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3507         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3508         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3509
3510         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3511         "the energy difference can also be extrapolated from the ",
3512         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3513         "option, which assumes that the system's Hamiltonian depends linearly",
3514         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3515
3516         "The free energy estimates are determined using BAR with bisection, ",
3517         "with the precision of the output set with [TT]-prec[tt]. ",
3518         "An error estimate taking into account time correlations ",
3519         "is made by splitting the data into blocks and determining ",
3520         "the free energy differences over those blocks and assuming ",
3521         "the blocks are independent. ",
3522         "The final error estimate is determined from the average variance ",
3523         "over 5 blocks. A range of block numbers for error estimation can ",
3524         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3525
3526         "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
3527         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3528         "samples. [BB]Note[bb] that when aggregating energy ",
3529         "differences/derivatives with different sampling intervals, this is ",
3530         "almost certainly not correct. Usually subsequent energies are ",
3531         "correlated and different time intervals mean different degrees ",
3532         "of correlation between samples.[PAR]",
3533
3534         "The results are split in two parts: the last part contains the final ",
3535         "results in kJ/mol, together with the error estimate for each part ",
3536         "and the total. The first part contains detailed free energy ",
3537         "difference estimates and phase space overlap measures in units of ",
3538         "kT (together with their computed error estimate). The printed ",
3539         "values are:[BR]",
3540         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3541         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3542         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3543         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3544         "[TT]*[tt]    s_B: an estimate of the relative entropy of A in B.[BR]",
3545         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3546
3547         "The relative entropy of both states in each other's ensemble can be ",
3548         "interpreted as a measure of phase space overlap: ",
3549         "the relative entropy s_A of the work samples of lambda_B in the ",
3550         "ensemble of lambda_A (and vice versa for s_B), is a ",
3551         "measure of the 'distance' between Boltzmann distributions of ",
3552         "the two states, that goes to zero for identical distributions. See ",
3553         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3554         "[PAR]",
3555         "The estimate of the expected per-sample standard deviation, as given ",
3556         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3557         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3558         "of the actual statistical error, because it assumes independent samples).[PAR]",
3559
3560         "To get a visual estimate of the phase space overlap, use the ",
3561         "[TT]-oh[tt] option to write series of histograms, together with the ",
3562         "[TT]-nbin[tt] option.[PAR]"
3563     };
3564     static real        begin    = 0, end = -1, temp = -1;
3565     int                nd       = 2, nbmin = 5, nbmax = 5;
3566     int                nbin     = 100;
3567     gmx_bool           use_dhdl = FALSE;
3568     gmx_bool           calc_s, calc_v;
3569     t_pargs            pa[] = {
3570         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3571         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3572         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3573         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3574         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3575         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3576         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3577         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3578     };
3579
3580     t_filenm           fnm[] = {
3581         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3582         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3583         { efXVG, "-o",  "bar",    ffOPTWR },
3584         { efXVG, "-oi", "barint", ffOPTWR },
3585         { efXVG, "-oh", "histogram", ffOPTWR }
3586     };
3587 #define NFILE asize(fnm)
3588
3589     int          f, i, j;
3590     int          nf = 0;    /* file counter */
3591     int          nbs;
3592     int          nfile_tot; /* total number of input files */
3593     int          nxvgfile = 0;
3594     int          nedrfile = 0;
3595     char       **fxvgnms;
3596     char       **fedrnms;
3597     sim_data_t   sim_data; /* the simulation data */
3598     barres_t    *results;  /* the results */
3599     int          nresults; /* number of results in results array */
3600
3601     double      *partsum;
3602     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3603     FILE        *fpb, *fpi;
3604     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3605     char         buf[STRLEN], buf2[STRLEN];
3606     char         ktformat[STRLEN], sktformat[STRLEN];
3607     char         kteformat[STRLEN], skteformat[STRLEN];
3608     output_env_t oenv;
3609     double       kT, beta;
3610     gmx_bool     result_OK = TRUE, bEE = TRUE;
3611
3612     gmx_bool     disc_err          = FALSE;
3613     double       sum_disc_err      = 0.; /* discretization error */
3614     gmx_bool     histrange_err     = FALSE;
3615     double       sum_histrange_err = 0.; /* histogram range error */
3616     double       stat_err          = 0.; /* statistical error */
3617
3618     parse_common_args(&argc, argv,
3619                       PCA_CAN_VIEW,
3620                       NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv);
3621
3622     if (opt2bSet("-f", NFILE, fnm))
3623     {
3624         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3625     }
3626     if (opt2bSet("-g", NFILE, fnm))
3627     {
3628         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3629     }
3630
3631     sim_data_init(&sim_data);
3632 #if 0
3633     /* make linked list */
3634     lb = &lambda_head;
3635     lambda_data_init(lb, 0, 0);
3636     lb->next = lb;
3637     lb->prev = lb;
3638 #endif
3639
3640
3641     nfile_tot = nxvgfile + nedrfile;
3642
3643     if (nfile_tot == 0)
3644     {
3645         gmx_fatal(FARGS, "No input files!");
3646     }
3647
3648     if (nd < 0)
3649     {
3650         gmx_fatal(FARGS, "Can not have negative number of digits");
3651     }
3652     prec = pow(10, -nd);
3653
3654     snew(partsum, (nbmax+1)*(nbmax+1));
3655     nf = 0;
3656
3657     /* read in all files. First xvg files */
3658     for (f = 0; f < nxvgfile; f++)
3659     {
3660         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3661         nf++;
3662     }
3663     /* then .edr files */
3664     for (f = 0; f < nedrfile; f++)
3665     {
3666         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3667         nf++;
3668     }
3669
3670     /* fix the times to allow for equilibration */
3671     sim_data_impose_times(&sim_data, begin, end);
3672
3673     if (opt2bSet("-oh", NFILE, fnm))
3674     {
3675         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3676     }
3677
3678     /* assemble the output structures from the lambdas */
3679     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3680
3681     sum_disc_err = barres_list_max_disc_err(results, nresults);
3682
3683     if (nresults == 0)
3684     {
3685         printf("\nNo results to calculate.\n");
3686         return 0;
3687     }
3688
3689     if (sum_disc_err > prec)
3690     {
3691         prec = sum_disc_err;
3692         nd   = ceil(-log10(prec));
3693         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3694     }
3695
3696
3697     /*sprintf(lamformat,"%%6.3f");*/
3698     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3699     /* the format strings of the results in kT */
3700     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3701     sprintf( sktformat, "%%%ds", 6+nd);
3702     /* the format strings of the errors in kT */
3703     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3704     sprintf( skteformat, "%%%ds", 4+nd);
3705     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3706     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3707
3708
3709
3710     fpb = NULL;
3711     if (opt2bSet("-o", NFILE, fnm))
3712     {
3713         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3714         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3715                             "\\lambda", buf, exvggtXYDY, oenv);
3716     }
3717
3718     fpi = NULL;
3719     if (opt2bSet("-oi", NFILE, fnm))
3720     {
3721         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3722         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3723                        "\\lambda", buf, oenv);
3724     }
3725
3726
3727
3728     if (nbmin > nbmax)
3729     {
3730         nbmin = nbmax;
3731     }
3732
3733     /* first calculate results */
3734     bEE      = TRUE;
3735     disc_err = FALSE;
3736     for (f = 0; f < nresults; f++)
3737     {
3738         /* Determine the free energy difference with a factor of 10
3739          * more accuracy than requested for printing.
3740          */
3741         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3742                  &bEE, partsum);
3743
3744         if (results[f].dg_disc_err > prec/10.)
3745         {
3746             disc_err = TRUE;
3747         }
3748         if (results[f].dg_histrange_err > prec/10.)
3749         {
3750             histrange_err = TRUE;
3751         }
3752     }
3753
3754     /* print results in kT */
3755     kT   = BOLTZ*temp;
3756     beta = 1/kT;
3757
3758     printf("\nTemperature: %g K\n", temp);
3759
3760     printf("\nDetailed results in kT (see help for explanation):\n\n");
3761     printf("%6s ", " lam_A");
3762     printf("%6s ", " lam_B");
3763     printf(sktformat,  "DG ");
3764     if (bEE)
3765     {
3766         printf(skteformat, "+/- ");
3767     }
3768     if (disc_err)
3769     {
3770         printf(skteformat, "disc ");
3771     }
3772     if (histrange_err)
3773     {
3774         printf(skteformat, "range ");
3775     }
3776     printf(sktformat,  "s_A ");
3777     if (bEE)
3778     {
3779         printf(skteformat, "+/- " );
3780     }
3781     printf(sktformat,  "s_B ");
3782     if (bEE)
3783     {
3784         printf(skteformat, "+/- " );
3785     }
3786     printf(sktformat,  "stdev ");
3787     if (bEE)
3788     {
3789         printf(skteformat, "+/- ");
3790     }
3791     printf("\n");
3792     for (f = 0; f < nresults; f++)
3793     {
3794         lambda_vec_print_short(results[f].a->native_lambda, buf);
3795         printf("%s ", buf);
3796         lambda_vec_print_short(results[f].b->native_lambda, buf);
3797         printf("%s ", buf);
3798         printf(ktformat,  results[f].dg);
3799         printf(" ");
3800         if (bEE)
3801         {
3802             printf(kteformat, results[f].dg_err);
3803             printf(" ");
3804         }
3805         if (disc_err)
3806         {
3807             printf(kteformat, results[f].dg_disc_err);
3808             printf(" ");
3809         }
3810         if (histrange_err)
3811         {
3812             printf(kteformat, results[f].dg_histrange_err);
3813             printf(" ");
3814         }
3815         printf(ktformat,  results[f].sa);
3816         printf(" ");
3817         if (bEE)
3818         {
3819             printf(kteformat, results[f].sa_err);
3820             printf(" ");
3821         }
3822         printf(ktformat,  results[f].sb);
3823         printf(" ");
3824         if (bEE)
3825         {
3826             printf(kteformat, results[f].sb_err);
3827             printf(" ");
3828         }
3829         printf(ktformat,  results[f].dg_stddev);
3830         printf(" ");
3831         if (bEE)
3832         {
3833             printf(kteformat, results[f].dg_stddev_err);
3834         }
3835         printf("\n");
3836
3837         /* Check for negative relative entropy with a 95% certainty. */
3838         if (results[f].sa < -2*results[f].sa_err ||
3839             results[f].sb < -2*results[f].sb_err)
3840         {
3841             result_OK = FALSE;
3842         }
3843     }
3844
3845     if (!result_OK)
3846     {
3847         printf("\nWARNING: Some of these results violate the Second Law of "
3848                "Thermodynamics: \n"
3849                "         This is can be the result of severe undersampling, or "
3850                "(more likely)\n"
3851                "         there is something wrong with the simulations.\n");
3852     }
3853
3854
3855     /* final results in kJ/mol */
3856     printf("\n\nFinal results in kJ/mol:\n\n");
3857     dg_tot  = 0;
3858     for (f = 0; f < nresults; f++)
3859     {
3860
3861         if (fpi != NULL)
3862         {
3863             lambda_vec_print_short(results[f].a->native_lambda, buf);
3864             fprintf(fpi, xvg2format, buf, dg_tot);
3865         }
3866
3867
3868         if (fpb != NULL)
3869         {
3870             lambda_vec_print_intermediate(results[f].a->native_lambda,
3871                                           results[f].b->native_lambda,
3872                                           buf);
3873
3874             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3875         }
3876
3877         printf("point ");
3878         lambda_vec_print_short(results[f].a->native_lambda, buf);
3879         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3880         printf("%s - %s", buf, buf2);
3881         printf(",   DG ");
3882
3883         printf(dgformat, results[f].dg*kT);
3884         if (bEE)
3885         {
3886             printf(" +/- ");
3887             printf(dgformat, results[f].dg_err*kT);
3888         }
3889         if (histrange_err)
3890         {
3891             printf(" (max. range err. = ");
3892             printf(dgformat, results[f].dg_histrange_err*kT);
3893             printf(")");
3894             sum_histrange_err += results[f].dg_histrange_err*kT;
3895         }
3896
3897         printf("\n");
3898         dg_tot += results[f].dg;
3899     }
3900     printf("\n");
3901     printf("total ");
3902     lambda_vec_print_short(results[0].a->native_lambda, buf);
3903     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3904     printf("%s - %s", buf, buf2);
3905     printf(",   DG ");
3906
3907     printf(dgformat, dg_tot*kT);
3908     if (bEE)
3909     {
3910         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3911         printf(" +/- ");
3912         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3913     }
3914     printf("\n");
3915     if (disc_err)
3916     {
3917         printf("\nmaximum discretization error = ");
3918         printf(dgformat, sum_disc_err);
3919         if (bEE && stat_err < sum_disc_err)
3920         {
3921             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3922         }
3923     }
3924     if (histrange_err)
3925     {
3926         printf("\nmaximum histogram range error = ");
3927         printf(dgformat, sum_histrange_err);
3928         if (bEE && stat_err < sum_histrange_err)
3929         {
3930             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3931         }
3932
3933     }
3934     printf("\n");
3935
3936
3937     if (fpi != NULL)
3938     {
3939         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3940         fprintf(fpi, xvg2format, buf, dg_tot);
3941         ffclose(fpi);
3942     }
3943
3944     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3945     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3946
3947     thanx(stderr);
3948
3949     return 0;
3950 }