src/gromacs/gmxana/gmx_bar.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * Green Red Orange Magenta Azure Cyan Skyblue
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41 #include <string.h>
  42 #include <ctype.h>
  43 #include <math.h>
  44 #include <float.h>
  45
  46 #include "sysstuff.h"
  47 #include "typedefs.h"
  48 #include "smalloc.h"
  49 #include "gromacs/fileio/futil.h"
  50 #include "statutil.h"
  51 #include "macros.h"
  52 #include "gromacs/fileio/enxio.h"
  53 #include "physics.h"
  54 #include "gmx_fatal.h"
  55 #include "xvgr.h"
  56 #include "gmx_ana.h"
  57 #include "maths.h"
  58 #include "string2.h"
  59 #include "names.h"
  60 #include "mdebin.h"
  61
  62
  63 /* Structure for the names of lambda vector components */
  64 typedef struct lambda_components_t
  65 {
  66     char **names;  /* Array of strings with names for the lambda vector
  67                       components */
  68     int    N;      /* The number of components */
  69     int    Nalloc; /* The number of allocated components */
  70 } lambda_components_t;
  71
  72 /* Structure for a lambda vector or a dhdl derivative direction */
  73 typedef struct lambda_vec_t
  74 {
  75     double                    *val;   /* The lambda vector component values. Only valid if
  76                                          dhdl == -1 */
  77     int                        dhdl;  /* The coordinate index for the derivative described by this
  78                                          structure, or -1 */
  79     const lambda_components_t *lc;    /* the associated lambda_components
  80                                          structure */
  81     int                        index; /* The state number (init-lambda-state) of this lambda
  82                                          vector, if known. If not, it is set to -1 */
  83 } lambda_vec_t;
  84
  85 /* the dhdl.xvg data from a simulation */
  86 typedef struct xvg_t
  87 {
  88     const char   *filename;
  89     int           ftp;           /* file type */
  90     int           nset;          /* number of lambdas, including dhdl */
  91     int          *np;            /* number of data points (du or hists) per lambda */
  92     int           np_alloc;      /* number of points (du or hists) allocated */
  93     double        temp;          /* temperature */
  94     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  95     double       *t;             /* the times (of second index for y) */
  96     double      **y;             /* the dU values. y[0] holds the derivative, while
  97                                     further ones contain the energy differences between
  98                                     the native lambda and the 'foreign' lambdas. */
  99     lambda_vec_t  native_lambda; /* the native lambda */
 100
 101     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 102 } xvg_t;
 103
 104
 105 typedef struct hist_t
 106 {
 107     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 108     double          dx[2];                  /* the histogram spacing. The reverse
 109                                                dx is the negative of the forward dx.*/
 110     gmx_large_int_t x0[2];                  /* the (forward + reverse) histogram start
 111                                                point(s) as int */
 112
 113     int             nbin[2];                /* the (forward+reverse) number of bins */
 114     gmx_large_int_t sum;                    /* the total number of counts. Must be
 115                                                the same for forward + reverse.  */
 116     int             nhist;                  /* number of hist datas (forward or reverse) */
 117
 118     double          start_time, delta_time; /* start time, end time of histogram */
 119 } hist_t;
 120
 121
 122 /* an aggregate of samples for partial free energy calculation */
 123 typedef struct samples_t
 124 {
 125     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 126     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 127     double        temp;           /* the temperature */
 128     gmx_bool      derivative;     /* whether this sample is a derivative */
 129
 130     /* The samples come either as either delta U lists: */
 131     int     ndu;                    /* the number of delta U samples */
 132     double *du;                     /* the delta u's */
 133     double *t;                      /* the times associated with those samples, or: */
 134     double  start_time, delta_time; /*start time and delta time for linear time*/
 135
 136     /* or as histograms: */
 137     hist_t *hist; /* a histogram */
 138
 139     /* allocation data: (not NULL for data 'owned' by this struct) */
 140     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 141     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 142     hist_t         *hist_alloc;          /* allocated hist */
 143
 144     gmx_large_int_t ntot;                /* total number of samples */
 145     const char     *filename;            /* the file name this sample comes from */
 146 } samples_t;
 147
 148 /* a sample range (start to end for du-style data, or boolean
 149     for both du-style data and histograms */
 150 typedef struct sample_range_t
 151 {
 152     int        start, end; /* start and end index for du style data */
 153     gmx_bool   use;        /* whether to use this sample */
 154
 155     samples_t *s;          /* the samples this range belongs to */
 156 } sample_range_t;
 157
 158
 159 /* a collection of samples for a partial free energy calculation
 160     (i.e. the collection of samples from one native lambda to one
 161     foreign lambda) */
 162 typedef struct sample_coll_t
 163 {
 164     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 165                                           in the histogram */
 166     lambda_vec_t   *foreign_lambda;    /* collection */
 167     double          temp;              /* the temperature */
 168
 169     int             nsamples;          /* the number of samples */
 170     samples_t     **s;                 /* the samples themselves */
 171     sample_range_t *r;                 /* the sample ranges */
 172     int             nsamples_alloc;    /* number of allocated samples */
 173
 174     gmx_large_int_t ntot;              /* total number of samples in the ranges of
 175                                           this collection */
 176
 177     struct sample_coll_t *next, *prev; /* next and previous in the list */
 178 } sample_coll_t;
 179
 180 /* all the samples associated with a lambda point */
 181 typedef struct lambda_data_t
 182 {
 183     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 184     double                temp;        /* temperature */
 185
 186     sample_coll_t        *sc;          /* the samples */
 187
 188     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 189
 190     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 191 } lambda_data_t;
 192
 193 /* Top-level data structure of simulation data */
 194 typedef struct sim_data_t
 195 {
 196     lambda_data_t      *lb;      /* a lambda data linked list */
 197     lambda_data_t       lb_head; /* The head element of the linked list */
 198
 199     lambda_components_t lc;      /* the allowed components of the lambda
 200                                     vectors */
 201 } sim_data_t;
 202
 203 /* Top-level data structure with calculated values. */
 204 typedef struct {
 205     sample_coll_t *a, *b;            /* the simulation data */
 206
 207     double         dg;               /* the free energy difference */
 208     double         dg_err;           /* the free energy difference */
 209
 210     double         dg_disc_err;      /* discretization error */
 211     double         dg_histrange_err; /* histogram range error */
 212
 213     double         sa;               /* relative entropy of b in state a */
 214     double         sa_err;           /* error in sa */
 215     double         sb;               /* relative entropy of a in state b */
 216     double         sb_err;           /* error in sb */
 217
 218     double         dg_stddev;        /* expected dg stddev per sample */
 219     double         dg_stddev_err;    /* error in dg_stddev */
 220 } barres_t;
 221
 222
 223 /* Initialize a lambda_components structure */
 224 static void lambda_components_init(lambda_components_t *lc)
 225 {
 226     lc->N      = 0;
 227     lc->Nalloc = 2;
 228     snew(lc->names, lc->Nalloc);
 229 }
 230
 231 /* Add a component to a lambda_components structure */
 232 static void lambda_components_add(lambda_components_t *lc,
 233                                   const char *name, size_t name_length)
 234 {
 235     while (lc->N + 1 > lc->Nalloc)
 236     {
 237         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 238         srealloc( lc->names, lc->Nalloc );
 239     }
 240     snew(lc->names[lc->N], name_length+1);
 241     strncpy(lc->names[lc->N], name, name_length);
 242     lc->N++;
 243 }
 244
 245 /* check whether a component with index 'index' matches the given name, or
 246    is also NULL. Returns TRUE if this is the case.
 247    the string name does not need to end */
 248 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 249                                         int                        index,
 250                                         const char                *name,
 251                                         size_t                     name_length)
 252 {
 253     size_t len;
 254     if (index >= lc->N)
 255     {
 256         return FALSE;
 257     }
 258     if (name == NULL && lc->names[index] == NULL)
 259     {
 260         return TRUE;
 261     }
 262     if ((name == NULL) != (lc->names[index] == NULL))
 263     {
 264         return FALSE;
 265     }
 266     len = strlen(lc->names[index]);
 267     if (len != name_length)
 268     {
 269         return FALSE;
 270     }
 271     if (strncmp(lc->names[index], name, name_length) == 0)
 272     {
 273         return TRUE;
 274     }
 275     return FALSE;
 276 }
 277
 278 /* Find the index of a given lambda component name, or -1 if not found */
 279 static int lambda_components_find(const lambda_components_t *lc,
 280                                   const char                *name,
 281                                   size_t                     name_length)
 282 {
 283     int i;
 284
 285     for (i = 0; i < lc->N; i++)
 286     {
 287         if (strncmp(lc->names[i], name, name_length) == 0)
 288         {
 289             return i;
 290         }
 291     }
 292     return -1;
 293 }
 294
 295
 296
 297 /* initialize a lambda vector */
 298 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 299 {
 300     snew(lv->val, lc->N);
 301     lv->index = -1;
 302     lv->dhdl  = -1;
 303     lv->lc    = lc;
 304 }
 305
 306 static void lambda_vec_destroy(lambda_vec_t *lv)
 307 {
 308     sfree(lv->val);
 309 }
 310
 311 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 312 {
 313     int i;
 314
 315     lambda_vec_init(lv, orig->lc);
 316     lv->dhdl  = orig->dhdl;
 317     lv->index = orig->index;
 318     for (i = 0; i < lv->lc->N; i++)
 319     {
 320         lv->val[i] = orig->val[i];
 321     }
 322 }
 323
 324 /* write a lambda vec to a preallocated string */
 325 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 326 {
 327     int    i;
 328     size_t np;
 329
 330     str[0] = 0; /* reset the string */
 331     if (lv->dhdl < 0)
 332     {
 333         if (named)
 334         {
 335             str += sprintf(str, "delta H to ");
 336         }
 337         if (lv->lc->N > 1)
 338         {
 339             str += sprintf(str, "(");
 340         }
 341         for (i = 0; i < lv->lc->N; i++)
 342         {
 343             str += sprintf(str, "%g", lv->val[i]);
 344             if (i < lv->lc->N-1)
 345             {
 346                 str += sprintf(str, ", ");
 347             }
 348         }
 349         if (lv->lc->N > 1)
 350         {
 351             str += sprintf(str, ")");
 352         }
 353     }
 354     else
 355     {
 356         /* this lambda vector describes a derivative */
 357         str += sprintf(str, "dH/dl");
 358         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 359         {
 360             str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 361         }
 362     }
 363 }
 364
 365 /* write a shortened version of the lambda vec to a preallocated string */
 366 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 367 {
 368     int    i;
 369     size_t np;
 370
 371     if (lv->index >= 0)
 372     {
 373         sprintf(str, "%6d", lv->index);
 374     }
 375     else
 376     {
 377         if (lv->dhdl < 0)
 378         {
 379             sprintf(str, "%6.3f", lv->val[0]);
 380         }
 381         else
 382         {
 383             sprintf(str, "dH/dl[%d]", lv->dhdl);
 384         }
 385     }
 386 }
 387
 388 /* write an intermediate version of two lambda vecs to a preallocated string */
 389 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 390                                           const lambda_vec_t *b, char *str)
 391 {
 392     int    i;
 393     size_t np;
 394
 395     str[0] = 0;
 396     if ( (a->index >= 0) && (b->index >= 0) )
 397     {
 398         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 399     }
 400     else
 401     {
 402         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 403         {
 404             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 405         }
 406     }
 407 }
 408
 409
 410
 411 /* calculate the difference in lambda vectors: c = a-b.
 412    c must be initialized already, and a and b must describe non-derivative
 413    lambda points */
 414 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 415                             lambda_vec_t *c)
 416 {
 417     int i;
 418
 419     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 420     {
 421         gmx_fatal(FARGS,
 422                   "Trying to calculate the difference between derivatives instead of lambda points");
 423     }
 424     if ((a->lc != b->lc) || (a->lc != c->lc) )
 425     {
 426         gmx_fatal(FARGS,
 427                   "Trying to calculate the difference lambdas with differing basis set");
 428     }
 429     for (i = 0; i < a->lc->N; i++)
 430     {
 431         c->val[i] = a->val[i] - b->val[i];
 432     }
 433 }
 434
 435 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 436    a and b must describe non-derivative lambda points */
 437 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 438 {
 439     int    i;
 440     double ret = 0.;
 441
 442     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 443     {
 444         gmx_fatal(FARGS,
 445                   "Trying to calculate the difference between derivatives instead of lambda points");
 446     }
 447     if (a->lc != b->lc)
 448     {
 449         gmx_fatal(FARGS,
 450                   "Trying to calculate the difference lambdas with differing basis set");
 451     }
 452     for (i = 0; i < a->lc->N; i++)
 453     {
 454         double df = a->val[i] - b->val[i];
 455         ret += df*df;
 456     }
 457     return sqrt(ret);
 458 }
 459
 460
 461 /* check whether two lambda vectors are the same */
 462 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 463 {
 464     int i;
 465
 466     if (a->lc != b->lc)
 467     {
 468         return FALSE;
 469     }
 470     if (a->dhdl < 0)
 471     {
 472         for (i = 0; i < a->lc->N; i++)
 473         {
 474             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 475             {
 476                 return FALSE;
 477             }
 478         }
 479         return TRUE;
 480     }
 481     else
 482     {
 483         /* they're derivatives, so we check whether the indices match */
 484         return (a->dhdl == b->dhdl);
 485     }
 486 }
 487
 488 /* Compare the sort order of two foreign lambda vectors
 489
 490     returns 1 if a is 'bigger' than b,
 491     returns 0 if they're the same,
 492     returns -1 if a is 'smaller' than b.*/
 493 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 494                                        const lambda_vec_t *b)
 495 {
 496     int      i;
 497     double   norm_a    = 0, norm_b = 0;
 498     gmx_bool different = FALSE;
 499
 500     if (a->lc != b->lc)
 501     {
 502         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 503     }
 504     /* if either one has an index we sort based on that */
 505     if ((a->index >= 0) || (b->index >= 0))
 506     {
 507         if (a->index == b->index)
 508         {
 509             return 0;
 510         }
 511         return (a->index > b->index) ? 1 : -1;
 512     }
 513     if (a->dhdl >= 0 || b->dhdl >= 0)
 514     {
 515         /* lambda vectors that are derivatives always sort higher than those
 516            without derivatives */
 517         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 518         {
 519             return (a->dhdl >= 0) ? 1 : -1;
 520         }
 521         return a->dhdl > b->dhdl;
 522     }
 523
 524     /* neither has an index, so we can only sort on the lambda components,
 525        which is only valid if there is one component */
 526     for (i = 0; i < a->lc->N; i++)
 527     {
 528         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 529         {
 530             different = TRUE;
 531         }
 532         norm_a += a->val[i]*a->val[i];
 533         norm_b += b->val[i]*b->val[i];
 534     }
 535     if (!different)
 536     {
 537         return 0;
 538     }
 539     return norm_a > norm_b;
 540 }
 541
 542 /* Compare the sort order of two native lambda vectors
 543
 544     returns 1 if a is 'bigger' than b,
 545     returns 0 if they're the same,
 546     returns -1 if a is 'smaller' than b.*/
 547 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 548                                       const lambda_vec_t *b)
 549 {
 550     int i;
 551
 552     if (a->lc != b->lc)
 553     {
 554         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 555     }
 556     /* if either one has an index we sort based on that */
 557     if ((a->index >= 0) || (b->index >= 0))
 558     {
 559         if (a->index == b->index)
 560         {
 561             return 0;
 562         }
 563         return (a->index > b->index) ? 1 : -1;
 564     }
 565     /* neither has an index, so we can only sort on the lambda components,
 566        which is only valid if there is one component */
 567     if (a->lc->N > 1)
 568     {
 569         gmx_fatal(FARGS,
 570                   "Can't compare lambdas with no index and > 1 component");
 571     }
 572     if (a->dhdl >= 0 || b->dhdl >= 0)
 573     {
 574         gmx_fatal(FARGS,
 575                   "Can't compare native lambdas that are derivatives");
 576     }
 577     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 578     {
 579         return 0;
 580     }
 581     return a->val[0] > b->val[0] ? 1 : -1;
 582 }
 583
 584
 585
 586
 587 static void hist_init(hist_t *h, int nhist, int *nbin)
 588 {
 589     int i;
 590     if (nhist > 2)
 591     {
 592         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 593     }
 594     for (i = 0; i < nhist; i++)
 595     {
 596         snew(h->bin[i], nbin[i]);
 597         h->x0[i]      = 0;
 598         h->nbin[i]    = nbin[i];
 599         h->start_time = h->delta_time = 0;
 600         h->dx[i]      = 0;
 601     }
 602     h->sum   = 0;
 603     h->nhist = nhist;
 604 }
 605
 606 static void hist_destroy(hist_t *h)
 607 {
 608     sfree(h->bin);
 609 }
 610
 611
 612 static void xvg_init(xvg_t *ba)
 613 {
 614     ba->filename = NULL;
 615     ba->nset     = 0;
 616     ba->np_alloc = 0;
 617     ba->np       = NULL;
 618     ba->y        = NULL;
 619 }
 620
 621 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 622                          lambda_vec_t *foreign_lambda, double temp,
 623                          gmx_bool derivative, const char *filename)
 624 {
 625     s->native_lambda  = native_lambda;
 626     s->foreign_lambda = foreign_lambda;
 627     s->temp           = temp;
 628     s->derivative     = derivative;
 629
 630     s->ndu        = 0;
 631     s->du         = NULL;
 632     s->t          = NULL;
 633     s->start_time = s->delta_time = 0;
 634     s->hist       = NULL;
 635     s->du_alloc   = NULL;
 636     s->t_alloc    = NULL;
 637     s->hist_alloc = NULL;
 638     s->ndu_alloc  = 0;
 639     s->nt_alloc   = 0;
 640
 641     s->ntot     = 0;
 642     s->filename = filename;
 643 }
 644
 645 static void sample_range_init(sample_range_t *r, samples_t *s)
 646 {
 647     r->start = 0;
 648     r->end   = s->ndu;
 649     r->use   = TRUE;
 650     r->s     = NULL;
 651 }
 652
 653 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 654                              lambda_vec_t *foreign_lambda, double temp)
 655 {
 656     sc->native_lambda  = native_lambda;
 657     sc->foreign_lambda = foreign_lambda;
 658     sc->temp           = temp;
 659
 660     sc->nsamples       = 0;
 661     sc->s              = NULL;
 662     sc->r              = NULL;
 663     sc->nsamples_alloc = 0;
 664
 665     sc->ntot = 0;
 666     sc->next = sc->prev = NULL;
 667 }
 668
 669 static void sample_coll_destroy(sample_coll_t *sc)
 670 {
 671     /* don't free the samples themselves */
 672     sfree(sc->r);
 673     sfree(sc->s);
 674 }
 675
 676
 677 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 678                              double temp)
 679 {
 680     l->lambda = native_lambda;
 681     l->temp   = temp;
 682
 683     l->next = NULL;
 684     l->prev = NULL;
 685
 686     l->sc = &(l->sc_head);
 687
 688     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 689     l->sc->next = l->sc;
 690     l->sc->prev = l->sc;
 691 }
 692
 693 static void barres_init(barres_t *br)
 694 {
 695     br->dg            = 0;
 696     br->dg_err        = 0;
 697     br->sa            = 0;
 698     br->sa_err        = 0;
 699     br->sb            = 0;
 700     br->sb_err        = 0;
 701     br->dg_stddev     = 0;
 702     br->dg_stddev_err = 0;
 703
 704     br->a = NULL;
 705     br->b = NULL;
 706 }
 707
 708
 709 /* calculate the total number of samples in a sample collection */
 710 static void sample_coll_calc_ntot(sample_coll_t *sc)
 711 {
 712     int i;
 713
 714     sc->ntot = 0;
 715     for (i = 0; i < sc->nsamples; i++)
 716     {
 717         if (sc->r[i].use)
 718         {
 719             if (sc->s[i]->hist)
 720             {
 721                 sc->ntot += sc->s[i]->ntot;
 722             }
 723             else
 724             {
 725                 sc->ntot += sc->r[i].end - sc->r[i].start;
 726             }
 727         }
 728     }
 729 }
 730
 731
 732 /* find the barsamples_t associated with a lambda that corresponds to
 733    a specific foreign lambda */
 734 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 735                                                    lambda_vec_t  *foreign_lambda)
 736 {
 737     sample_coll_t *sc = l->sc->next;
 738
 739     while (sc != l->sc)
 740     {
 741         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 742         {
 743             return sc;
 744         }
 745         sc = sc->next;
 746     }
 747
 748     return NULL;
 749 }
 750
 751 /* insert li into an ordered list of lambda_colls */
 752 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 753 {
 754     sample_coll_t *scn = l->sc->next;
 755     while ( (scn != l->sc) )
 756     {
 757         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 758         {
 759             break;
 760         }
 761         scn = scn->next;
 762     }
 763     /* now insert it before the found scn */
 764     sc->next        = scn;
 765     sc->prev        = scn->prev;
 766     scn->prev->next = sc;
 767     scn->prev       = sc;
 768 }
 769
 770 /* insert li into an ordered list of lambdas */
 771 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 772 {
 773     lambda_data_t *lc = head->next;
 774     while (lc != head)
 775     {
 776         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 777         {
 778             break;
 779         }
 780         lc = lc->next;
 781     }
 782     /* now insert ourselves before the found lc */
 783     li->next       = lc;
 784     li->prev       = lc->prev;
 785     lc->prev->next = li;
 786     lc->prev       = li;
 787 }
 788
 789 /* insert a sample and a sample_range into a sample_coll. The
 790     samples are stored as a pointer, the range is copied. */
 791 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 792                                       sample_range_t *r)
 793 {
 794     /* first check if it belongs here */
 795     if (sc->temp != s->temp)
 796     {
 797         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 798                   s->filename, sc->next->s[0]->filename);
 799     }
 800     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 801     {
 802         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 803                   s->filename, sc->next->s[0]->filename);
 804     }
 805     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 806     {
 807         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 808                   s->filename, sc->next->s[0]->filename);
 809     }
 810
 811     /* check if there's room */
 812     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 813     {
 814         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 815         srenew(sc->s, sc->nsamples_alloc);
 816         srenew(sc->r, sc->nsamples_alloc);
 817     }
 818     sc->s[sc->nsamples] = s;
 819     sc->r[sc->nsamples] = *r;
 820     sc->nsamples++;
 821
 822     sample_coll_calc_ntot(sc);
 823 }
 824
 825 /* insert a sample into a lambda_list, creating the right sample_coll if
 826    neccesary */
 827 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 828 {
 829     gmx_bool       found = FALSE;
 830     sample_coll_t *sc;
 831     sample_range_t r;
 832
 833     lambda_data_t *l = head->next;
 834
 835     /* first search for the right lambda_data_t */
 836     while (l != head)
 837     {
 838         if (lambda_vec_same(l->lambda, s->native_lambda) )
 839         {
 840             found = TRUE;
 841             break;
 842         }
 843         l = l->next;
 844     }
 845
 846     if (!found)
 847     {
 848         snew(l, 1);                                     /* allocate a new one */
 849         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 850         lambda_data_insert_lambda(head, l);             /* add it to the list */
 851     }
 852
 853     /* now look for a sample collection */
 854     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 855     if (!sc)
 856     {
 857         snew(sc, 1); /* allocate a new one */
 858         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 859         lambda_data_insert_sample_coll(l, sc);
 860     }
 861
 862     /* now insert the samples into the sample coll */
 863     sample_range_init(&r, s);
 864     sample_coll_insert_sample(sc, s, &r);
 865 }
 866
 867
 868 /* make a histogram out of a sample collection */
 869 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 870                                   int *nbin_alloc, int *nbin,
 871                                   double *dx, double *xmin, int nbin_default)
 872 {
 873     int      i, j, k;
 874     gmx_bool dx_set   = FALSE;
 875     gmx_bool xmin_set = FALSE;
 876
 877     gmx_bool xmax_set      = FALSE;
 878     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 879                                        limits of a histogram */
 880     double   xmax          = -1;
 881
 882     /* first determine dx and xmin; try the histograms */
 883     for (i = 0; i < sc->nsamples; i++)
 884     {
 885         if (sc->s[i]->hist)
 886         {
 887             hist_t *hist = sc->s[i]->hist;
 888             for (k = 0; k < hist->nhist; k++)
 889             {
 890                 double hdx      = hist->dx[k];
 891                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 892
 893                 /* we use the biggest dx*/
 894                 if ( (!dx_set) || hist->dx[0] > *dx)
 895                 {
 896                     dx_set = TRUE;
 897                     *dx    = hist->dx[0];
 898                 }
 899                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 900                 {
 901                     xmin_set = TRUE;
 902                     *xmin    = (hist->x0[k]*hdx);
 903                 }
 904
 905                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 906                 {
 907                     xmax_set = TRUE;
 908                     xmax     = xmax_now;
 909                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 910                     {
 911                         xmax_set_hard = TRUE;
 912                     }
 913                 }
 914                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 915                 {
 916                     xmax_set_hard = TRUE;
 917                     xmax          = xmax_now;
 918                 }
 919             }
 920         }
 921     }
 922     /* and the delta us */
 923     for (i = 0; i < sc->nsamples; i++)
 924     {
 925         if (sc->s[i]->ndu > 0)
 926         {
 927             /* determine min and max */
 928             int    starti  = sc->r[i].start;
 929             int    endi    = sc->r[i].end;
 930             double du_xmin = sc->s[i]->du[starti];
 931             double du_xmax = sc->s[i]->du[starti];
 932             for (j = starti+1; j < endi; j++)
 933             {
 934                 if (sc->s[i]->du[j] < du_xmin)
 935                 {
 936                     du_xmin = sc->s[i]->du[j];
 937                 }
 938                 if (sc->s[i]->du[j] > du_xmax)
 939                 {
 940                     du_xmax = sc->s[i]->du[j];
 941                 }
 942             }
 943
 944             /* and now change the limits */
 945             if ( (!xmin_set) || (du_xmin < *xmin) )
 946             {
 947                 xmin_set = TRUE;
 948                 *xmin    = du_xmin;
 949             }
 950             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 951             {
 952                 xmax_set = TRUE;
 953                 xmax     = du_xmax;
 954             }
 955         }
 956     }
 957
 958     if (!xmax_set || !xmin_set)
 959     {
 960         *nbin = 0;
 961         return;
 962     }
 963
 964
 965     if (!dx_set)
 966     {
 967         *nbin = nbin_default;
 968         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 969                                                be 0, and we count from 0 */
 970     }
 971     else
 972     {
 973         *nbin = (xmax-(*xmin))/(*dx);
 974     }
 975
 976     if (*nbin > *nbin_alloc)
 977     {
 978         *nbin_alloc = *nbin;
 979         srenew(*bin, *nbin_alloc);
 980     }
 981
 982     /* reset the histogram */
 983     for (i = 0; i < (*nbin); i++)
 984     {
 985         (*bin)[i] = 0;
 986     }
 987
 988     /* now add the actual data */
 989     for (i = 0; i < sc->nsamples; i++)
 990     {
 991         if (sc->s[i]->hist)
 992         {
 993             hist_t *hist = sc->s[i]->hist;
 994             for (k = 0; k < hist->nhist; k++)
 995             {
 996                 double hdx       = hist->dx[k];
 997                 double xmin_hist = hist->x0[k]*hdx;
 998                 for (j = 0; j < hist->nbin[k]; j++)
 999                 {
1000                     /* calculate the bin corresponding to the middle of the
1001                        original bin */
1002                     double x     = hdx*(j+0.5) + xmin_hist;
1003                     int    binnr = (int)((x-(*xmin))/(*dx));
1004
1005                     if (binnr >= *nbin || binnr < 0)
1006                     {
1007                         binnr = (*nbin)-1;
1008                     }
1009
1010                     (*bin)[binnr] += hist->bin[k][j];
1011                 }
1012             }
1013         }
1014         else
1015         {
1016             int starti = sc->r[i].start;
1017             int endi   = sc->r[i].end;
1018             for (j = starti; j < endi; j++)
1019             {
1020                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1021                 if (binnr >= *nbin || binnr < 0)
1022                 {
1023                     binnr = (*nbin)-1;
1024                 }
1025
1026                 (*bin)[binnr]++;
1027             }
1028         }
1029     }
1030 }
1031
1032 /* write a collection of histograms to a file */
1033 void sim_data_histogram(sim_data_t *sd, const char *filename,
1034                         int nbin_default, const output_env_t oenv)
1035 {
1036     char           label_x[STRLEN];
1037     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1038     const char    *title   = "N(\\DeltaH)";
1039     const char    *label_y = "Samples";
1040     FILE          *fp;
1041     lambda_data_t *bl;
1042     int            nsets     = 0;
1043     char         **setnames  = NULL;
1044     gmx_bool       first_set = FALSE;
1045     /* histogram data: */
1046     int           *hist       = NULL;
1047     int            nbin       = 0;
1048     int            nbin_alloc = 0;
1049     double         dx         = 0;
1050     double         min        = 0;
1051     int            i;
1052     lambda_data_t *bl_head = sd->lb;
1053
1054     printf("\nWriting histogram to %s\n", filename);
1055     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1056
1057     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1058
1059     /* first get all the set names */
1060     bl = bl_head->next;
1061     /* iterate over all lambdas */
1062     while (bl != bl_head)
1063     {
1064         sample_coll_t *sc = bl->sc->next;
1065
1066         /* iterate over all samples */
1067         while (sc != bl->sc)
1068         {
1069             char buf[STRLEN], buf2[STRLEN];
1070
1071             nsets++;
1072             srenew(setnames, nsets);
1073             snew(setnames[nsets-1], STRLEN);
1074             if (sc->foreign_lambda->dhdl < 0)
1075             {
1076                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1077                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1078                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1079                         deltag, lambda, buf2, lambda, buf);
1080             }
1081             else
1082             {
1083                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1084                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1085                         dhdl, lambda, buf);
1086             }
1087             sc = sc->next;
1088         }
1089
1090         bl = bl->next;
1091     }
1092     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1093
1094
1095     /* now make the histograms */
1096     bl = bl_head->next;
1097     /* iterate over all lambdas */
1098     while (bl != bl_head)
1099     {
1100         sample_coll_t *sc = bl->sc->next;
1101
1102         /* iterate over all samples */
1103         while (sc != bl->sc)
1104         {
1105             if (!first_set)
1106             {
1107                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1108             }
1109
1110             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1111                                   nbin_default);
1112
1113             for (i = 0; i < nbin; i++)
1114             {
1115                 double xmin = i*dx + min;
1116                 double xmax = (i+1)*dx + min;
1117
1118                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1119             }
1120
1121             first_set = FALSE;
1122             sc        = sc->next;
1123         }
1124
1125         bl = bl->next;
1126     }
1127
1128     if (hist)
1129     {
1130         sfree(hist);
1131     }
1132
1133     xvgrclose(fp);
1134 }
1135
1136 /* create a collection (array) of barres_t object given a ordered linked list
1137    of barlamda_t sample collections */
1138 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1139                                     gmx_bool use_dhdl)
1140 {
1141     lambda_data_t *bl;
1142     int            nlambda = 0;
1143     barres_t      *res;
1144     int            i;
1145     gmx_bool       dhdl    = FALSE;
1146     gmx_bool       first   = TRUE;
1147     lambda_data_t *bl_head = sd->lb;
1148
1149     /* first count the lambdas */
1150     bl = bl_head->next;
1151     while (bl != bl_head)
1152     {
1153         nlambda++;
1154         bl = bl->next;
1155     }
1156     snew(res, nlambda-1);
1157
1158     /* next put the right samples in the res */
1159     *nres = 0;
1160     bl    = bl_head->next->next; /* we start with the second one. */
1161     while (bl != bl_head)
1162     {
1163         sample_coll_t *sc, *scprev;
1164         barres_t      *br = &(res[*nres]);
1165         /* there is always a previous one. we search for that as a foreign
1166            lambda: */
1167         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1168         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1169
1170         barres_init(br);
1171
1172         if (use_dhdl)
1173         {
1174             /* we use dhdl */
1175
1176             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1177             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1178
1179             if (first)
1180             {
1181                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1182                 dhdl = TRUE;
1183             }
1184             if (!dhdl)
1185             {
1186                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1187             }
1188         }
1189         else if (!scprev && !sc)
1190         {
1191             gmx_fatal(FARGS, "There is no path from lambda=%f -> %f that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1192         }
1193
1194         /* normal delta H */
1195         if (!scprev)
1196         {
1197             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->lambda, bl->prev->lambda);
1198         }
1199         if (!sc)
1200         {
1201             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->prev->lambda, bl->lambda);
1202         }
1203         br->a = scprev;
1204         br->b = sc;
1205
1206         first = FALSE;
1207         (*nres)++;
1208         bl = bl->next;
1209     }
1210     return res;
1211 }
1212
1213 /* estimate the maximum discretization error */
1214 static double barres_list_max_disc_err(barres_t *res, int nres)
1215 {
1216     int    i, j;
1217     double disc_err = 0.;
1218     double delta_lambda;
1219
1220     for (i = 0; i < nres; i++)
1221     {
1222         barres_t *br = &(res[i]);
1223
1224         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1225                                            br->a->native_lambda);
1226
1227         for (j = 0; j < br->a->nsamples; j++)
1228         {
1229             if (br->a->s[j]->hist)
1230             {
1231                 double Wfac = 1.;
1232                 if (br->a->s[j]->derivative)
1233                 {
1234                     Wfac =  delta_lambda;
1235                 }
1236
1237                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1238             }
1239         }
1240         for (j = 0; j < br->b->nsamples; j++)
1241         {
1242             if (br->b->s[j]->hist)
1243             {
1244                 double Wfac = 1.;
1245                 if (br->b->s[j]->derivative)
1246                 {
1247                     Wfac =  delta_lambda;
1248                 }
1249                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1250             }
1251         }
1252     }
1253     return disc_err;
1254 }
1255
1256
1257 /* impose start and end times on a sample collection, updating sample_ranges */
1258 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1259                                      double end_t)
1260 {
1261     int i;
1262     for (i = 0; i < sc->nsamples; i++)
1263     {
1264         samples_t      *s = sc->s[i];
1265         sample_range_t *r = &(sc->r[i]);
1266         if (s->hist)
1267         {
1268             double end_time = s->hist->delta_time*s->hist->sum +
1269                 s->hist->start_time;
1270             if (s->hist->start_time < begin_t || end_time > end_t)
1271             {
1272                 r->use = FALSE;
1273             }
1274         }
1275         else
1276         {
1277             if (!s->t)
1278             {
1279                 double end_time;
1280                 if (s->start_time < begin_t)
1281                 {
1282                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1283                 }
1284                 end_time = s->delta_time*s->ndu + s->start_time;
1285                 if (end_time > end_t)
1286                 {
1287                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1288                 }
1289             }
1290             else
1291             {
1292                 int j;
1293                 for (j = 0; j < s->ndu; j++)
1294                 {
1295                     if (s->t[j] < begin_t)
1296                     {
1297                         r->start = j;
1298                     }
1299
1300                     if (s->t[j] >= end_t)
1301                     {
1302                         r->end = j;
1303                         break;
1304                     }
1305                 }
1306             }
1307             if (r->start > r->end)
1308             {
1309                 r->use = FALSE;
1310             }
1311         }
1312     }
1313     sample_coll_calc_ntot(sc);
1314 }
1315
1316 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1317 {
1318     double         first_t, last_t;
1319     double         begin_t, end_t;
1320     lambda_data_t *lc;
1321     lambda_data_t *head = sd->lb;
1322     int            j;
1323
1324     if (begin <= 0 && end < 0)
1325     {
1326         return;
1327     }
1328
1329     /* first determine the global start and end times */
1330     first_t = -1;
1331     last_t  = -1;
1332     lc      = head->next;
1333     while (lc != head)
1334     {
1335         sample_coll_t *sc = lc->sc->next;
1336         while (sc != lc->sc)
1337         {
1338             for (j = 0; j < sc->nsamples; j++)
1339             {
1340                 double start_t, end_t;
1341
1342                 start_t = sc->s[j]->start_time;
1343                 end_t   =   sc->s[j]->start_time;
1344                 if (sc->s[j]->hist)
1345                 {
1346                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1347                 }
1348                 else
1349                 {
1350                     if (sc->s[j]->t)
1351                     {
1352                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1353                     }
1354                     else
1355                     {
1356                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1357                     }
1358                 }
1359
1360                 if (start_t < first_t || first_t < 0)
1361                 {
1362                     first_t = start_t;
1363                 }
1364                 if (end_t > last_t)
1365                 {
1366                     last_t = end_t;
1367                 }
1368             }
1369             sc = sc->next;
1370         }
1371         lc = lc->next;
1372     }
1373
1374     /* calculate the actual times */
1375     if (begin > 0)
1376     {
1377         begin_t = begin;
1378     }
1379     else
1380     {
1381         begin_t = first_t;
1382     }
1383
1384     if (end > 0)
1385     {
1386         end_t = end;
1387     }
1388     else
1389     {
1390         end_t = last_t;
1391     }
1392     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1393
1394     if (begin_t > end_t)
1395     {
1396         return;
1397     }
1398     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1399
1400     /* then impose them */
1401     lc = head->next;
1402     while (lc != head)
1403     {
1404         sample_coll_t *sc = lc->sc->next;
1405         while (sc != lc->sc)
1406         {
1407             sample_coll_impose_times(sc, begin_t, end_t);
1408             sc = sc->next;
1409         }
1410         lc = lc->next;
1411     }
1412 }
1413
1414
1415 /* create subsample i out of ni from an existing sample_coll */
1416 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1417                                              sample_coll_t *sc_orig,
1418                                              int i, int ni)
1419 {
1420     int             j;
1421     int             hist_start, hist_end;
1422
1423     gmx_large_int_t ntot_start;
1424     gmx_large_int_t ntot_end;
1425     gmx_large_int_t ntot_so_far;
1426
1427     *sc = *sc_orig; /* just copy all fields */
1428
1429     /* allocate proprietary memory */
1430     snew(sc->s, sc_orig->nsamples);
1431     snew(sc->r, sc_orig->nsamples);
1432
1433     /* copy the samples */
1434     for (j = 0; j < sc_orig->nsamples; j++)
1435     {
1436         sc->s[j] = sc_orig->s[j];
1437         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1438     }
1439
1440     /* now fix start and end fields */
1441     /* the casts avoid possible overflows */
1442     ntot_start  = (gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1443     ntot_end    = (gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1444     ntot_so_far = 0;
1445     for (j = 0; j < sc->nsamples; j++)
1446     {
1447         gmx_large_int_t ntot_add;
1448         gmx_large_int_t new_start, new_end;
1449
1450         if (sc->r[j].use)
1451         {
1452             if (sc->s[j]->hist)
1453             {
1454                 ntot_add = sc->s[j]->hist->sum;
1455             }
1456             else
1457             {
1458                 ntot_add = sc->r[j].end - sc->r[j].start;
1459             }
1460         }
1461         else
1462         {
1463             ntot_add = 0;
1464         }
1465
1466         if (!sc->s[j]->hist)
1467         {
1468             if (ntot_so_far < ntot_start)
1469             {
1470                 /* adjust starting point */
1471                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1472             }
1473             else
1474             {
1475                 new_start = sc->r[j].start;
1476             }
1477             /* adjust end point */
1478             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1479             if (new_end > sc->r[j].end)
1480             {
1481                 new_end = sc->r[j].end;
1482             }
1483
1484             /* check if we're in range at all */
1485             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1486             {
1487                 new_start = 0;
1488                 new_end   = 0;
1489             }
1490             /* and write the new range */
1491             sc->r[j].start = (int)new_start;
1492             sc->r[j].end   = (int)new_end;
1493         }
1494         else
1495         {
1496             if (sc->r[j].use)
1497             {
1498                 double overlap;
1499                 double ntot_start_norm, ntot_end_norm;
1500                 /* calculate the amount of overlap of the
1501                    desired range (ntot_start -- ntot_end) onto
1502                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1503
1504                 /* first calculate normalized bounds
1505                    (where 0 is the start of the hist range, and 1 the end) */
1506                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1507                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1508
1509                 /* now fix the boundaries */
1510                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1511                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1512
1513                 /* and calculate the overlap */
1514                 overlap = ntot_end_norm - ntot_start_norm;
1515
1516                 if (overlap > 0.95) /* we allow for 5% slack */
1517                 {
1518                     sc->r[j].use = TRUE;
1519                 }
1520                 else if (overlap < 0.05)
1521                 {
1522                     sc->r[j].use = FALSE;
1523                 }
1524                 else
1525                 {
1526                     return FALSE;
1527                 }
1528             }
1529         }
1530         ntot_so_far += ntot_add;
1531     }
1532     sample_coll_calc_ntot(sc);
1533
1534     return TRUE;
1535 }
1536
1537 /* calculate minimum and maximum work values in sample collection */
1538 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1539                                 double *Wmin, double *Wmax)
1540 {
1541     int i, j;
1542
1543     *Wmin = FLT_MAX;
1544     *Wmax = -FLT_MAX;
1545
1546     for (i = 0; i < sc->nsamples; i++)
1547     {
1548         samples_t      *s = sc->s[i];
1549         sample_range_t *r = &(sc->r[i]);
1550         if (r->use)
1551         {
1552             if (!s->hist)
1553             {
1554                 for (j = r->start; j < r->end; j++)
1555                 {
1556                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1557                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1558                 }
1559             }
1560             else
1561             {
1562                 int    hd = 0; /* determine the histogram direction: */
1563                 double dx;
1564                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1565                 {
1566                     hd = 1;
1567                 }
1568                 dx = s->hist->dx[hd];
1569
1570                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1571                 {
1572                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1573                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1574                     /* look for the highest value bin with values */
1575                     if (s->hist->bin[hd][j] > 0)
1576                     {
1577                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1578                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1579                         break;
1580                     }
1581                 }
1582             }
1583         }
1584     }
1585 }
1586
1587 /* Initialize a sim_data structure */
1588 static void sim_data_init(sim_data_t *sd)
1589 {
1590     /* make linked list */
1591     sd->lb       = &(sd->lb_head);
1592     sd->lb->next = sd->lb;
1593     sd->lb->prev = sd->lb;
1594
1595     lambda_components_init(&(sd->lc));
1596 }
1597
1598
1599 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1600 {
1601     int    i;
1602     double sum;
1603
1604     sum = 0;
1605
1606     for (i = 0; i < n; i++)
1607     {
1608         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1609     }
1610
1611     return sum;
1612 }
1613
1614 /* calculate the BAR average given a histogram
1615
1616     if type== 0, calculate the best estimate for the average,
1617     if type==-1, calculate the minimum possible value given the histogram
1618     if type== 1, calculate the maximum possible value given the histogram */
1619 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1620                                 int type)
1621 {
1622     double sum = 0.;
1623     int    i;
1624     int    max;
1625     /* normalization factor multiplied with bin width and
1626        number of samples (we normalize through M): */
1627     double normdx = 1.;
1628     int    hd     = 0; /* determine the histogram direction: */
1629     double dx;
1630
1631     if ( (hist->nhist > 1) && (Wfac < 0) )
1632     {
1633         hd = 1;
1634     }
1635     dx  = hist->dx[hd];
1636     max = hist->nbin[hd]-1;
1637     if (type == 1)
1638     {
1639         max = hist->nbin[hd]; /* we also add whatever was out of range */
1640     }
1641
1642     for (i = 0; i < max; i++)
1643     {
1644         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1645         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1646
1647         sum += pxdx/(1. + exp(x + sbMmDG));
1648     }
1649
1650     return sum;
1651 }
1652
1653 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1654                                 double temp, double tol, int type)
1655 {
1656     double kT, beta, M;
1657     double DG;
1658     int    i, j;
1659     double Wfac1, Wfac2, Wmin, Wmax;
1660     double DG0, DG1, DG2, dDG1;
1661     double sum1, sum2;
1662     double n1, n2; /* numbers of samples as doubles */
1663
1664     kT   = BOLTZ*temp;
1665     beta = 1/kT;
1666
1667     /* count the numbers of samples */
1668     n1 = ca->ntot;
1669     n2 = cb->ntot;
1670
1671     M = log(n1/n2);
1672
1673     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1674     if (ca->foreign_lambda->dhdl < 0)
1675     {
1676         /* this is the case when the delta U were calculated directly
1677            (i.e. we're not scaling dhdl) */
1678         Wfac1 = beta;
1679         Wfac2 = beta;
1680     }
1681     else
1682     {
1683         /* we're using dhdl, so delta_lambda needs to be a
1684            multiplication factor.  */
1685         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1686         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1687                                                   ca->native_lambda);
1688         if (cb->native_lambda->lc->N > 1)
1689         {
1690             gmx_fatal(FARGS,
1691                       "Can't (yet) do multi-component dhdl interpolation");
1692         }
1693
1694         Wfac1 =  beta*delta_lambda;
1695         Wfac2 = -beta*delta_lambda;
1696     }
1697
1698     if (beta < 1)
1699     {
1700         /* We print the output both in kT and kJ/mol.
1701          * Here we determine DG in kT, so when beta < 1
1702          * the precision has to be increased.
1703          */
1704         tol *= beta;
1705     }
1706
1707     /* Calculate minimum and maximum work to give an initial estimate of
1708      * delta G  as their average.
1709      */
1710     {
1711         double Wmin1, Wmin2, Wmax1, Wmax2;
1712         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1713         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1714
1715         Wmin = min(Wmin1, Wmin2);
1716         Wmax = max(Wmax1, Wmax2);
1717     }
1718
1719     DG0 = Wmin;
1720     DG2 = Wmax;
1721
1722     if (debug)
1723     {
1724         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1725     }
1726     /* We approximate by bisection: given our initial estimates
1727        we keep checking whether the halfway point is greater or
1728        smaller than what we get out of the BAR averages.
1729
1730        For the comparison we can use twice the tolerance. */
1731     while (DG2 - DG0 > 2*tol)
1732     {
1733         DG1 = 0.5*(DG0 + DG2);
1734
1735         /* calculate the BAR averages */
1736         dDG1 = 0.;
1737
1738         for (i = 0; i < ca->nsamples; i++)
1739         {
1740             samples_t      *s = ca->s[i];
1741             sample_range_t *r = &(ca->r[i]);
1742             if (r->use)
1743             {
1744                 if (s->hist)
1745                 {
1746                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1747                 }
1748                 else
1749                 {
1750                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1751                                          Wfac1, (M-DG1));
1752                 }
1753             }
1754         }
1755         for (i = 0; i < cb->nsamples; i++)
1756         {
1757             samples_t      *s = cb->s[i];
1758             sample_range_t *r = &(cb->r[i]);
1759             if (r->use)
1760             {
1761                 if (s->hist)
1762                 {
1763                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1764                 }
1765                 else
1766                 {
1767                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1768                                          Wfac2, -(M-DG1));
1769                 }
1770             }
1771         }
1772
1773         if (dDG1 < 0)
1774         {
1775             DG0 = DG1;
1776         }
1777         else
1778         {
1779             DG2 = DG1;
1780         }
1781         if (debug)
1782         {
1783             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1784         }
1785     }
1786
1787     return 0.5*(DG0 + DG2);
1788 }
1789
1790 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1791                              double temp, double dg, double *sa, double *sb)
1792 {
1793     int    i, j;
1794     double W_ab = 0.;
1795     double W_ba = 0.;
1796     double kT, beta;
1797     double Wfac1, Wfac2;
1798     double n1, n2;
1799
1800     kT   = BOLTZ*temp;
1801     beta = 1/kT;
1802
1803     /* count the numbers of samples */
1804     n1 = ca->ntot;
1805     n2 = cb->ntot;
1806
1807     /* to ensure the work values are the same as during the delta_G */
1808     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1809     if (ca->foreign_lambda->dhdl < 0)
1810     {
1811         /* this is the case when the delta U were calculated directly
1812            (i.e. we're not scaling dhdl) */
1813         Wfac1 = beta;
1814         Wfac2 = beta;
1815     }
1816     else
1817     {
1818         /* we're using dhdl, so delta_lambda needs to be a
1819            multiplication factor.  */
1820         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1821                                                   ca->native_lambda);
1822         Wfac1 =  beta*delta_lambda;
1823         Wfac2 = -beta*delta_lambda;
1824     }
1825
1826     /* first calculate the average work in both directions */
1827     for (i = 0; i < ca->nsamples; i++)
1828     {
1829         samples_t      *s = ca->s[i];
1830         sample_range_t *r = &(ca->r[i]);
1831         if (r->use)
1832         {
1833             if (!s->hist)
1834             {
1835                 for (j = r->start; j < r->end; j++)
1836                 {
1837                     W_ab += Wfac1*s->du[j];
1838                 }
1839             }
1840             else
1841             {
1842                 /* normalization factor multiplied with bin width and
1843                    number of samples (we normalize through M): */
1844                 double normdx = 1.;
1845                 double dx;
1846                 int    hd = 0; /* histogram direction */
1847                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1848                 {
1849                     hd = 1;
1850                 }
1851                 dx = s->hist->dx[hd];
1852
1853                 for (j = 0; j < s->hist->nbin[0]; j++)
1854                 {
1855                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1856                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1857                     W_ab += pxdx*x;
1858                 }
1859             }
1860         }
1861     }
1862     W_ab /= n1;
1863
1864     for (i = 0; i < cb->nsamples; i++)
1865     {
1866         samples_t      *s = cb->s[i];
1867         sample_range_t *r = &(cb->r[i]);
1868         if (r->use)
1869         {
1870             if (!s->hist)
1871             {
1872                 for (j = r->start; j < r->end; j++)
1873                 {
1874                     W_ba += Wfac1*s->du[j];
1875                 }
1876             }
1877             else
1878             {
1879                 /* normalization factor multiplied with bin width and
1880                    number of samples (we normalize through M): */
1881                 double normdx = 1.;
1882                 double dx;
1883                 int    hd = 0; /* histogram direction */
1884                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1885                 {
1886                     hd = 1;
1887                 }
1888                 dx = s->hist->dx[hd];
1889
1890                 for (j = 0; j < s->hist->nbin[0]; j++)
1891                 {
1892                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1893                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1894                     W_ba += pxdx*x;
1895                 }
1896             }
1897         }
1898     }
1899     W_ba /= n2;
1900
1901     /* then calculate the relative entropies */
1902     *sa = (W_ab - dg);
1903     *sb = (W_ba + dg);
1904 }
1905
1906 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1907                            double temp, double dg, double *stddev)
1908 {
1909     int    i, j;
1910     double M;
1911     double sigmafact = 0.;
1912     double kT, beta;
1913     double Wfac1, Wfac2;
1914     double n1, n2;
1915
1916     kT   = BOLTZ*temp;
1917     beta = 1/kT;
1918
1919     /* count the numbers of samples */
1920     n1 = ca->ntot;
1921     n2 = cb->ntot;
1922
1923     /* to ensure the work values are the same as during the delta_G */
1924     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1925     if (ca->foreign_lambda->dhdl < 0)
1926     {
1927         /* this is the case when the delta U were calculated directly
1928            (i.e. we're not scaling dhdl) */
1929         Wfac1 = beta;
1930         Wfac2 = beta;
1931     }
1932     else
1933     {
1934         /* we're using dhdl, so delta_lambda needs to be a
1935            multiplication factor.  */
1936         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1937                                                   ca->native_lambda);
1938         Wfac1 =  beta*delta_lambda;
1939         Wfac2 = -beta*delta_lambda;
1940     }
1941
1942     M = log(n1/n2);
1943
1944
1945     /* calculate average in both directions */
1946     for (i = 0; i < ca->nsamples; i++)
1947     {
1948         samples_t      *s = ca->s[i];
1949         sample_range_t *r = &(ca->r[i]);
1950         if (r->use)
1951         {
1952             if (!s->hist)
1953             {
1954                 for (j = r->start; j < r->end; j++)
1955                 {
1956                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1957                 }
1958             }
1959             else
1960             {
1961                 /* normalization factor multiplied with bin width and
1962                    number of samples (we normalize through M): */
1963                 double normdx = 1.;
1964                 double dx;
1965                 int    hd = 0; /* histogram direction */
1966                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1967                 {
1968                     hd = 1;
1969                 }
1970                 dx = s->hist->dx[hd];
1971
1972                 for (j = 0; j < s->hist->nbin[0]; j++)
1973                 {
1974                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1975                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1976
1977                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1978                 }
1979             }
1980         }
1981     }
1982     for (i = 0; i < cb->nsamples; i++)
1983     {
1984         samples_t      *s = cb->s[i];
1985         sample_range_t *r = &(cb->r[i]);
1986         if (r->use)
1987         {
1988             if (!s->hist)
1989             {
1990                 for (j = r->start; j < r->end; j++)
1991                 {
1992                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1993                 }
1994             }
1995             else
1996             {
1997                 /* normalization factor multiplied with bin width and
1998                    number of samples (we normalize through M): */
1999                 double normdx = 1.;
2000                 double dx;
2001                 int    hd = 0; /* histogram direction */
2002                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2003                 {
2004                     hd = 1;
2005                 }
2006                 dx = s->hist->dx[hd];
2007
2008                 for (j = 0; j < s->hist->nbin[0]; j++)
2009                 {
2010                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2011                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2012
2013                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2014                 }
2015             }
2016         }
2017     }
2018
2019     sigmafact /= (n1 + n2);
2020
2021
2022     /* Eq. 10 from
2023        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2024     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2025 }
2026
2027
2028
2029 static void calc_bar(barres_t *br, double tol,
2030                      int npee_min, int npee_max, gmx_bool *bEE,
2031                      double *partsum)
2032 {
2033     int      npee, p;
2034     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2035                                                         for calculated quantities */
2036     int      nsample1, nsample2;
2037     double   temp = br->a->temp;
2038     int      i, j;
2039     double   dg_min, dg_max;
2040     gmx_bool have_hist = FALSE;
2041
2042     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2043
2044     br->dg_disc_err      = 0.;
2045     br->dg_histrange_err = 0.;
2046
2047     /* check if there are histograms */
2048     for (i = 0; i < br->a->nsamples; i++)
2049     {
2050         if (br->a->r[i].use && br->a->s[i]->hist)
2051         {
2052             have_hist = TRUE;
2053             break;
2054         }
2055     }
2056     if (!have_hist)
2057     {
2058         for (i = 0; i < br->b->nsamples; i++)
2059         {
2060             if (br->b->r[i].use && br->b->s[i]->hist)
2061             {
2062                 have_hist = TRUE;
2063                 break;
2064             }
2065         }
2066     }
2067
2068     /* calculate histogram-specific errors */
2069     if (have_hist)
2070     {
2071         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2072         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2073
2074         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2075         {
2076             /* the histogram range  error is the biggest of the differences
2077                between the best estimate and the extremes */
2078             br->dg_histrange_err = fabs(dg_max - dg_min);
2079         }
2080         br->dg_disc_err = 0.;
2081         for (i = 0; i < br->a->nsamples; i++)
2082         {
2083             if (br->a->s[i]->hist)
2084             {
2085                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2086             }
2087         }
2088         for (i = 0; i < br->b->nsamples; i++)
2089         {
2090             if (br->b->s[i]->hist)
2091             {
2092                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2093             }
2094         }
2095     }
2096     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2097
2098     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2099
2100     dg_sig2     = 0;
2101     sa_sig2     = 0;
2102     sb_sig2     = 0;
2103     stddev_sig2 = 0;
2104
2105     *bEE = TRUE;
2106     {
2107         sample_coll_t ca, cb;
2108
2109         /* initialize the samples */
2110         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2111                          br->a->temp);
2112         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2113                          br->b->temp);
2114
2115         for (npee = npee_min; npee <= npee_max; npee++)
2116         {
2117             double dgs      = 0;
2118             double dgs2     = 0;
2119             double dsa      = 0;
2120             double dsb      = 0;
2121             double dsa2     = 0;
2122             double dsb2     = 0;
2123             double dstddev  = 0;
2124             double dstddev2 = 0;
2125
2126
2127             for (p = 0; p < npee; p++)
2128             {
2129                 double   dgp;
2130                 double   stddevc;
2131                 double   sac, sbc;
2132                 gmx_bool cac, cbc;
2133
2134                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2135                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2136
2137                 if (!cac || !cbc)
2138                 {
2139                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2140                     *bEE = FALSE;
2141                     if (cac)
2142                     {
2143                         sample_coll_destroy(&ca);
2144                     }
2145                     if (cbc)
2146                     {
2147                         sample_coll_destroy(&cb);
2148                     }
2149                     return;
2150                 }
2151
2152                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2153                 dgs  += dgp;
2154                 dgs2 += dgp*dgp;
2155
2156                 partsum[npee*(npee_max+1)+p] += dgp;
2157
2158                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2159                 dsa  += sac;
2160                 dsa2 += sac*sac;
2161                 dsb  += sbc;
2162                 dsb2 += sbc*sbc;
2163                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2164
2165                 dstddev  += stddevc;
2166                 dstddev2 += stddevc*stddevc;
2167
2168                 sample_coll_destroy(&ca);
2169                 sample_coll_destroy(&cb);
2170             }
2171             dgs     /= npee;
2172             dgs2    /= npee;
2173             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2174
2175             dsa     /= npee;
2176             dsa2    /= npee;
2177             dsb     /= npee;
2178             dsb2    /= npee;
2179             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2180             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2181
2182             dstddev     /= npee;
2183             dstddev2    /= npee;
2184             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2185         }
2186         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2187         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2188         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2189         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2190     }
2191 }
2192
2193
2194 static double bar_err(int nbmin, int nbmax, const double *partsum)
2195 {
2196     int    nb, b;
2197     double svar, s, s2, dg;
2198
2199     svar = 0;
2200     for (nb = nbmin; nb <= nbmax; nb++)
2201     {
2202         s  = 0;
2203         s2 = 0;
2204         for (b = 0; b < nb; b++)
2205         {
2206             dg  = partsum[nb*(nbmax+1)+b];
2207             s  += dg;
2208             s2 += dg*dg;
2209         }
2210         s    /= nb;
2211         s2   /= nb;
2212         svar += (s2 - s*s)/(nb - 1);
2213     }
2214
2215     return sqrt(svar/(nbmax + 1 - nbmin));
2216 }
2217
2218
2219 /* Seek the end of an identifier (consecutive non-spaces), followed by
2220    an optional number of spaces or '='-signs. Returns a pointer to the
2221    first non-space value found after that. Returns NULL if the string
2222    ends before that.
2223  */
2224 static const char *find_value(const char *str)
2225 {
2226     gmx_bool name_end_found = FALSE;
2227
2228     /* if the string is a NULL pointer, return a NULL pointer. */
2229     if (str == NULL)
2230     {
2231         return NULL;
2232     }
2233     while (*str != '\0')
2234     {
2235         /* first find the end of the name */
2236         if (!name_end_found)
2237         {
2238             if (isspace(*str) || (*str == '=') )
2239             {
2240                 name_end_found = TRUE;
2241             }
2242         }
2243         else
2244         {
2245             if (!( isspace(*str) || (*str == '=') ))
2246             {
2247                 return str;
2248             }
2249         }
2250         str++;
2251     }
2252     return NULL;
2253 }
2254
2255
2256
2257 /* read a vector-notation description of a lambda vector */
2258 static gmx_bool read_lambda_compvec(const char                *str,
2259                                     lambda_vec_t              *lv,
2260                                     const lambda_components_t *lc_in,
2261                                     lambda_components_t       *lc_out,
2262                                     const char               **end,
2263                                     const char                *fn)
2264 {
2265     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2266                                           components, or to check them */
2267     gmx_bool    start_reached = FALSE; /* whether the start of component names
2268                                           has been reached */
2269     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2270     int         n             = 0;     /* current component number */
2271     const char *val_start     = NULL;  /* start of the component name, or NULL
2272                                           if not in a value */
2273     char       *strtod_end;
2274     gmx_bool    OK = TRUE;
2275
2276     if (end)
2277     {
2278         *end = str;
2279     }
2280
2281
2282     if (lc_out && lc_out->N == 0)
2283     {
2284         initialize_lc = TRUE;
2285     }
2286
2287     if (lc_in == NULL)
2288     {
2289         lc_in = lc_out;
2290     }
2291
2292     while (1)
2293     {
2294         if (!start_reached)
2295         {
2296             if (isalnum(*str))
2297             {
2298                 vector        = FALSE;
2299                 start_reached = TRUE;
2300                 val_start     = str;
2301             }
2302             else if (*str == '(')
2303             {
2304                 vector        = TRUE;
2305                 start_reached = TRUE;
2306             }
2307             else if (!isspace(*str))
2308             {
2309                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2310             }
2311         }
2312         else
2313         {
2314             if (val_start)
2315             {
2316                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2317                 {
2318                     /* end of value */
2319                     if (lv == NULL)
2320                     {
2321                         if (initialize_lc)
2322                         {
2323                             lambda_components_add(lc_out, val_start,
2324                                                   (str-val_start));
2325                         }
2326                         else
2327                         {
2328                             if (!lambda_components_check(lc_out, n, val_start,
2329                                                          (str-val_start)))
2330                             {
2331                                 return FALSE;
2332                             }
2333                         }
2334                     }
2335                     else
2336                     {
2337                         /* add a vector component to lv */
2338                         lv->val[n] = strtod(val_start, &strtod_end);
2339                         if (val_start == strtod_end)
2340                         {
2341                             gmx_fatal(FARGS,
2342                                       "Error reading lambda vector in %s", fn);
2343                         }
2344                     }
2345                     /* reset for the next identifier */
2346                     val_start = NULL;
2347                     n++;
2348                     if (!vector)
2349                     {
2350                         return OK;
2351                     }
2352                 }
2353             }
2354             else if (isalnum(*str))
2355             {
2356                 val_start = str;
2357             }
2358             if (*str == ')')
2359             {
2360                 str++;
2361                 if (end)
2362                 {
2363                     *end = str;
2364                 }
2365                 if (!vector)
2366                 {
2367                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2368                 }
2369                 else
2370                 {
2371                     if (n == lc_in->N)
2372                     {
2373                         return OK;
2374                     }
2375                     else if (lv == NULL)
2376                     {
2377                         return FALSE;
2378                     }
2379                     else
2380                     {
2381                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2382                                   fn);
2383                         return FALSE;
2384                     }
2385
2386                 }
2387             }
2388         }
2389         if (*str == '\0')
2390         {
2391             break;
2392         }
2393         str++;
2394         if (end)
2395         {
2396             *end = str;
2397         }
2398     }
2399     if (vector)
2400     {
2401         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2402         return FALSE;
2403     }
2404     return OK;
2405 }
2406
2407 /* read and check the component names from a string */
2408 static gmx_bool read_lambda_components(const char          *str,
2409                                        lambda_components_t *lc,
2410                                        const char         **end,
2411                                        const char          *fn)
2412 {
2413     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2414 }
2415
2416 /* read an initialized lambda vector from a string */
2417 static gmx_bool read_lambda_vector(const char   *str,
2418                                    lambda_vec_t *lv,
2419                                    const char  **end,
2420                                    const char   *fn)
2421 {
2422     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2423 }
2424
2425
2426
2427 /* deduce lambda value from legend.
2428     fn = the file name
2429     legend = the legend string
2430     ba = the xvg data
2431     lam = the initialized lambda vector
2432    returns whether to use the data in this set.
2433  */
2434 static gmx_bool legend2lambda(const char   *fn,
2435                               const char   *legend,
2436                               lambda_vec_t *lam)
2437 {
2438     double        lambda = 0;
2439     const char   *ptr    = NULL, *ptr2 = NULL;
2440     gmx_bool      ok     = FALSE;
2441     gmx_bool      bdhdl  = FALSE;
2442     const char   *tostr  = " to ";
2443
2444     if (legend == NULL)
2445     {
2446         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2447     }
2448
2449     /* look for the last 'to': */
2450     ptr2 = legend;
2451     do
2452     {
2453         ptr2 = strstr(ptr2, tostr);
2454         if (ptr2 != NULL)
2455         {
2456             ptr = ptr2;
2457             ptr2++;
2458         }
2459     }
2460     while (ptr2 != NULL && *ptr2 != '\0');
2461
2462     if (ptr)
2463     {
2464         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2465     }
2466     else
2467     {
2468         /* look for the = sign */
2469         ptr = strrchr(legend, '=');
2470         if (!ptr)
2471         {
2472             /* otherwise look for the last space */
2473             ptr = strrchr(legend, ' ');
2474         }
2475     }
2476
2477     if (strstr(legend, "dH"))
2478     {
2479         ok    = TRUE;
2480         bdhdl = TRUE;
2481     }
2482     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2483     {
2484         ok    = TRUE;
2485         bdhdl = FALSE;
2486     }
2487     else /*if (strstr(legend, "pV"))*/
2488     {
2489         return FALSE;
2490     }
2491     if (!ptr)
2492     {
2493         ok = FALSE;
2494     }
2495
2496     if (!ok)
2497     {
2498         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2499     }
2500     if (!bdhdl)
2501     {
2502         ptr = find_value(ptr);
2503         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2504         {
2505             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2506         }
2507     }
2508     else
2509     {
2510         int         dhdl_index;
2511         const char *end;
2512         char        buf[STRLEN];
2513
2514         ptr = strrchr(legend, '=');
2515         end = ptr;
2516         if (ptr)
2517         {
2518             /* there must be a component name */
2519             ptr--;
2520             if (ptr < legend)
2521             {
2522                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2523             }
2524             /* now backtrack to the start of the identifier */
2525             while (isspace(*ptr))
2526             {
2527                 end = ptr;
2528                 ptr--;
2529                 if (ptr < legend)
2530                 {
2531                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2532                 }
2533             }
2534             while (!isspace(*ptr))
2535             {
2536                 ptr--;
2537                 if (ptr < legend)
2538                 {
2539                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2540                 }
2541             }
2542             ptr++;
2543             strncpy(buf, ptr, (end-ptr));
2544             buf[(end-ptr)] = '\0';
2545             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2546             if (dhdl_index < 0)
2547             {
2548                 char buf[STRLEN];
2549                 strncpy(buf, ptr, (end-ptr));
2550                 buf[(end-ptr)] = '\0';
2551                 gmx_fatal(FARGS,
2552                           "Did not find lambda component for '%s' in %s",
2553                           buf, fn);
2554             }
2555         }
2556         else
2557         {
2558             if (lam->lc->N > 1)
2559             {
2560                 gmx_fatal(FARGS,
2561                           "dhdl without component name with >1 lambda component in %s",
2562                           fn);
2563             }
2564             dhdl_index = 0;
2565         }
2566         lam->dhdl = dhdl_index;
2567     }
2568     return TRUE;
2569 }
2570
2571 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2572                                 lambda_components_t *lc)
2573 {
2574     gmx_bool    bFound;
2575     const char *ptr;
2576     char       *end;
2577     double      native_lambda;
2578
2579     bFound = FALSE;
2580
2581     /* first check for a state string */
2582     ptr = strstr(subtitle, "state");
2583     if (ptr)
2584     {
2585         int         index = -1;
2586         const char *val_end;
2587
2588         /* the new 4.6 style lambda vectors */
2589         ptr = find_value(ptr);
2590         if (ptr)
2591         {
2592             index = strtol(ptr, &end, 10);
2593             if (ptr == end)
2594             {
2595                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2596                 return FALSE;
2597             }
2598             ptr = end;
2599         }
2600         else
2601         {
2602             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2603             return FALSE;
2604         }
2605         /* now find the lambda vector component names */
2606         while (*ptr != '(' && !isalnum(*ptr))
2607         {
2608             ptr++;
2609             if (*ptr == '\0')
2610             {
2611                 gmx_fatal(FARGS,
2612                           "Incomplete lambda vector component data in %s", fn);
2613                 return FALSE;
2614             }
2615         }
2616         val_end = ptr;
2617         if (!read_lambda_components(ptr, lc, &val_end, fn))
2618         {
2619             gmx_fatal(FARGS,
2620                       "lambda vector components in %s don't match those previously read",
2621                       fn);
2622         }
2623         ptr = find_value(val_end);
2624         if (!ptr)
2625         {
2626             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2627             return FALSE;
2628         }
2629         lambda_vec_init(&(ba->native_lambda), lc);
2630         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2631         {
2632             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2633         }
2634         ba->native_lambda.index = index;
2635         bFound                  = TRUE;
2636     }
2637     else
2638     {
2639         /* compatibility mode: check for lambda in other ways. */
2640         /* plain text lambda string */
2641         ptr = strstr(subtitle, "lambda");
2642         if (ptr == NULL)
2643         {
2644             /* xmgrace formatted lambda string */
2645             ptr = strstr(subtitle, "\\xl\\f{}");
2646         }
2647         if (ptr == NULL)
2648         {
2649             /* xmgr formatted lambda string */
2650             ptr = strstr(subtitle, "\\8l\\4");
2651         }
2652         if (ptr != NULL)
2653         {
2654             ptr = strstr(ptr, "=");
2655         }
2656         if (ptr != NULL)
2657         {
2658             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2659             /* add the lambda component name as an empty string */
2660             if (lc->N > 0)
2661             {
2662                 if (!lambda_components_check(lc, 0, "", 0))
2663                 {
2664                     gmx_fatal(FARGS,
2665                               "lambda vector components in %s don't match those previously read",
2666                               fn);
2667                 }
2668             }
2669             else
2670             {
2671                 lambda_components_add(lc, "", 0);
2672             }
2673             lambda_vec_init(&(ba->native_lambda), lc);
2674             ba->native_lambda.val[0] = native_lambda;
2675         }
2676     }
2677
2678     return bFound;
2679 }
2680
2681 static void filename2lambda(const char *fn)
2682 {
2683     double        lambda;
2684     const char   *ptr, *digitptr;
2685     char         *endptr;
2686     int           dirsep;
2687     ptr = fn;
2688     /* go to the end of the path string and search backward to find the last
2689        directory in the path which has to contain the value of lambda
2690      */
2691     while (ptr[1] != '\0')
2692     {
2693         ptr++;
2694     }
2695     /* searching backward to find the second directory separator */
2696     dirsep   = 0;
2697     digitptr = NULL;
2698     while (ptr >= fn)
2699     {
2700         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2701         {
2702             if (dirsep == 1)
2703             {
2704                 break;
2705             }
2706             dirsep++;
2707         }
2708         /* save the last position of a digit between the last two
2709            separators = in the last dirname */
2710         if (dirsep > 0 && isdigit(*ptr))
2711         {
2712             digitptr = ptr;
2713         }
2714         ptr--;
2715     }
2716     if (!digitptr)
2717     {
2718         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2719                   " last directory in the path '%s' does not contain a number", fn);
2720     }
2721     if (digitptr[-1] == '-')
2722     {
2723         digitptr--;
2724     }
2725     lambda = strtod(digitptr, &endptr);
2726     if (endptr == digitptr)
2727     {
2728         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2729     }
2730 }
2731
2732 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2733                                   lambda_components_t *lc)
2734 {
2735     int          i;
2736     char        *subtitle, **legend, *ptr;
2737     int          np;
2738     gmx_bool     native_lambda_read = FALSE;
2739     char         buf[STRLEN];
2740     lambda_vec_t lv;
2741
2742     xvg_init(ba);
2743
2744     ba->filename = fn;
2745
2746     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2747     if (!ba->y)
2748     {
2749         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2750     }
2751     /* Reorder the data */
2752     ba->t  = ba->y[0];
2753     for (i = 1; i < ba->nset; i++)
2754     {
2755         ba->y[i-1] = ba->y[i];
2756     }
2757     ba->nset--;
2758
2759     snew(ba->np, ba->nset);
2760     for (i = 0; i < ba->nset; i++)
2761     {
2762         ba->np[i] = np;
2763     }
2764
2765     ba->temp = -1;
2766     if (subtitle != NULL)
2767     {
2768         /* try to extract temperature */
2769         ptr = strstr(subtitle, "T =");
2770         if (ptr != NULL)
2771         {
2772             ptr += 3;
2773             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2774             {
2775                 if (ba->temp <= 0)
2776                 {
2777                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2778                               ba->temp, fn);
2779                 }
2780             }
2781         }
2782     }
2783     if (ba->temp < 0)
2784     {
2785         if (*temp <= 0)
2786         {
2787             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]", fn);
2788         }
2789         ba->temp = *temp;
2790     }
2791
2792     /* Try to deduce lambda from the subtitle */
2793     if (subtitle)
2794     {
2795         if (subtitle2lambda(subtitle, ba, fn, lc))
2796         {
2797             native_lambda_read = TRUE;
2798         }
2799     }
2800     snew(ba->lambda, ba->nset);
2801     if (legend == NULL)
2802     {
2803         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2804         if (ba->nset == 1)
2805         {
2806             if (!native_lambda_read)
2807             {
2808                 /* Deduce lambda from the file name */
2809                 filename2lambda(fn);
2810                 native_lambda_read = TRUE;
2811             }
2812             ba->lambda[0] = ba->native_lambda;
2813         }
2814         else
2815         {
2816             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2817         }
2818     }
2819     else
2820     {
2821         for (i = 0; i < ba->nset; )
2822         {
2823             gmx_bool use = FALSE;
2824             /* Read lambda from the legend */
2825             lambda_vec_init( &(ba->lambda[i]), lc );
2826             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2827             use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2828             if (use)
2829             {
2830                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2831                 i++;
2832             }
2833             else
2834             {
2835                 int j;
2836                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2837                 for (j = i+1; j < ba->nset; j++)
2838                 {
2839                     ba->y[j-1]  = ba->y[j];
2840                     legend[j-1] = legend[j];
2841                 }
2842                 ba->nset--;
2843             }
2844         }
2845     }
2846
2847     if (!native_lambda_read)
2848     {
2849         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2850     }
2851
2852     if (legend != NULL)
2853     {
2854         for (i = 0; i < ba->nset-1; i++)
2855         {
2856             sfree(legend[i]);
2857         }
2858         sfree(legend);
2859     }
2860 }
2861
2862 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2863 {
2864     xvg_t     *barsim;
2865     samples_t *s;
2866     int        i;
2867     double    *lambda;
2868
2869     snew(barsim, 1);
2870
2871     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2872
2873     if (barsim->nset < 1)
2874     {
2875         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2876     }
2877
2878     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2879     {
2880         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2881     }
2882     *temp = barsim->temp;
2883
2884     /* now create a series of samples_t */
2885     snew(s, barsim->nset);
2886     for (i = 0; i < barsim->nset; i++)
2887     {
2888         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2889                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2890                                                    &(barsim->lambda[i])),
2891                      fn);
2892         s[i].du  = barsim->y[i];
2893         s[i].ndu = barsim->np[i];
2894         s[i].t   = barsim->t;
2895
2896         lambda_data_list_insert_sample(sd->lb, s+i);
2897     }
2898     {
2899         char buf[STRLEN];
2900
2901         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2902         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2903                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2904         for (i = 0; i < barsim->nset; i++)
2905         {
2906             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2907             printf("        %s (%d pts)\n", buf, s[i].ndu);
2908         }
2909     }
2910     printf("\n\n");
2911 }
2912
2913 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2914                                  double start_time, double delta_time,
2915                                  lambda_vec_t *native_lambda, double temp,
2916                                  double *last_t, const char *filename)
2917 {
2918     int           i, j;
2919     gmx_bool      allocated;
2920     double        old_foreign_lambda;
2921     lambda_vec_t *foreign_lambda;
2922     int           type;
2923     samples_t    *s; /* convenience pointer */
2924     int           startj;
2925
2926     /* check the block types etc. */
2927     if ( (blk->nsub < 3) ||
2928          (blk->sub[0].type != xdr_datatype_int) ||
2929          (blk->sub[1].type != xdr_datatype_double) ||
2930          (
2931              (blk->sub[2].type != xdr_datatype_float) &&
2932              (blk->sub[2].type != xdr_datatype_double)
2933          ) ||
2934          (blk->sub[0].nr < 1) ||
2935          (blk->sub[1].nr < 1) )
2936     {
2937         gmx_fatal(FARGS,
2938                   "Unexpected/corrupted block data in file %s around time %f.",
2939                   filename, start_time);
2940     }
2941
2942     snew(foreign_lambda, 1);
2943     lambda_vec_init(foreign_lambda, native_lambda->lc);
2944     lambda_vec_copy(foreign_lambda, native_lambda);
2945     type = blk->sub[0].ival[0];
2946     if (type == dhbtDH)
2947     {
2948         for (i = 0; i < native_lambda->lc->N; i++)
2949         {
2950             foreign_lambda->val[i] = blk->sub[1].dval[i];
2951         }
2952     }
2953     else
2954     {
2955         if (blk->sub[0].nr > 1)
2956         {
2957             foreign_lambda->dhdl = blk->sub[0].ival[1];
2958         }
2959         else
2960         {
2961             foreign_lambda->dhdl = 0;
2962         }
2963     }
2964
2965     if (!*smp)
2966     {
2967         /* initialize the samples structure if it's empty. */
2968         snew(*smp, 1);
2969         samples_init(*smp, native_lambda, foreign_lambda, temp,
2970                      type == dhbtDHDL, filename);
2971         (*smp)->start_time = start_time;
2972         (*smp)->delta_time = delta_time;
2973     }
2974
2975     /* set convenience pointer */
2976     s = *smp;
2977
2978     /* now double check */
2979     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2980     {
2981         char buf[STRLEN], buf2[STRLEN];
2982         lambda_vec_print(foreign_lambda, buf, FALSE);
2983         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2984         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2985         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2986                   filename, start_time);
2987     }
2988
2989     /* make room for the data */
2990     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2991     {
2992         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2993             blk->sub[2].nr*2 : s->ndu_alloc;
2994         srenew(s->du_alloc, s->ndu_alloc);
2995         s->du = s->du_alloc;
2996     }
2997     startj   = s->ndu;
2998     s->ndu  += blk->sub[2].nr;
2999     s->ntot += blk->sub[2].nr;
3000     *ndu     = blk->sub[2].nr;
3001
3002     /* and copy the data*/
3003     for (j = 0; j < blk->sub[2].nr; j++)
3004     {
3005         if (blk->sub[2].type == xdr_datatype_float)
3006         {
3007             s->du[startj+j] = blk->sub[2].fval[j];
3008         }
3009         else
3010         {
3011             s->du[startj+j] = blk->sub[2].dval[j];
3012         }
3013     }
3014     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3015     {
3016         *last_t = start_time + blk->sub[2].nr*delta_time;
3017     }
3018 }
3019
3020 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3021                                       double start_time, double delta_time,
3022                                       lambda_vec_t *native_lambda, double temp,
3023                                       double *last_t, const char *filename)
3024 {
3025     int           i, j;
3026     samples_t    *s;
3027     int           nhist;
3028     double        old_foreign_lambda;
3029     lambda_vec_t *foreign_lambda;
3030     int           type;
3031     int           nbins[2];
3032
3033     /* check the block types etc. */
3034     if ( (blk->nsub < 2) ||
3035          (blk->sub[0].type != xdr_datatype_double) ||
3036          (blk->sub[1].type != xdr_datatype_large_int) ||
3037          (blk->sub[0].nr < 2)  ||
3038          (blk->sub[1].nr < 2) )
3039     {
3040         gmx_fatal(FARGS,
3041                   "Unexpected/corrupted block data in file %s around time %f",
3042                   filename, start_time);
3043     }
3044
3045     nhist = blk->nsub-2;
3046     if (nhist == 0)
3047     {
3048         return NULL;
3049     }
3050     if (nhist > 2)
3051     {
3052         gmx_fatal(FARGS,
3053                   "Unexpected/corrupted block data in file %s around time %f",
3054                   filename, start_time);
3055     }
3056
3057     snew(s, 1);
3058     *nsamples = 1;
3059
3060     snew(foreign_lambda, 1);
3061     lambda_vec_init(foreign_lambda, native_lambda->lc);
3062     lambda_vec_copy(foreign_lambda, native_lambda);
3063     type = (int)(blk->sub[1].lval[1]);
3064     if (type == dhbtDH)
3065     {
3066         double old_foreign_lambda;
3067
3068         old_foreign_lambda = blk->sub[0].dval[0];
3069         if (old_foreign_lambda >= 0)
3070         {
3071             foreign_lambda->val[0] = old_foreign_lambda;
3072             if (foreign_lambda->lc->N > 1)
3073             {
3074                 gmx_fatal(FARGS,
3075                           "Single-component lambda in multi-component file %s",
3076                           filename);
3077             }
3078         }
3079         else
3080         {
3081             for (i = 0; i < native_lambda->lc->N; i++)
3082             {
3083                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3084             }
3085         }
3086     }
3087     else
3088     {
3089         if (foreign_lambda->lc->N > 1)
3090         {
3091             if (blk->sub[1].nr < 3 + nhist)
3092             {
3093                 gmx_fatal(FARGS,
3094                           "Missing derivative coord in multi-component file %s",
3095                           filename);
3096             }
3097             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3098         }
3099         else
3100         {
3101             foreign_lambda->dhdl = 0;
3102         }
3103     }
3104
3105     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3106                  filename);
3107     snew(s->hist, 1);
3108
3109     for (i = 0; i < nhist; i++)
3110     {
3111         nbins[i] = blk->sub[i+2].nr;
3112     }
3113
3114     hist_init(s->hist, nhist, nbins);
3115
3116     for (i = 0; i < nhist; i++)
3117     {
3118         s->hist->x0[i] = blk->sub[1].lval[2+i];
3119         s->hist->dx[i] = blk->sub[0].dval[1];
3120         if (i == 1)
3121         {
3122             s->hist->dx[i] = -s->hist->dx[i];
3123         }
3124     }
3125
3126     s->hist->start_time = start_time;
3127     s->hist->delta_time = delta_time;
3128     s->start_time       = start_time;
3129     s->delta_time       = delta_time;
3130
3131     for (i = 0; i < nhist; i++)
3132     {
3133         int             nbin;
3134         gmx_large_int_t sum = 0;
3135
3136         for (j = 0; j < s->hist->nbin[i]; j++)
3137         {
3138             int binv = (int)(blk->sub[i+2].ival[j]);
3139
3140             s->hist->bin[i][j] = binv;
3141             sum               += binv;
3142
3143         }
3144         if (i == 0)
3145         {
3146             s->ntot      = sum;
3147             s->hist->sum = sum;
3148         }
3149         else
3150         {
3151             if (s->ntot != sum)
3152             {
3153                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3154                           filename);
3155             }
3156         }
3157     }
3158
3159     if (start_time + s->hist->sum*delta_time > *last_t)
3160     {
3161         *last_t = start_time + s->hist->sum*delta_time;
3162     }
3163     return s;
3164 }
3165
3166
3167 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3168 {
3169     int            i, j;
3170     ener_file_t    fp;
3171     t_enxframe    *fr;
3172     int            nre;
3173     gmx_enxnm_t   *enm           = NULL;
3174     double         first_t       = -1;
3175     double         last_t        = -1;
3176     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3177     int           *nhists        = NULL; /* array to keep count & print at end */
3178     int           *npts          = NULL; /* array to keep count & print at end */
3179     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3180     lambda_vec_t  *native_lambda;
3181     double         end_time;             /* the end time of the last batch of samples */
3182     int            nsamples = 0;
3183     lambda_vec_t   start_lambda;
3184
3185     fp = open_enx(fn, "r");
3186     do_enxnms(fp, &nre, &enm);
3187     snew(fr, 1);
3188
3189     snew(native_lambda, 1);
3190     start_lambda.lc = NULL;
3191
3192     while (do_enx(fp, fr))
3193     {
3194         /* count the data blocks */
3195         int    nblocks_raw  = 0;
3196         int    nblocks_hist = 0;
3197         int    nlam         = 0;
3198         int    k;
3199         /* DHCOLL block information: */
3200         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3201         double rtemp      = 0;
3202
3203         /* count the blocks and handle collection information: */
3204         for (i = 0; i < fr->nblock; i++)
3205         {
3206             if (fr->block[i].id == enxDHHIST)
3207             {
3208                 nblocks_hist++;
3209             }
3210             if (fr->block[i].id == enxDH)
3211             {
3212                 nblocks_raw++;
3213             }
3214             if (fr->block[i].id == enxDHCOLL)
3215             {
3216                 nlam++;
3217                 if ( (fr->block[i].nsub < 1) ||
3218                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3219                      (fr->block[i].sub[0].nr < 5))
3220                 {
3221                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3222                 }
3223
3224                 /* read the data from the DHCOLL block */
3225                 rtemp            =        fr->block[i].sub[0].dval[0];
3226                 start_time       =   fr->block[i].sub[0].dval[1];
3227                 delta_time       =   fr->block[i].sub[0].dval[2];
3228                 old_start_lambda = fr->block[i].sub[0].dval[3];
3229                 delta_lambda     = fr->block[i].sub[0].dval[4];
3230
3231                 if (delta_lambda > 0)
3232                 {
3233                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3234                 }
3235                 if ( ( *temp != rtemp) && (*temp > 0) )
3236                 {
3237                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3238                 }
3239                 *temp = rtemp;
3240
3241                 if (old_start_lambda >= 0)
3242                 {
3243                     if (sd->lc.N > 0)
3244                     {
3245                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3246                         {
3247                             gmx_fatal(FARGS,
3248                                       "lambda vector components in %s don't match those previously read",
3249                                       fn);
3250                         }
3251                     }
3252                     else
3253                     {
3254                         lambda_components_add(&(sd->lc), "", 0);
3255                     }
3256                     if (!start_lambda.lc)
3257                     {
3258                         lambda_vec_init(&start_lambda, &(sd->lc));
3259                     }
3260                     start_lambda.val[0] = old_start_lambda;
3261                 }
3262                 else
3263                 {
3264                     /* read lambda vector */
3265                     int      n_lambda_vec;
3266                     gmx_bool check = (sd->lc.N > 0);
3267                     if (fr->block[i].nsub < 2)
3268                     {
3269                         gmx_fatal(FARGS,
3270                                   "No lambda vector, but start_lambda=%f\n",
3271                                   old_start_lambda);
3272                     }
3273                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3274                     for (j = 0; j < n_lambda_vec; j++)
3275                     {
3276                         const char *name =
3277                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3278                         if (check)
3279                         {
3280                             /* check the components */
3281                             lambda_components_check(&(sd->lc), j, name,
3282                                                     strlen(name));
3283                         }
3284                         else
3285                         {
3286                             lambda_components_add(&(sd->lc), name,
3287                                                   strlen(name));
3288                         }
3289                     }
3290                     lambda_vec_init(&start_lambda, &(sd->lc));
3291                     start_lambda.index = fr->block[i].sub[1].ival[0];
3292                     for (j = 0; j < n_lambda_vec; j++)
3293                     {
3294                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3295                     }
3296                 }
3297                 if (first_t < 0)
3298                 {
3299                     first_t = start_time;
3300                 }
3301             }
3302         }
3303
3304         if (nlam != 1)
3305         {
3306             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3307         }
3308         if (nblocks_raw > 0 && nblocks_hist > 0)
3309         {
3310             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3311         }
3312
3313         if (nsamples > 0)
3314         {
3315             /* check the native lambda */
3316             if (!lambda_vec_same(&start_lambda, native_lambda) )
3317             {
3318                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3319                           fn, native_lambda, start_lambda, start_time);
3320             }
3321             /* check the number of samples against the previous number */
3322             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3323             {
3324                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3325                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3326             }
3327             /* check whether last iterations's end time matches with
3328                the currrent start time */
3329             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3330             {
3331                 /* it didn't. We need to store our samples and reallocate */
3332                 for (i = 0; i < nsamples; i++)
3333                 {
3334                     if (samples_rawdh[i])
3335                     {
3336                         /* insert it into the existing list */
3337                         lambda_data_list_insert_sample(sd->lb,
3338                                                        samples_rawdh[i]);
3339                         /* and make sure we'll allocate a new one this time
3340                            around */
3341                         samples_rawdh[i] = NULL;
3342                     }
3343                 }
3344             }
3345         }
3346         else
3347         {
3348             /* this is the first round; allocate the associated data
3349                structures */
3350             /*native_lambda=start_lambda;*/
3351             lambda_vec_init(native_lambda, &(sd->lc));
3352             lambda_vec_copy(native_lambda, &start_lambda);
3353             nsamples = nblocks_raw+nblocks_hist;
3354             snew(nhists, nsamples);
3355             snew(npts, nsamples);
3356             snew(lambdas, nsamples);
3357             snew(samples_rawdh, nsamples);
3358             for (i = 0; i < nsamples; i++)
3359             {
3360                 nhists[i]        = 0;
3361                 npts[i]          = 0;
3362                 lambdas[i]       = NULL;
3363                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3364                                             ones contain values */
3365             }
3366         }
3367
3368         /* and read them */
3369         k = 0; /* counter for the lambdas, etc. arrays */
3370         for (i = 0; i < fr->nblock; i++)
3371         {
3372             if (fr->block[i].id == enxDH)
3373             {
3374                 int type = (fr->block[i].sub[0].ival[0]);
3375                 if (type == dhbtDH || type == dhbtDHDL)
3376                 {
3377                     int ndu;
3378                     read_edr_rawdh_block(&(samples_rawdh[k]),
3379                                          &ndu,
3380                                          &(fr->block[i]),
3381                                          start_time, delta_time,
3382                                          native_lambda, rtemp,
3383                                          &last_t, fn);
3384                     npts[k] += ndu;
3385                     if (samples_rawdh[k])
3386                     {
3387                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3388                     }
3389                     k++;
3390                 }
3391             }
3392             else if (fr->block[i].id == enxDHHIST)
3393             {
3394                 int type = (int)(fr->block[i].sub[1].lval[1]);
3395                 if (type == dhbtDH || type == dhbtDHDL)
3396                 {
3397                     int        j;
3398                     int        nb = 0;
3399                     samples_t *s; /* this is where the data will go */
3400                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3401                                             start_time, delta_time,
3402                                             native_lambda, rtemp,
3403                                             &last_t, fn);
3404                     nhists[k] += nb;
3405                     if (nb > 0)
3406                     {
3407                         lambdas[k] = s->foreign_lambda;
3408                     }
3409                     k++;
3410                     /* and insert the new sample immediately */
3411                     for (j = 0; j < nb; j++)
3412                     {
3413                         lambda_data_list_insert_sample(sd->lb, s+j);
3414                     }
3415                 }
3416             }
3417         }
3418     }
3419     /* Now store all our extant sample collections */
3420     for (i = 0; i < nsamples; i++)
3421     {
3422         if (samples_rawdh[i])
3423         {
3424             /* insert it into the existing list */
3425             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3426         }
3427     }
3428
3429
3430     {
3431         char buf[STRLEN];
3432         printf("\n");
3433         lambda_vec_print(native_lambda, buf, FALSE);
3434         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3435                fn, first_t, last_t, buf);
3436         for (i = 0; i < nsamples; i++)
3437         {
3438             if (lambdas[i])
3439             {
3440                 lambda_vec_print(lambdas[i], buf, TRUE);
3441                 if (nhists[i] > 0)
3442                 {
3443                     printf("        %s (%d hists)\n", buf, nhists[i]);
3444                 }
3445                 else
3446                 {
3447                     printf("        %s (%d pts)\n", buf, npts[i]);
3448                 }
3449             }
3450         }
3451     }
3452     printf("\n\n");
3453     sfree(npts);
3454     sfree(nhists);
3455     sfree(lambdas);
3456 }
3457
3458
3459 int gmx_bar(int argc, char *argv[])
3460 {
3461     static const char *desc[] = {
3462         "[TT]g_bar[tt] calculates free energy difference estimates through ",
3463         "Bennett's acceptance ratio method (BAR). It also automatically",
3464         "adds series of individual free energies obtained with BAR into",
3465         "a combined free energy estimate.[PAR]",
3466
3467         "Every individual BAR free energy difference relies on two ",
3468         "simulations at different states: say state A and state B, as",
3469         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3470         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3471         "average of the Hamiltonian difference of state B given state A and",
3472         "vice versa.",
3473         "The energy differences to the other state must be calculated",
3474         "explicitly during the simulation. This can be done with",
3475         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3476
3477         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3478         "Two types of input files are supported:[BR]",
3479         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3480         "The files should have columns ",
3481         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3482         "The [GRK]lambda[grk] values are inferred ",
3483         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3484         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3485         "legends of Delta H",
3486         "[BR]",
3487         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3488         "[TT]-extp[tt] option for these files, it is assumed",
3489         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3490         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3491         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3492         "subtitle (if present), otherwise from a number in the subdirectory ",
3493         "in the file name.[PAR]",
3494
3495         "The [GRK]lambda[grk] of the simulation is parsed from ",
3496         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3497         "foreign [GRK]lambda[grk] values from the legend containing the ",
3498         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3499         "the legend line containing 'T ='.[PAR]",
3500
3501         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3502         "These can contain either lists of energy differences (see the ",
3503         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3504         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3505         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3506         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3507
3508         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3509         "the energy difference can also be extrapolated from the ",
3510         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3511         "option, which assumes that the system's Hamiltonian depends linearly",
3512         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3513
3514         "The free energy estimates are determined using BAR with bisection, ",
3515         "with the precision of the output set with [TT]-prec[tt]. ",
3516         "An error estimate taking into account time correlations ",
3517         "is made by splitting the data into blocks and determining ",
3518         "the free energy differences over those blocks and assuming ",
3519         "the blocks are independent. ",
3520         "The final error estimate is determined from the average variance ",
3521         "over 5 blocks. A range of block numbers for error estimation can ",
3522         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3523
3524         "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
3525         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3526         "samples. [BB]Note[bb] that when aggregating energy ",
3527         "differences/derivatives with different sampling intervals, this is ",
3528         "almost certainly not correct. Usually subsequent energies are ",
3529         "correlated and different time intervals mean different degrees ",
3530         "of correlation between samples.[PAR]",
3531
3532         "The results are split in two parts: the last part contains the final ",
3533         "results in kJ/mol, together with the error estimate for each part ",
3534         "and the total. The first part contains detailed free energy ",
3535         "difference estimates and phase space overlap measures in units of ",
3536         "kT (together with their computed error estimate). The printed ",
3537         "values are:[BR]",
3538         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3539         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3540         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3541         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3542         "[TT]*[tt]    s_B: an estimate of the relative entropy of A in B.[BR]",
3543         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3544
3545         "The relative entropy of both states in each other's ensemble can be ",
3546         "interpreted as a measure of phase space overlap: ",
3547         "the relative entropy s_A of the work samples of lambda_B in the ",
3548         "ensemble of lambda_A (and vice versa for s_B), is a ",
3549         "measure of the 'distance' between Boltzmann distributions of ",
3550         "the two states, that goes to zero for identical distributions. See ",
3551         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3552         "[PAR]",
3553         "The estimate of the expected per-sample standard deviation, as given ",
3554         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3555         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3556         "of the actual statistical error, because it assumes independent samples).[PAR]",
3557
3558         "To get a visual estimate of the phase space overlap, use the ",
3559         "[TT]-oh[tt] option to write series of histograms, together with the ",
3560         "[TT]-nbin[tt] option.[PAR]"
3561     };
3562     static real        begin    = 0, end = -1, temp = -1;
3563     int                nd       = 2, nbmin = 5, nbmax = 5;
3564     int                nbin     = 100;
3565     gmx_bool           use_dhdl = FALSE;
3566     gmx_bool           calc_s, calc_v;
3567     t_pargs            pa[] = {
3568         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3569         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3570         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3571         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3572         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3573         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3574         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3575         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3576     };
3577
3578     t_filenm           fnm[] = {
3579         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3580         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3581         { efXVG, "-o",  "bar",    ffOPTWR },
3582         { efXVG, "-oi", "barint", ffOPTWR },
3583         { efXVG, "-oh", "histogram", ffOPTWR }
3584     };
3585 #define NFILE asize(fnm)
3586
3587     int          f, i, j;
3588     int          nf = 0;    /* file counter */
3589     int          nbs;
3590     int          nfile_tot; /* total number of input files */
3591     int          nxvgfile = 0;
3592     int          nedrfile = 0;
3593     char       **fxvgnms;
3594     char       **fedrnms;
3595     sim_data_t   sim_data; /* the simulation data */
3596     barres_t    *results;  /* the results */
3597     int          nresults; /* number of results in results array */
3598
3599     double      *partsum;
3600     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3601     FILE        *fpb, *fpi;
3602     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3603     char         buf[STRLEN], buf2[STRLEN];
3604     char         ktformat[STRLEN], sktformat[STRLEN];
3605     char         kteformat[STRLEN], skteformat[STRLEN];
3606     output_env_t oenv;
3607     double       kT, beta;
3608     gmx_bool     result_OK = TRUE, bEE = TRUE;
3609
3610     gmx_bool     disc_err          = FALSE;
3611     double       sum_disc_err      = 0.; /* discretization error */
3612     gmx_bool     histrange_err     = FALSE;
3613     double       sum_histrange_err = 0.; /* histogram range error */
3614     double       stat_err          = 0.; /* statistical error */
3615
3616     if (!parse_common_args(&argc, argv,
3617                            PCA_CAN_VIEW,
3618                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3619     {
3620         return 0;
3621     }
3622
3623     if (opt2bSet("-f", NFILE, fnm))
3624     {
3625         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3626     }
3627     if (opt2bSet("-g", NFILE, fnm))
3628     {
3629         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3630     }
3631
3632     sim_data_init(&sim_data);
3633 #if 0
3634     /* make linked list */
3635     lb = &lambda_head;
3636     lambda_data_init(lb, 0, 0);
3637     lb->next = lb;
3638     lb->prev = lb;
3639 #endif
3640
3641
3642     nfile_tot = nxvgfile + nedrfile;
3643
3644     if (nfile_tot == 0)
3645     {
3646         gmx_fatal(FARGS, "No input files!");
3647     }
3648
3649     if (nd < 0)
3650     {
3651         gmx_fatal(FARGS, "Can not have negative number of digits");
3652     }
3653     prec = pow(10, -nd);
3654
3655     snew(partsum, (nbmax+1)*(nbmax+1));
3656     nf = 0;
3657
3658     /* read in all files. First xvg files */
3659     for (f = 0; f < nxvgfile; f++)
3660     {
3661         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3662         nf++;
3663     }
3664     /* then .edr files */
3665     for (f = 0; f < nedrfile; f++)
3666     {
3667         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3668         nf++;
3669     }
3670
3671     /* fix the times to allow for equilibration */
3672     sim_data_impose_times(&sim_data, begin, end);
3673
3674     if (opt2bSet("-oh", NFILE, fnm))
3675     {
3676         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3677     }
3678
3679     /* assemble the output structures from the lambdas */
3680     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3681
3682     sum_disc_err = barres_list_max_disc_err(results, nresults);
3683
3684     if (nresults == 0)
3685     {
3686         printf("\nNo results to calculate.\n");
3687         return 0;
3688     }
3689
3690     if (sum_disc_err > prec)
3691     {
3692         prec = sum_disc_err;
3693         nd   = ceil(-log10(prec));
3694         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3695     }
3696
3697
3698     /*sprintf(lamformat,"%%6.3f");*/
3699     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3700     /* the format strings of the results in kT */
3701     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3702     sprintf( sktformat, "%%%ds", 6+nd);
3703     /* the format strings of the errors in kT */
3704     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3705     sprintf( skteformat, "%%%ds", 4+nd);
3706     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3707     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3708
3709
3710
3711     fpb = NULL;
3712     if (opt2bSet("-o", NFILE, fnm))
3713     {
3714         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3715         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3716                             "\\lambda", buf, exvggtXYDY, oenv);
3717     }
3718
3719     fpi = NULL;
3720     if (opt2bSet("-oi", NFILE, fnm))
3721     {
3722         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3723         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3724                        "\\lambda", buf, oenv);
3725     }
3726
3727
3728
3729     if (nbmin > nbmax)
3730     {
3731         nbmin = nbmax;
3732     }
3733
3734     /* first calculate results */
3735     bEE      = TRUE;
3736     disc_err = FALSE;
3737     for (f = 0; f < nresults; f++)
3738     {
3739         /* Determine the free energy difference with a factor of 10
3740          * more accuracy than requested for printing.
3741          */
3742         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3743                  &bEE, partsum);
3744
3745         if (results[f].dg_disc_err > prec/10.)
3746         {
3747             disc_err = TRUE;
3748         }
3749         if (results[f].dg_histrange_err > prec/10.)
3750         {
3751             histrange_err = TRUE;
3752         }
3753     }
3754
3755     /* print results in kT */
3756     kT   = BOLTZ*temp;
3757     beta = 1/kT;
3758
3759     printf("\nTemperature: %g K\n", temp);
3760
3761     printf("\nDetailed results in kT (see help for explanation):\n\n");
3762     printf("%6s ", " lam_A");
3763     printf("%6s ", " lam_B");
3764     printf(sktformat,  "DG ");
3765     if (bEE)
3766     {
3767         printf(skteformat, "+/- ");
3768     }
3769     if (disc_err)
3770     {
3771         printf(skteformat, "disc ");
3772     }
3773     if (histrange_err)
3774     {
3775         printf(skteformat, "range ");
3776     }
3777     printf(sktformat,  "s_A ");
3778     if (bEE)
3779     {
3780         printf(skteformat, "+/- " );
3781     }
3782     printf(sktformat,  "s_B ");
3783     if (bEE)
3784     {
3785         printf(skteformat, "+/- " );
3786     }
3787     printf(sktformat,  "stdev ");
3788     if (bEE)
3789     {
3790         printf(skteformat, "+/- ");
3791     }
3792     printf("\n");
3793     for (f = 0; f < nresults; f++)
3794     {
3795         lambda_vec_print_short(results[f].a->native_lambda, buf);
3796         printf("%s ", buf);
3797         lambda_vec_print_short(results[f].b->native_lambda, buf);
3798         printf("%s ", buf);
3799         printf(ktformat,  results[f].dg);
3800         printf(" ");
3801         if (bEE)
3802         {
3803             printf(kteformat, results[f].dg_err);
3804             printf(" ");
3805         }
3806         if (disc_err)
3807         {
3808             printf(kteformat, results[f].dg_disc_err);
3809             printf(" ");
3810         }
3811         if (histrange_err)
3812         {
3813             printf(kteformat, results[f].dg_histrange_err);
3814             printf(" ");
3815         }
3816         printf(ktformat,  results[f].sa);
3817         printf(" ");
3818         if (bEE)
3819         {
3820             printf(kteformat, results[f].sa_err);
3821             printf(" ");
3822         }
3823         printf(ktformat,  results[f].sb);
3824         printf(" ");
3825         if (bEE)
3826         {
3827             printf(kteformat, results[f].sb_err);
3828             printf(" ");
3829         }
3830         printf(ktformat,  results[f].dg_stddev);
3831         printf(" ");
3832         if (bEE)
3833         {
3834             printf(kteformat, results[f].dg_stddev_err);
3835         }
3836         printf("\n");
3837
3838         /* Check for negative relative entropy with a 95% certainty. */
3839         if (results[f].sa < -2*results[f].sa_err ||
3840             results[f].sb < -2*results[f].sb_err)
3841         {
3842             result_OK = FALSE;
3843         }
3844     }
3845
3846     if (!result_OK)
3847     {
3848         printf("\nWARNING: Some of these results violate the Second Law of "
3849                "Thermodynamics: \n"
3850                "         This is can be the result of severe undersampling, or "
3851                "(more likely)\n"
3852                "         there is something wrong with the simulations.\n");
3853     }
3854
3855
3856     /* final results in kJ/mol */
3857     printf("\n\nFinal results in kJ/mol:\n\n");
3858     dg_tot  = 0;
3859     for (f = 0; f < nresults; f++)
3860     {
3861
3862         if (fpi != NULL)
3863         {
3864             lambda_vec_print_short(results[f].a->native_lambda, buf);
3865             fprintf(fpi, xvg2format, buf, dg_tot);
3866         }
3867
3868
3869         if (fpb != NULL)
3870         {
3871             lambda_vec_print_intermediate(results[f].a->native_lambda,
3872                                           results[f].b->native_lambda,
3873                                           buf);
3874
3875             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3876         }
3877
3878         printf("point ");
3879         lambda_vec_print_short(results[f].a->native_lambda, buf);
3880         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3881         printf("%s - %s", buf, buf2);
3882         printf(",   DG ");
3883
3884         printf(dgformat, results[f].dg*kT);
3885         if (bEE)
3886         {
3887             printf(" +/- ");
3888             printf(dgformat, results[f].dg_err*kT);
3889         }
3890         if (histrange_err)
3891         {
3892             printf(" (max. range err. = ");
3893             printf(dgformat, results[f].dg_histrange_err*kT);
3894             printf(")");
3895             sum_histrange_err += results[f].dg_histrange_err*kT;
3896         }
3897
3898         printf("\n");
3899         dg_tot += results[f].dg;
3900     }
3901     printf("\n");
3902     printf("total ");
3903     lambda_vec_print_short(results[0].a->native_lambda, buf);
3904     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3905     printf("%s - %s", buf, buf2);
3906     printf(",   DG ");
3907
3908     printf(dgformat, dg_tot*kT);
3909     if (bEE)
3910     {
3911         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3912         printf(" +/- ");
3913         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3914     }
3915     printf("\n");
3916     if (disc_err)
3917     {
3918         printf("\nmaximum discretization error = ");
3919         printf(dgformat, sum_disc_err);
3920         if (bEE && stat_err < sum_disc_err)
3921         {
3922             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3923         }
3924     }
3925     if (histrange_err)
3926     {
3927         printf("\nmaximum histogram range error = ");
3928         printf(dgformat, sum_histrange_err);
3929         if (bEE && stat_err < sum_histrange_err)
3930         {
3931             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3932         }
3933
3934     }
3935     printf("\n");
3936
3937
3938     if (fpi != NULL)
3939     {
3940         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3941         fprintf(fpi, xvg2format, buf, dg_tot);
3942         ffclose(fpi);
3943     }
3944
3945     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3946     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3947
3948     return 0;
3949 }