src/gromacs/gmxana/gmx_bar.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * Green Red Orange Magenta Azure Cyan Skyblue
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41 #include <string.h>
  42 #include <ctype.h>
  43 #include <math.h>
  44 #include <float.h>
  45
  46 #include "sysstuff.h"
  47 #include "typedefs.h"
  48 #include "smalloc.h"
  49 #include "futil.h"
  50 #include "statutil.h"
  51 #include "macros.h"
  52 #include "enxio.h"
  53 #include "physics.h"
  54 #include "gmx_fatal.h"
  55 #include "xvgr.h"
  56 #include "gmx_ana.h"
  57 #include "maths.h"
  58 #include "string2.h"
  59 #include "names.h"
  60 #include "mdebin.h"
  61
  62
  63 /* Structure for the names of lambda vector components */
  64 typedef struct lambda_components_t
  65 {
  66     char **names;  /* Array of strings with names for the lambda vector
  67                       components */
  68     int    N;      /* The number of components */
  69     int    Nalloc; /* The number of allocated components */
  70 } lambda_components_t;
  71
  72 /* Structure for a lambda vector or a dhdl derivative direction */
  73 typedef struct lambda_vec_t
  74 {
  75     double                    *val;   /* The lambda vector component values. Only valid if
  76                                          dhdl == -1 */
  77     int                        dhdl;  /* The coordinate index for the derivative described by this
  78                                          structure, or -1 */
  79     const lambda_components_t *lc;    /* the associated lambda_components
  80                                          structure */
  81     int                        index; /* The state number (init-lambda-state) of this lambda
  82                                          vector, if known. If not, it is set to -1 */
  83 } lambda_vec_t;
  84
  85 /* the dhdl.xvg data from a simulation */
  86 typedef struct xvg_t
  87 {
  88     const char   *filename;
  89     int           ftp;           /* file type */
  90     int           nset;          /* number of lambdas, including dhdl */
  91     int          *np;            /* number of data points (du or hists) per lambda */
  92     int           np_alloc;      /* number of points (du or hists) allocated */
  93     double        temp;          /* temperature */
  94     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  95     double       *t;             /* the times (of second index for y) */
  96     double      **y;             /* the dU values. y[0] holds the derivative, while
  97                                     further ones contain the energy differences between
  98                                     the native lambda and the 'foreign' lambdas. */
  99     lambda_vec_t  native_lambda; /* the native lambda */
 100
 101     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 102 } xvg_t;
 103
 104
 105 typedef struct hist_t
 106 {
 107     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 108     double          dx[2];                  /* the histogram spacing. The reverse
 109                                                dx is the negative of the forward dx.*/
 110     gmx_large_int_t x0[2];                  /* the (forward + reverse) histogram start
 111                                                point(s) as int */
 112
 113     int             nbin[2];                /* the (forward+reverse) number of bins */
 114     gmx_large_int_t sum;                    /* the total number of counts. Must be
 115                                                the same for forward + reverse.  */
 116     int             nhist;                  /* number of hist datas (forward or reverse) */
 117
 118     double          start_time, delta_time; /* start time, end time of histogram */
 119 } hist_t;
 120
 121
 122 /* an aggregate of samples for partial free energy calculation */
 123 typedef struct samples_t
 124 {
 125     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 126     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 127     double        temp;           /* the temperature */
 128     gmx_bool      derivative;     /* whether this sample is a derivative */
 129
 130     /* The samples come either as either delta U lists: */
 131     int     ndu;                    /* the number of delta U samples */
 132     double *du;                     /* the delta u's */
 133     double *t;                      /* the times associated with those samples, or: */
 134     double  start_time, delta_time; /*start time and delta time for linear time*/
 135
 136     /* or as histograms: */
 137     hist_t *hist; /* a histogram */
 138
 139     /* allocation data: (not NULL for data 'owned' by this struct) */
 140     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 141     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 142     hist_t         *hist_alloc;          /* allocated hist */
 143
 144     gmx_large_int_t ntot;                /* total number of samples */
 145     const char     *filename;            /* the file name this sample comes from */
 146 } samples_t;
 147
 148 /* a sample range (start to end for du-style data, or boolean
 149     for both du-style data and histograms */
 150 typedef struct sample_range_t
 151 {
 152     int        start, end; /* start and end index for du style data */
 153     gmx_bool   use;        /* whether to use this sample */
 154
 155     samples_t *s;          /* the samples this range belongs to */
 156 } sample_range_t;
 157
 158
 159 /* a collection of samples for a partial free energy calculation
 160     (i.e. the collection of samples from one native lambda to one
 161     foreign lambda) */
 162 typedef struct sample_coll_t
 163 {
 164     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 165                                           in the histogram */
 166     lambda_vec_t   *foreign_lambda;    /* collection */
 167     double          temp;              /* the temperature */
 168
 169     int             nsamples;          /* the number of samples */
 170     samples_t     **s;                 /* the samples themselves */
 171     sample_range_t *r;                 /* the sample ranges */
 172     int             nsamples_alloc;    /* number of allocated samples */
 173
 174     gmx_large_int_t ntot;              /* total number of samples in the ranges of
 175                                           this collection */
 176
 177     struct sample_coll_t *next, *prev; /* next and previous in the list */
 178 } sample_coll_t;
 179
 180 /* all the samples associated with a lambda point */
 181 typedef struct lambda_data_t
 182 {
 183     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 184     double                temp;        /* temperature */
 185
 186     sample_coll_t        *sc;          /* the samples */
 187
 188     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 189
 190     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 191 } lambda_data_t;
 192
 193 /* Top-level data structure of simulation data */
 194 typedef struct sim_data_t
 195 {
 196     lambda_data_t      *lb;      /* a lambda data linked list */
 197     lambda_data_t       lb_head; /* The head element of the linked list */
 198
 199     lambda_components_t lc;      /* the allowed components of the lambda
 200                                     vectors */
 201 } sim_data_t;
 202
 203 /* Top-level data structure with calculated values. */
 204 typedef struct {
 205     sample_coll_t *a, *b;            /* the simulation data */
 206
 207     double         dg;               /* the free energy difference */
 208     double         dg_err;           /* the free energy difference */
 209
 210     double         dg_disc_err;      /* discretization error */
 211     double         dg_histrange_err; /* histogram range error */
 212
 213     double         sa;               /* relative entropy of b in state a */
 214     double         sa_err;           /* error in sa */
 215     double         sb;               /* relative entropy of a in state b */
 216     double         sb_err;           /* error in sb */
 217
 218     double         dg_stddev;        /* expected dg stddev per sample */
 219     double         dg_stddev_err;    /* error in dg_stddev */
 220 } barres_t;
 221
 222
 223 /* Initialize a lambda_components structure */
 224 static void lambda_components_init(lambda_components_t *lc)
 225 {
 226     lc->N      = 0;
 227     lc->Nalloc = 2;
 228     snew(lc->names, lc->Nalloc);
 229 }
 230
 231 /* Add a component to a lambda_components structure */
 232 static void lambda_components_add(lambda_components_t *lc,
 233                                   const char *name, size_t name_length)
 234 {
 235     while (lc->N + 1 > lc->Nalloc)
 236     {
 237         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 238         srealloc( lc->names, lc->Nalloc );
 239     }
 240     snew(lc->names[lc->N], name_length+1);
 241     strncpy(lc->names[lc->N], name, name_length);
 242     lc->N++;
 243 }
 244
 245 /* check whether a component with index 'index' matches the given name, or
 246    is also NULL. Returns TRUE if this is the case.
 247    the string name does not need to end */
 248 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 249                                         int                        index,
 250                                         const char                *name,
 251                                         size_t                     name_length)
 252 {
 253     size_t len;
 254     if (index >= lc->N)
 255     {
 256         return FALSE;
 257     }
 258     if (name == NULL && lc->names[index] == NULL)
 259     {
 260         return TRUE;
 261     }
 262     if ((name == NULL) != (lc->names[index] == NULL))
 263     {
 264         return FALSE;
 265     }
 266     len = strlen(lc->names[index]);
 267     if (len != name_length)
 268     {
 269         return FALSE;
 270     }
 271     if (strncmp(lc->names[index], name, name_length) == 0)
 272     {
 273         return TRUE;
 274     }
 275     return FALSE;
 276 }
 277
 278 /* Find the index of a given lambda component name, or -1 if not found */
 279 static int lambda_components_find(const lambda_components_t *lc,
 280                                   const char                *name,
 281                                   size_t                     name_length)
 282 {
 283     int i;
 284
 285     for (i = 0; i < lc->N; i++)
 286     {
 287         if (strncmp(lc->names[i], name, name_length) == 0)
 288         {
 289             return i;
 290         }
 291     }
 292     return -1;
 293 }
 294
 295
 296
 297 /* initialize a lambda vector */
 298 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 299 {
 300     snew(lv->val, lc->N);
 301     lv->index = -1;
 302     lv->dhdl  = -1;
 303     lv->lc    = lc;
 304 }
 305
 306 static void lambda_vec_destroy(lambda_vec_t *lv)
 307 {
 308     sfree(lv->val);
 309 }
 310
 311 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 312 {
 313     int i;
 314
 315     lambda_vec_init(lv, orig->lc);
 316     lv->dhdl  = orig->dhdl;
 317     lv->index = orig->index;
 318     for (i = 0; i < lv->lc->N; i++)
 319     {
 320         lv->val[i] = orig->val[i];
 321     }
 322 }
 323
 324 /* write a lambda vec to a preallocated string */
 325 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 326 {
 327     int    i;
 328     size_t np;
 329
 330     str[0] = 0; /* reset the string */
 331     if (lv->dhdl < 0)
 332     {
 333         if (named)
 334         {
 335             str += sprintf(str, "delta H to ");
 336         }
 337         if (lv->lc->N > 1)
 338         {
 339             str += sprintf(str, "(");
 340         }
 341         for (i = 0; i < lv->lc->N; i++)
 342         {
 343             str += sprintf(str, "%g", lv->val[i]);
 344             if (i < lv->lc->N-1)
 345             {
 346                 str += sprintf(str, ", ");
 347             }
 348         }
 349         if (lv->lc->N > 1)
 350         {
 351             str += sprintf(str, ")");
 352         }
 353     }
 354     else
 355     {
 356         /* this lambda vector describes a derivative */
 357         str += sprintf(str, "dH/dl");
 358         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 359         {
 360             str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 361         }
 362     }
 363 }
 364
 365 /* write a shortened version of the lambda vec to a preallocated string */
 366 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 367 {
 368     int    i;
 369     size_t np;
 370
 371     if (lv->index >= 0)
 372     {
 373         sprintf(str, "%6d", lv->index);
 374     }
 375     else
 376     {
 377         if (lv->dhdl < 0)
 378         {
 379             sprintf(str, "%6.3f", lv->val[0]);
 380         }
 381         else
 382         {
 383             sprintf(str, "dH/dl[%d]", lv->dhdl);
 384         }
 385     }
 386 }
 387
 388 /* write an intermediate version of two lambda vecs to a preallocated string */
 389 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 390                                           const lambda_vec_t *b, char *str)
 391 {
 392     int    i;
 393     size_t np;
 394
 395     str[0] = 0;
 396     if ( (a->index >= 0) && (b->index >= 0) )
 397     {
 398         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 399     }
 400     else
 401     {
 402         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 403         {
 404             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 405         }
 406     }
 407 }
 408
 409
 410
 411 /* calculate the difference in lambda vectors: c = a-b.
 412    c must be initialized already, and a and b must describe non-derivative
 413    lambda points */
 414 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 415                             lambda_vec_t *c)
 416 {
 417     int i;
 418
 419     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 420     {
 421         gmx_fatal(FARGS,
 422                   "Trying to calculate the difference between derivatives instead of lambda points");
 423     }
 424     if ((a->lc != b->lc) || (a->lc != c->lc) )
 425     {
 426         gmx_fatal(FARGS,
 427                   "Trying to calculate the difference lambdas with differing basis set");
 428     }
 429     for (i = 0; i < a->lc->N; i++)
 430     {
 431         c->val[i] = a->val[i] - b->val[i];
 432     }
 433 }
 434
 435 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 436    a and b must describe non-derivative lambda points */
 437 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 438 {
 439     int    i;
 440     double ret = 0.;
 441
 442     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 443     {
 444         gmx_fatal(FARGS,
 445                   "Trying to calculate the difference between derivatives instead of lambda points");
 446     }
 447     if (a->lc != b->lc)
 448     {
 449         gmx_fatal(FARGS,
 450                   "Trying to calculate the difference lambdas with differing basis set");
 451     }
 452     for (i = 0; i < a->lc->N; i++)
 453     {
 454         double df = a->val[i] - b->val[i];
 455         ret += df*df;
 456     }
 457     return sqrt(ret);
 458 }
 459
 460
 461 /* check whether two lambda vectors are the same */
 462 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 463 {
 464     int i;
 465
 466     if (a->lc != b->lc)
 467     {
 468         return FALSE;
 469     }
 470     if (a->dhdl < 0)
 471     {
 472         for (i = 0; i < a->lc->N; i++)
 473         {
 474             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 475             {
 476                 return FALSE;
 477             }
 478         }
 479         return TRUE;
 480     }
 481     else
 482     {
 483         /* they're derivatives, so we check whether the indices match */
 484         return (a->dhdl == b->dhdl);
 485     }
 486 }
 487
 488 /* Compare the sort order of two foreign lambda vectors
 489
 490     returns 1 if a is 'bigger' than b,
 491     returns 0 if they're the same,
 492     returns -1 if a is 'smaller' than b.*/
 493 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 494                                        const lambda_vec_t *b)
 495 {
 496     int      i;
 497     double   norm_a    = 0, norm_b = 0;
 498     gmx_bool different = FALSE;
 499
 500     if (a->lc != b->lc)
 501     {
 502         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 503     }
 504     /* if either one has an index we sort based on that */
 505     if ((a->index >= 0) || (b->index >= 0))
 506     {
 507         if (a->index == b->index)
 508         {
 509             return 0;
 510         }
 511         return (a->index > b->index) ? 1 : -1;
 512     }
 513     if (a->dhdl >= 0 || b->dhdl >= 0)
 514     {
 515         /* lambda vectors that are derivatives always sort higher than those
 516            without derivatives */
 517         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 518         {
 519             return (a->dhdl >= 0) ? 1 : -1;
 520         }
 521         return a->dhdl > b->dhdl;
 522     }
 523
 524     /* neither has an index, so we can only sort on the lambda components,
 525        which is only valid if there is one component */
 526     for (i = 0; i < a->lc->N; i++)
 527     {
 528         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 529         {
 530             different = TRUE;
 531         }
 532         norm_a += a->val[i]*a->val[i];
 533         norm_b += b->val[i]*b->val[i];
 534     }
 535     if (!different)
 536     {
 537         return 0;
 538     }
 539     return norm_a > norm_b;
 540 }
 541
 542 /* Compare the sort order of two native lambda vectors
 543
 544     returns 1 if a is 'bigger' than b,
 545     returns 0 if they're the same,
 546     returns -1 if a is 'smaller' than b.*/
 547 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 548                                       const lambda_vec_t *b)
 549 {
 550     int i;
 551
 552     if (a->lc != b->lc)
 553     {
 554         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 555     }
 556     /* if either one has an index we sort based on that */
 557     if ((a->index >= 0) || (b->index >= 0))
 558     {
 559         if (a->index == b->index)
 560         {
 561             return 0;
 562         }
 563         return (a->index > b->index) ? 1 : -1;
 564     }
 565     /* neither has an index, so we can only sort on the lambda components,
 566        which is only valid if there is one component */
 567     if (a->lc->N > 1)
 568     {
 569         gmx_fatal(FARGS,
 570                   "Can't compare lambdas with no index and > 1 component");
 571     }
 572     if (a->dhdl >= 0 || b->dhdl >= 0)
 573     {
 574         gmx_fatal(FARGS,
 575                   "Can't compare native lambdas that are derivatives");
 576     }
 577     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 578     {
 579         return 0;
 580     }
 581     return a->val[0] > b->val[0] ? 1 : -1;
 582 }
 583
 584
 585
 586
 587 static void hist_init(hist_t *h, int nhist, int *nbin)
 588 {
 589     int i;
 590     if (nhist > 2)
 591     {
 592         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 593     }
 594     for (i = 0; i < nhist; i++)
 595     {
 596         snew(h->bin[i], nbin[i]);
 597         h->x0[i]      = 0;
 598         h->nbin[i]    = nbin[i];
 599         h->start_time = h->delta_time = 0;
 600         h->dx[i]      = 0;
 601     }
 602     h->sum   = 0;
 603     h->nhist = nhist;
 604 }
 605
 606 static void hist_destroy(hist_t *h)
 607 {
 608     sfree(h->bin);
 609 }
 610
 611
 612 static void xvg_init(xvg_t *ba)
 613 {
 614     ba->filename = NULL;
 615     ba->nset     = 0;
 616     ba->np_alloc = 0;
 617     ba->np       = NULL;
 618     ba->y        = NULL;
 619 }
 620
 621 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 622                          lambda_vec_t *foreign_lambda, double temp,
 623                          gmx_bool derivative, const char *filename)
 624 {
 625     s->native_lambda  = native_lambda;
 626     s->foreign_lambda = foreign_lambda;
 627     s->temp           = temp;
 628     s->derivative     = derivative;
 629
 630     s->ndu        = 0;
 631     s->du         = NULL;
 632     s->t          = NULL;
 633     s->start_time = s->delta_time = 0;
 634     s->hist       = NULL;
 635     s->du_alloc   = NULL;
 636     s->t_alloc    = NULL;
 637     s->hist_alloc = NULL;
 638     s->ndu_alloc  = 0;
 639     s->nt_alloc   = 0;
 640
 641     s->ntot     = 0;
 642     s->filename = filename;
 643 }
 644
 645 static void sample_range_init(sample_range_t *r, samples_t *s)
 646 {
 647     r->start = 0;
 648     r->end   = s->ndu;
 649     r->use   = TRUE;
 650     r->s     = NULL;
 651 }
 652
 653 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 654                              lambda_vec_t *foreign_lambda, double temp)
 655 {
 656     sc->native_lambda  = native_lambda;
 657     sc->foreign_lambda = foreign_lambda;
 658     sc->temp           = temp;
 659
 660     sc->nsamples       = 0;
 661     sc->s              = NULL;
 662     sc->r              = NULL;
 663     sc->nsamples_alloc = 0;
 664
 665     sc->ntot = 0;
 666     sc->next = sc->prev = NULL;
 667 }
 668
 669 static void sample_coll_destroy(sample_coll_t *sc)
 670 {
 671     /* don't free the samples themselves */
 672     sfree(sc->r);
 673     sfree(sc->s);
 674 }
 675
 676
 677 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 678                              double temp)
 679 {
 680     l->lambda = native_lambda;
 681     l->temp   = temp;
 682
 683     l->next = NULL;
 684     l->prev = NULL;
 685
 686     l->sc = &(l->sc_head);
 687
 688     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 689     l->sc->next = l->sc;
 690     l->sc->prev = l->sc;
 691 }
 692
 693 static void barres_init(barres_t *br)
 694 {
 695     br->dg            = 0;
 696     br->dg_err        = 0;
 697     br->sa            = 0;
 698     br->sa_err        = 0;
 699     br->sb            = 0;
 700     br->sb_err        = 0;
 701     br->dg_stddev     = 0;
 702     br->dg_stddev_err = 0;
 703
 704     br->a = NULL;
 705     br->b = NULL;
 706 }
 707
 708
 709 /* calculate the total number of samples in a sample collection */
 710 static void sample_coll_calc_ntot(sample_coll_t *sc)
 711 {
 712     int i;
 713
 714     sc->ntot = 0;
 715     for (i = 0; i < sc->nsamples; i++)
 716     {
 717         if (sc->r[i].use)
 718         {
 719             if (sc->s[i]->hist)
 720             {
 721                 sc->ntot += sc->s[i]->ntot;
 722             }
 723             else
 724             {
 725                 sc->ntot += sc->r[i].end - sc->r[i].start;
 726             }
 727         }
 728     }
 729 }
 730
 731
 732 /* find the barsamples_t associated with a lambda that corresponds to
 733    a specific foreign lambda */
 734 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 735                                                    lambda_vec_t  *foreign_lambda)
 736 {
 737     sample_coll_t *sc = l->sc->next;
 738
 739     while (sc != l->sc)
 740     {
 741         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 742         {
 743             return sc;
 744         }
 745         sc = sc->next;
 746     }
 747
 748     return NULL;
 749 }
 750
 751 /* insert li into an ordered list of lambda_colls */
 752 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 753 {
 754     sample_coll_t *scn = l->sc->next;
 755     while ( (scn != l->sc) )
 756     {
 757         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 758         {
 759             break;
 760         }
 761         scn = scn->next;
 762     }
 763     /* now insert it before the found scn */
 764     sc->next        = scn;
 765     sc->prev        = scn->prev;
 766     scn->prev->next = sc;
 767     scn->prev       = sc;
 768 }
 769
 770 /* insert li into an ordered list of lambdas */
 771 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 772 {
 773     lambda_data_t *lc = head->next;
 774     while (lc != head)
 775     {
 776         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 777         {
 778             break;
 779         }
 780         lc = lc->next;
 781     }
 782     /* now insert ourselves before the found lc */
 783     li->next       = lc;
 784     li->prev       = lc->prev;
 785     lc->prev->next = li;
 786     lc->prev       = li;
 787 }
 788
 789 /* insert a sample and a sample_range into a sample_coll. The
 790     samples are stored as a pointer, the range is copied. */
 791 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 792                                       sample_range_t *r)
 793 {
 794     /* first check if it belongs here */
 795     if (sc->temp != s->temp)
 796     {
 797         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 798                   s->filename, sc->next->s[0]->filename);
 799     }
 800     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 801     {
 802         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 803                   s->filename, sc->next->s[0]->filename);
 804     }
 805     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 806     {
 807         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 808                   s->filename, sc->next->s[0]->filename);
 809     }
 810
 811     /* check if there's room */
 812     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 813     {
 814         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 815         srenew(sc->s, sc->nsamples_alloc);
 816         srenew(sc->r, sc->nsamples_alloc);
 817     }
 818     sc->s[sc->nsamples] = s;
 819     sc->r[sc->nsamples] = *r;
 820     sc->nsamples++;
 821
 822     sample_coll_calc_ntot(sc);
 823 }
 824
 825 /* insert a sample into a lambda_list, creating the right sample_coll if
 826    neccesary */
 827 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 828 {
 829     gmx_bool       found = FALSE;
 830     sample_coll_t *sc;
 831     sample_range_t r;
 832
 833     lambda_data_t *l = head->next;
 834
 835     /* first search for the right lambda_data_t */
 836     while (l != head)
 837     {
 838         if (lambda_vec_same(l->lambda, s->native_lambda) )
 839         {
 840             found = TRUE;
 841             break;
 842         }
 843         l = l->next;
 844     }
 845
 846     if (!found)
 847     {
 848         snew(l, 1);                                     /* allocate a new one */
 849         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 850         lambda_data_insert_lambda(head, l);             /* add it to the list */
 851     }
 852
 853     /* now look for a sample collection */
 854     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 855     if (!sc)
 856     {
 857         snew(sc, 1); /* allocate a new one */
 858         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 859         lambda_data_insert_sample_coll(l, sc);
 860     }
 861
 862     /* now insert the samples into the sample coll */
 863     sample_range_init(&r, s);
 864     sample_coll_insert_sample(sc, s, &r);
 865 }
 866
 867
 868 /* make a histogram out of a sample collection */
 869 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 870                                   int *nbin_alloc, int *nbin,
 871                                   double *dx, double *xmin, int nbin_default)
 872 {
 873     int      i, j, k;
 874     gmx_bool dx_set   = FALSE;
 875     gmx_bool xmin_set = FALSE;
 876
 877     gmx_bool xmax_set      = FALSE;
 878     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 879                                        limits of a histogram */
 880     double   xmax          = -1;
 881
 882     /* first determine dx and xmin; try the histograms */
 883     for (i = 0; i < sc->nsamples; i++)
 884     {
 885         if (sc->s[i]->hist)
 886         {
 887             hist_t *hist = sc->s[i]->hist;
 888             for (k = 0; k < hist->nhist; k++)
 889             {
 890                 double hdx      = hist->dx[k];
 891                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 892
 893                 /* we use the biggest dx*/
 894                 if ( (!dx_set) || hist->dx[0] > *dx)
 895                 {
 896                     dx_set = TRUE;
 897                     *dx    = hist->dx[0];
 898                 }
 899                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 900                 {
 901                     xmin_set = TRUE;
 902                     *xmin    = (hist->x0[k]*hdx);
 903                 }
 904
 905                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 906                 {
 907                     xmax_set = TRUE;
 908                     xmax     = xmax_now;
 909                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 910                     {
 911                         xmax_set_hard = TRUE;
 912                     }
 913                 }
 914                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 915                 {
 916                     xmax_set_hard = TRUE;
 917                     xmax          = xmax_now;
 918                 }
 919             }
 920         }
 921     }
 922     /* and the delta us */
 923     for (i = 0; i < sc->nsamples; i++)
 924     {
 925         if (sc->s[i]->ndu > 0)
 926         {
 927             /* determine min and max */
 928             int    starti  = sc->r[i].start;
 929             int    endi    = sc->r[i].end;
 930             double du_xmin = sc->s[i]->du[starti];
 931             double du_xmax = sc->s[i]->du[starti];
 932             for (j = starti+1; j < endi; j++)
 933             {
 934                 if (sc->s[i]->du[j] < du_xmin)
 935                 {
 936                     du_xmin = sc->s[i]->du[j];
 937                 }
 938                 if (sc->s[i]->du[j] > du_xmax)
 939                 {
 940                     du_xmax = sc->s[i]->du[j];
 941                 }
 942             }
 943
 944             /* and now change the limits */
 945             if ( (!xmin_set) || (du_xmin < *xmin) )
 946             {
 947                 xmin_set = TRUE;
 948                 *xmin    = du_xmin;
 949             }
 950             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 951             {
 952                 xmax_set = TRUE;
 953                 xmax     = du_xmax;
 954             }
 955         }
 956     }
 957
 958     if (!xmax_set || !xmin_set)
 959     {
 960         *nbin = 0;
 961         return;
 962     }
 963
 964
 965     if (!dx_set)
 966     {
 967         *nbin = nbin_default;
 968         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 969                                                be 0, and we count from 0 */
 970     }
 971     else
 972     {
 973         *nbin = (xmax-(*xmin))/(*dx);
 974     }
 975
 976     if (*nbin > *nbin_alloc)
 977     {
 978         *nbin_alloc = *nbin;
 979         srenew(*bin, *nbin_alloc);
 980     }
 981
 982     /* reset the histogram */
 983     for (i = 0; i < (*nbin); i++)
 984     {
 985         (*bin)[i] = 0;
 986     }
 987
 988     /* now add the actual data */
 989     for (i = 0; i < sc->nsamples; i++)
 990     {
 991         if (sc->s[i]->hist)
 992         {
 993             hist_t *hist = sc->s[i]->hist;
 994             for (k = 0; k < hist->nhist; k++)
 995             {
 996                 double hdx       = hist->dx[k];
 997                 double xmin_hist = hist->x0[k]*hdx;
 998                 for (j = 0; j < hist->nbin[k]; j++)
 999                 {
1000                     /* calculate the bin corresponding to the middle of the
1001                        original bin */
1002                     double x     = hdx*(j+0.5) + xmin_hist;
1003                     int    binnr = (int)((x-(*xmin))/(*dx));
1004
1005                     if (binnr >= *nbin || binnr < 0)
1006                     {
1007                         binnr = (*nbin)-1;
1008                     }
1009
1010                     (*bin)[binnr] += hist->bin[k][j];
1011                 }
1012             }
1013         }
1014         else
1015         {
1016             int starti = sc->r[i].start;
1017             int endi   = sc->r[i].end;
1018             for (j = starti; j < endi; j++)
1019             {
1020                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1021                 if (binnr >= *nbin || binnr < 0)
1022                 {
1023                     binnr = (*nbin)-1;
1024                 }
1025
1026                 (*bin)[binnr]++;
1027             }
1028         }
1029     }
1030 }
1031
1032 /* write a collection of histograms to a file */
1033 void sim_data_histogram(sim_data_t *sd, const char *filename,
1034                         int nbin_default, const output_env_t oenv)
1035 {
1036     char           label_x[STRLEN];
1037     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1038     const char    *title   = "N(\\DeltaH)";
1039     const char    *label_y = "Samples";
1040     FILE          *fp;
1041     lambda_data_t *bl;
1042     int            nsets     = 0;
1043     char         **setnames  = NULL;
1044     gmx_bool       first_set = FALSE;
1045     /* histogram data: */
1046     int           *hist       = NULL;
1047     int            nbin       = 0;
1048     int            nbin_alloc = 0;
1049     double         dx         = 0;
1050     double         min        = 0;
1051     int            i;
1052     lambda_data_t *bl_head = sd->lb;
1053
1054     printf("\nWriting histogram to %s\n", filename);
1055     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1056
1057     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1058
1059     /* first get all the set names */
1060     bl = bl_head->next;
1061     /* iterate over all lambdas */
1062     while (bl != bl_head)
1063     {
1064         sample_coll_t *sc = bl->sc->next;
1065
1066         /* iterate over all samples */
1067         while (sc != bl->sc)
1068         {
1069             char buf[STRLEN], buf2[STRLEN];
1070
1071             nsets++;
1072             srenew(setnames, nsets);
1073             snew(setnames[nsets-1], STRLEN);
1074             if (sc->foreign_lambda->dhdl < 0)
1075             {
1076                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1077                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1078                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1079                         deltag, lambda, buf2, lambda, buf);
1080             }
1081             else
1082             {
1083                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1084                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1085                         dhdl, lambda, buf);
1086             }
1087             sc = sc->next;
1088         }
1089
1090         bl = bl->next;
1091     }
1092     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1093
1094
1095     /* now make the histograms */
1096     bl = bl_head->next;
1097     /* iterate over all lambdas */
1098     while (bl != bl_head)
1099     {
1100         sample_coll_t *sc = bl->sc->next;
1101
1102         /* iterate over all samples */
1103         while (sc != bl->sc)
1104         {
1105             if (!first_set)
1106             {
1107                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1108             }
1109
1110             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1111                                   nbin_default);
1112
1113             for (i = 0; i < nbin; i++)
1114             {
1115                 double xmin = i*dx + min;
1116                 double xmax = (i+1)*dx + min;
1117
1118                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1119             }
1120
1121             first_set = FALSE;
1122             sc        = sc->next;
1123         }
1124
1125         bl = bl->next;
1126     }
1127
1128     if (hist)
1129     {
1130         sfree(hist);
1131     }
1132
1133     xvgrclose(fp);
1134 }
1135
1136 /* create a collection (array) of barres_t object given a ordered linked list
1137    of barlamda_t sample collections */
1138 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1139                                     gmx_bool use_dhdl)
1140 {
1141     lambda_data_t *bl;
1142     int            nlambda = 0;
1143     barres_t      *res;
1144     int            i;
1145     gmx_bool       dhdl    = FALSE;
1146     gmx_bool       first   = TRUE;
1147     lambda_data_t *bl_head = sd->lb;
1148
1149     /* first count the lambdas */
1150     bl = bl_head->next;
1151     while (bl != bl_head)
1152     {
1153         nlambda++;
1154         bl = bl->next;
1155     }
1156     snew(res, nlambda-1);
1157
1158     /* next put the right samples in the res */
1159     *nres = 0;
1160     bl    = bl_head->next->next; /* we start with the second one. */
1161     while (bl != bl_head)
1162     {
1163         sample_coll_t *sc, *scprev;
1164         barres_t      *br = &(res[*nres]);
1165         /* there is always a previous one. we search for that as a foreign
1166            lambda: */
1167         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1168         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1169
1170         barres_init(br);
1171
1172         if (use_dhdl)
1173         {
1174             /* we use dhdl */
1175
1176             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1177             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1178
1179             if (first)
1180             {
1181                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1182                 dhdl = TRUE;
1183             }
1184             if (!dhdl)
1185             {
1186                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1187             }
1188         }
1189         else if (!scprev && !sc)
1190         {
1191             gmx_fatal(FARGS, "There is no path from lambda=%f -> %f that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1192         }
1193
1194         /* normal delta H */
1195         if (!scprev)
1196         {
1197             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->lambda, bl->prev->lambda);
1198         }
1199         if (!sc)
1200         {
1201             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->prev->lambda, bl->lambda);
1202         }
1203         br->a = scprev;
1204         br->b = sc;
1205
1206         first = FALSE;
1207         (*nres)++;
1208         bl = bl->next;
1209     }
1210     return res;
1211 }
1212
1213 /* estimate the maximum discretization error */
1214 static double barres_list_max_disc_err(barres_t *res, int nres)
1215 {
1216     int    i, j;
1217     double disc_err = 0.;
1218     double delta_lambda;
1219
1220     for (i = 0; i < nres; i++)
1221     {
1222         barres_t *br = &(res[i]);
1223
1224         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1225                                            br->a->native_lambda);
1226
1227         for (j = 0; j < br->a->nsamples; j++)
1228         {
1229             if (br->a->s[j]->hist)
1230             {
1231                 double Wfac = 1.;
1232                 if (br->a->s[j]->derivative)
1233                 {
1234                     Wfac =  delta_lambda;
1235                 }
1236
1237                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1238             }
1239         }
1240         for (j = 0; j < br->b->nsamples; j++)
1241         {
1242             if (br->b->s[j]->hist)
1243             {
1244                 double Wfac = 1.;
1245                 if (br->b->s[j]->derivative)
1246                 {
1247                     Wfac =  delta_lambda;
1248                 }
1249                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1250             }
1251         }
1252     }
1253     return disc_err;
1254 }
1255
1256
1257 /* impose start and end times on a sample collection, updating sample_ranges */
1258 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1259                                      double end_t)
1260 {
1261     int i;
1262     for (i = 0; i < sc->nsamples; i++)
1263     {
1264         samples_t      *s = sc->s[i];
1265         sample_range_t *r = &(sc->r[i]);
1266         if (s->hist)
1267         {
1268             double end_time = s->hist->delta_time*s->hist->sum +
1269                 s->hist->start_time;
1270             if (s->hist->start_time < begin_t || end_time > end_t)
1271             {
1272                 r->use = FALSE;
1273             }
1274         }
1275         else
1276         {
1277             if (!s->t)
1278             {
1279                 double end_time;
1280                 if (s->start_time < begin_t)
1281                 {
1282                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1283                 }
1284                 end_time = s->delta_time*s->ndu + s->start_time;
1285                 if (end_time > end_t)
1286                 {
1287                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1288                 }
1289             }
1290             else
1291             {
1292                 int j;
1293                 for (j = 0; j < s->ndu; j++)
1294                 {
1295                     if (s->t[j] < begin_t)
1296                     {
1297                         r->start = j;
1298                     }
1299
1300                     if (s->t[j] >= end_t)
1301                     {
1302                         r->end = j;
1303                         break;
1304                     }
1305                 }
1306             }
1307             if (r->start > r->end)
1308             {
1309                 r->use = FALSE;
1310             }
1311         }
1312     }
1313     sample_coll_calc_ntot(sc);
1314 }
1315
1316 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1317 {
1318     double         first_t, last_t;
1319     double         begin_t, end_t;
1320     lambda_data_t *lc;
1321     lambda_data_t *head = sd->lb;
1322     int            j;
1323
1324     if (begin <= 0 && end < 0)
1325     {
1326         return;
1327     }
1328
1329     /* first determine the global start and end times */
1330     first_t = -1;
1331     last_t  = -1;
1332     lc      = head->next;
1333     while (lc != head)
1334     {
1335         sample_coll_t *sc = lc->sc->next;
1336         while (sc != lc->sc)
1337         {
1338             for (j = 0; j < sc->nsamples; j++)
1339             {
1340                 double start_t, end_t;
1341
1342                 start_t = sc->s[j]->start_time;
1343                 end_t   =   sc->s[j]->start_time;
1344                 if (sc->s[j]->hist)
1345                 {
1346                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1347                 }
1348                 else
1349                 {
1350                     if (sc->s[j]->t)
1351                     {
1352                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1353                     }
1354                     else
1355                     {
1356                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1357                     }
1358                 }
1359
1360                 if (start_t < first_t || first_t < 0)
1361                 {
1362                     first_t = start_t;
1363                 }
1364                 if (end_t > last_t)
1365                 {
1366                     last_t = end_t;
1367                 }
1368             }
1369             sc = sc->next;
1370         }
1371         lc = lc->next;
1372     }
1373
1374     /* calculate the actual times */
1375     if (begin > 0)
1376     {
1377         begin_t = begin;
1378     }
1379     else
1380     {
1381         begin_t = first_t;
1382     }
1383
1384     if (end > 0)
1385     {
1386         end_t = end;
1387     }
1388     else
1389     {
1390         end_t = last_t;
1391     }
1392     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1393
1394     if (begin_t > end_t)
1395     {
1396         return;
1397     }
1398     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1399
1400     /* then impose them */
1401     lc = head->next;
1402     while (lc != head)
1403     {
1404         sample_coll_t *sc = lc->sc->next;
1405         while (sc != lc->sc)
1406         {
1407             sample_coll_impose_times(sc, begin_t, end_t);
1408             sc = sc->next;
1409         }
1410         lc = lc->next;
1411     }
1412 }
1413
1414
1415 /* create subsample i out of ni from an existing sample_coll */
1416 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1417                                              sample_coll_t *sc_orig,
1418                                              int i, int ni)
1419 {
1420     int             j;
1421     int             hist_start, hist_end;
1422
1423     gmx_large_int_t ntot_start;
1424     gmx_large_int_t ntot_end;
1425     gmx_large_int_t ntot_so_far;
1426
1427     *sc = *sc_orig; /* just copy all fields */
1428
1429     /* allocate proprietary memory */
1430     snew(sc->s, sc_orig->nsamples);
1431     snew(sc->r, sc_orig->nsamples);
1432
1433     /* copy the samples */
1434     for (j = 0; j < sc_orig->nsamples; j++)
1435     {
1436         sc->s[j] = sc_orig->s[j];
1437         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1438     }
1439
1440     /* now fix start and end fields */
1441     /* the casts avoid possible overflows */
1442     ntot_start  = (gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1443     ntot_end    = (gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1444     ntot_so_far = 0;
1445     for (j = 0; j < sc->nsamples; j++)
1446     {
1447         gmx_large_int_t ntot_add;
1448         gmx_large_int_t new_start, new_end;
1449
1450         if (sc->r[j].use)
1451         {
1452             if (sc->s[j]->hist)
1453             {
1454                 ntot_add = sc->s[j]->hist->sum;
1455             }
1456             else
1457             {
1458                 ntot_add = sc->r[j].end - sc->r[j].start;
1459             }
1460         }
1461         else
1462         {
1463             ntot_add = 0;
1464         }
1465
1466         if (!sc->s[j]->hist)
1467         {
1468             if (ntot_so_far < ntot_start)
1469             {
1470                 /* adjust starting point */
1471                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1472             }
1473             else
1474             {
1475                 new_start = sc->r[j].start;
1476             }
1477             /* adjust end point */
1478             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1479             if (new_end > sc->r[j].end)
1480             {
1481                 new_end = sc->r[j].end;
1482             }
1483
1484             /* check if we're in range at all */
1485             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1486             {
1487                 new_start = 0;
1488                 new_end   = 0;
1489             }
1490             /* and write the new range */
1491             sc->r[j].start = (int)new_start;
1492             sc->r[j].end   = (int)new_end;
1493         }
1494         else
1495         {
1496             if (sc->r[j].use)
1497             {
1498                 double overlap;
1499                 double ntot_start_norm, ntot_end_norm;
1500                 /* calculate the amount of overlap of the
1501                    desired range (ntot_start -- ntot_end) onto
1502                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1503
1504                 /* first calculate normalized bounds
1505                    (where 0 is the start of the hist range, and 1 the end) */
1506                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1507                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1508
1509                 /* now fix the boundaries */
1510                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1511                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1512
1513                 /* and calculate the overlap */
1514                 overlap = ntot_end_norm - ntot_start_norm;
1515
1516                 if (overlap > 0.95) /* we allow for 5% slack */
1517                 {
1518                     sc->r[j].use = TRUE;
1519                 }
1520                 else if (overlap < 0.05)
1521                 {
1522                     sc->r[j].use = FALSE;
1523                 }
1524                 else
1525                 {
1526                     return FALSE;
1527                 }
1528             }
1529         }
1530         ntot_so_far += ntot_add;
1531     }
1532     sample_coll_calc_ntot(sc);
1533
1534     return TRUE;
1535 }
1536
1537 /* calculate minimum and maximum work values in sample collection */
1538 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1539                                 double *Wmin, double *Wmax)
1540 {
1541     int i, j;
1542
1543     *Wmin = FLT_MAX;
1544     *Wmax = -FLT_MAX;
1545
1546     for (i = 0; i < sc->nsamples; i++)
1547     {
1548         samples_t      *s = sc->s[i];
1549         sample_range_t *r = &(sc->r[i]);
1550         if (r->use)
1551         {
1552             if (!s->hist)
1553             {
1554                 for (j = r->start; j < r->end; j++)
1555                 {
1556                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1557                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1558                 }
1559             }
1560             else
1561             {
1562                 int    hd = 0; /* determine the histogram direction: */
1563                 double dx;
1564                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1565                 {
1566                     hd = 1;
1567                 }
1568                 dx = s->hist->dx[hd];
1569
1570                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1571                 {
1572                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1573                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1574                     /* look for the highest value bin with values */
1575                     if (s->hist->bin[hd][j] > 0)
1576                     {
1577                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1578                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1579                         break;
1580                     }
1581                 }
1582             }
1583         }
1584     }
1585 }
1586
1587 /* Initialize a sim_data structure */
1588 static void sim_data_init(sim_data_t *sd)
1589 {
1590     /* make linked list */
1591     sd->lb       = &(sd->lb_head);
1592     sd->lb->next = sd->lb;
1593     sd->lb->prev = sd->lb;
1594
1595     lambda_components_init(&(sd->lc));
1596 }
1597
1598
1599 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1600 {
1601     int    i;
1602     double sum;
1603
1604     sum = 0;
1605
1606     for (i = 0; i < n; i++)
1607     {
1608         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1609     }
1610
1611     return sum;
1612 }
1613
1614 /* calculate the BAR average given a histogram
1615
1616     if type== 0, calculate the best estimate for the average,
1617     if type==-1, calculate the minimum possible value given the histogram
1618     if type== 1, calculate the maximum possible value given the histogram */
1619 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1620                                 int type)
1621 {
1622     double sum = 0.;
1623     int    i;
1624     int    max;
1625     /* normalization factor multiplied with bin width and
1626        number of samples (we normalize through M): */
1627     double normdx = 1.;
1628     int    hd     = 0; /* determine the histogram direction: */
1629     double dx;
1630
1631     if ( (hist->nhist > 1) && (Wfac < 0) )
1632     {
1633         hd = 1;
1634     }
1635     dx  = hist->dx[hd];
1636     max = hist->nbin[hd]-1;
1637     if (type == 1)
1638     {
1639         max = hist->nbin[hd]; /* we also add whatever was out of range */
1640     }
1641
1642     for (i = 0; i < max; i++)
1643     {
1644         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1645         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1646
1647         sum += pxdx/(1. + exp(x + sbMmDG));
1648     }
1649
1650     return sum;
1651 }
1652
1653 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1654                                 double temp, double tol, int type)
1655 {
1656     double kT, beta, M;
1657     double DG;
1658     int    i, j;
1659     double Wfac1, Wfac2, Wmin, Wmax;
1660     double DG0, DG1, DG2, dDG1;
1661     double sum1, sum2;
1662     double n1, n2; /* numbers of samples as doubles */
1663
1664     kT   = BOLTZ*temp;
1665     beta = 1/kT;
1666
1667     /* count the numbers of samples */
1668     n1 = ca->ntot;
1669     n2 = cb->ntot;
1670
1671     M = log(n1/n2);
1672
1673     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1674     if (ca->foreign_lambda->dhdl < 0)
1675     {
1676         /* this is the case when the delta U were calculated directly
1677            (i.e. we're not scaling dhdl) */
1678         Wfac1 = beta;
1679         Wfac2 = beta;
1680     }
1681     else
1682     {
1683         /* we're using dhdl, so delta_lambda needs to be a
1684            multiplication factor.  */
1685         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1686         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1687                                                   ca->native_lambda);
1688         if (cb->native_lambda->lc->N > 1)
1689         {
1690             gmx_fatal(FARGS,
1691                       "Can't (yet) do multi-component dhdl interpolation");
1692         }
1693
1694         Wfac1 =  beta*delta_lambda;
1695         Wfac2 = -beta*delta_lambda;
1696     }
1697
1698     if (beta < 1)
1699     {
1700         /* We print the output both in kT and kJ/mol.
1701          * Here we determine DG in kT, so when beta < 1
1702          * the precision has to be increased.
1703          */
1704         tol *= beta;
1705     }
1706
1707     /* Calculate minimum and maximum work to give an initial estimate of
1708      * delta G  as their average.
1709      */
1710     {
1711         double Wmin1, Wmin2, Wmax1, Wmax2;
1712         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1713         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1714
1715         Wmin = min(Wmin1, Wmin2);
1716         Wmax = max(Wmax1, Wmax2);
1717     }
1718
1719     DG0 = Wmin;
1720     DG2 = Wmax;
1721
1722     if (debug)
1723     {
1724         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1725     }
1726     /* We approximate by bisection: given our initial estimates
1727        we keep checking whether the halfway point is greater or
1728        smaller than what we get out of the BAR averages.
1729
1730        For the comparison we can use twice the tolerance. */
1731     while (DG2 - DG0 > 2*tol)
1732     {
1733         DG1 = 0.5*(DG0 + DG2);
1734
1735         /* calculate the BAR averages */
1736         dDG1 = 0.;
1737
1738         for (i = 0; i < ca->nsamples; i++)
1739         {
1740             samples_t      *s = ca->s[i];
1741             sample_range_t *r = &(ca->r[i]);
1742             if (r->use)
1743             {
1744                 if (s->hist)
1745                 {
1746                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1747                 }
1748                 else
1749                 {
1750                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1751                                          Wfac1, (M-DG1));
1752                 }
1753             }
1754         }
1755         for (i = 0; i < cb->nsamples; i++)
1756         {
1757             samples_t      *s = cb->s[i];
1758             sample_range_t *r = &(cb->r[i]);
1759             if (r->use)
1760             {
1761                 if (s->hist)
1762                 {
1763                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1764                 }
1765                 else
1766                 {
1767                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1768                                          Wfac2, -(M-DG1));
1769                 }
1770             }
1771         }
1772
1773         if (dDG1 < 0)
1774         {
1775             DG0 = DG1;
1776         }
1777         else
1778         {
1779             DG2 = DG1;
1780         }
1781         if (debug)
1782         {
1783             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1784         }
1785     }
1786
1787     return 0.5*(DG0 + DG2);
1788 }
1789
1790 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1791                              double temp, double dg, double *sa, double *sb)
1792 {
1793     int    i, j;
1794     double W_ab = 0.;
1795     double W_ba = 0.;
1796     double kT, beta;
1797     double Wfac1, Wfac2;
1798     double n1, n2;
1799
1800     kT   = BOLTZ*temp;
1801     beta = 1/kT;
1802
1803     /* count the numbers of samples */
1804     n1 = ca->ntot;
1805     n2 = cb->ntot;
1806
1807     /* to ensure the work values are the same as during the delta_G */
1808     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1809     if (ca->foreign_lambda->dhdl < 0)
1810     {
1811         /* this is the case when the delta U were calculated directly
1812            (i.e. we're not scaling dhdl) */
1813         Wfac1 = beta;
1814         Wfac2 = beta;
1815     }
1816     else
1817     {
1818         /* we're using dhdl, so delta_lambda needs to be a
1819            multiplication factor.  */
1820         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1821                                                   ca->native_lambda);
1822         Wfac1 =  beta*delta_lambda;
1823         Wfac2 = -beta*delta_lambda;
1824     }
1825
1826     /* first calculate the average work in both directions */
1827     for (i = 0; i < ca->nsamples; i++)
1828     {
1829         samples_t      *s = ca->s[i];
1830         sample_range_t *r = &(ca->r[i]);
1831         if (r->use)
1832         {
1833             if (!s->hist)
1834             {
1835                 for (j = r->start; j < r->end; j++)
1836                 {
1837                     W_ab += Wfac1*s->du[j];
1838                 }
1839             }
1840             else
1841             {
1842                 /* normalization factor multiplied with bin width and
1843                    number of samples (we normalize through M): */
1844                 double normdx = 1.;
1845                 double dx;
1846                 int    hd = 0; /* histogram direction */
1847                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1848                 {
1849                     hd = 1;
1850                 }
1851                 dx = s->hist->dx[hd];
1852
1853                 for (j = 0; j < s->hist->nbin[0]; j++)
1854                 {
1855                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1856                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1857                     W_ab += pxdx*x;
1858                 }
1859             }
1860         }
1861     }
1862     W_ab /= n1;
1863
1864     for (i = 0; i < cb->nsamples; i++)
1865     {
1866         samples_t      *s = cb->s[i];
1867         sample_range_t *r = &(cb->r[i]);
1868         if (r->use)
1869         {
1870             if (!s->hist)
1871             {
1872                 for (j = r->start; j < r->end; j++)
1873                 {
1874                     W_ba += Wfac1*s->du[j];
1875                 }
1876             }
1877             else
1878             {
1879                 /* normalization factor multiplied with bin width and
1880                    number of samples (we normalize through M): */
1881                 double normdx = 1.;
1882                 double dx;
1883                 int    hd = 0; /* histogram direction */
1884                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1885                 {
1886                     hd = 1;
1887                 }
1888                 dx = s->hist->dx[hd];
1889
1890                 for (j = 0; j < s->hist->nbin[0]; j++)
1891                 {
1892                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1893                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1894                     W_ba += pxdx*x;
1895                 }
1896             }
1897         }
1898     }
1899     W_ba /= n2;
1900
1901     /* then calculate the relative entropies */
1902     *sa = (W_ab - dg);
1903     *sb = (W_ba + dg);
1904 }
1905
1906 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1907                            double temp, double dg, double *stddev)
1908 {
1909     int    i, j;
1910     double M;
1911     double sigmafact = 0.;
1912     double kT, beta;
1913     double Wfac1, Wfac2;
1914     double n1, n2;
1915
1916     kT   = BOLTZ*temp;
1917     beta = 1/kT;
1918
1919     /* count the numbers of samples */
1920     n1 = ca->ntot;
1921     n2 = cb->ntot;
1922
1923     /* to ensure the work values are the same as during the delta_G */
1924     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1925     if (ca->foreign_lambda->dhdl < 0)
1926     {
1927         /* this is the case when the delta U were calculated directly
1928            (i.e. we're not scaling dhdl) */
1929         Wfac1 = beta;
1930         Wfac2 = beta;
1931     }
1932     else
1933     {
1934         /* we're using dhdl, so delta_lambda needs to be a
1935            multiplication factor.  */
1936         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1937                                                   ca->native_lambda);
1938         Wfac1 =  beta*delta_lambda;
1939         Wfac2 = -beta*delta_lambda;
1940     }
1941
1942     M = log(n1/n2);
1943
1944
1945     /* calculate average in both directions */
1946     for (i = 0; i < ca->nsamples; i++)
1947     {
1948         samples_t      *s = ca->s[i];
1949         sample_range_t *r = &(ca->r[i]);
1950         if (r->use)
1951         {
1952             if (!s->hist)
1953             {
1954                 for (j = r->start; j < r->end; j++)
1955                 {
1956                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1957                 }
1958             }
1959             else
1960             {
1961                 /* normalization factor multiplied with bin width and
1962                    number of samples (we normalize through M): */
1963                 double normdx = 1.;
1964                 double dx;
1965                 int    hd = 0; /* histogram direction */
1966                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1967                 {
1968                     hd = 1;
1969                 }
1970                 dx = s->hist->dx[hd];
1971
1972                 for (j = 0; j < s->hist->nbin[0]; j++)
1973                 {
1974                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1975                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1976
1977                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1978                 }
1979             }
1980         }
1981     }
1982     for (i = 0; i < cb->nsamples; i++)
1983     {
1984         samples_t      *s = cb->s[i];
1985         sample_range_t *r = &(cb->r[i]);
1986         if (r->use)
1987         {
1988             if (!s->hist)
1989             {
1990                 for (j = r->start; j < r->end; j++)
1991                 {
1992                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1993                 }
1994             }
1995             else
1996             {
1997                 /* normalization factor multiplied with bin width and
1998                    number of samples (we normalize through M): */
1999                 double normdx = 1.;
2000                 double dx;
2001                 int    hd = 0; /* histogram direction */
2002                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2003                 {
2004                     hd = 1;
2005                 }
2006                 dx = s->hist->dx[hd];
2007
2008                 for (j = 0; j < s->hist->nbin[0]; j++)
2009                 {
2010                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2011                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2012
2013                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2014                 }
2015             }
2016         }
2017     }
2018
2019     sigmafact /= (n1 + n2);
2020
2021
2022     /* Eq. 10 from
2023        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2024     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2025 }
2026
2027
2028
2029 static void calc_bar(barres_t *br, double tol,
2030                      int npee_min, int npee_max, gmx_bool *bEE,
2031                      double *partsum)
2032 {
2033     int      npee, p;
2034     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2035                                                         for calculated quantities */
2036     int      nsample1, nsample2;
2037     double   temp = br->a->temp;
2038     int      i, j;
2039     double   dg_min, dg_max;
2040     gmx_bool have_hist = FALSE;
2041
2042     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2043
2044     br->dg_disc_err      = 0.;
2045     br->dg_histrange_err = 0.;
2046
2047     /* check if there are histograms */
2048     for (i = 0; i < br->a->nsamples; i++)
2049     {
2050         if (br->a->r[i].use && br->a->s[i]->hist)
2051         {
2052             have_hist = TRUE;
2053             break;
2054         }
2055     }
2056     if (!have_hist)
2057     {
2058         for (i = 0; i < br->b->nsamples; i++)
2059         {
2060             if (br->b->r[i].use && br->b->s[i]->hist)
2061             {
2062                 have_hist = TRUE;
2063                 break;
2064             }
2065         }
2066     }
2067
2068     /* calculate histogram-specific errors */
2069     if (have_hist)
2070     {
2071         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2072         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2073
2074         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2075         {
2076             /* the histogram range  error is the biggest of the differences
2077                between the best estimate and the extremes */
2078             br->dg_histrange_err = fabs(dg_max - dg_min);
2079         }
2080         br->dg_disc_err = 0.;
2081         for (i = 0; i < br->a->nsamples; i++)
2082         {
2083             if (br->a->s[i]->hist)
2084             {
2085                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2086             }
2087         }
2088         for (i = 0; i < br->b->nsamples; i++)
2089         {
2090             if (br->b->s[i]->hist)
2091             {
2092                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2093             }
2094         }
2095     }
2096     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2097
2098     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2099
2100     dg_sig2     = 0;
2101     sa_sig2     = 0;
2102     sb_sig2     = 0;
2103     stddev_sig2 = 0;
2104
2105     *bEE = TRUE;
2106     {
2107         sample_coll_t ca, cb;
2108
2109         /* initialize the samples */
2110         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2111                          br->a->temp);
2112         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2113                          br->b->temp);
2114
2115         for (npee = npee_min; npee <= npee_max; npee++)
2116         {
2117             double dgs      = 0;
2118             double dgs2     = 0;
2119             double dsa      = 0;
2120             double dsb      = 0;
2121             double dsa2     = 0;
2122             double dsb2     = 0;
2123             double dstddev  = 0;
2124             double dstddev2 = 0;
2125
2126
2127             for (p = 0; p < npee; p++)
2128             {
2129                 double   dgp;
2130                 double   stddevc;
2131                 double   sac, sbc;
2132                 gmx_bool cac, cbc;
2133
2134                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2135                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2136
2137                 if (!cac || !cbc)
2138                 {
2139                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2140                     *bEE = FALSE;
2141                     if (cac)
2142                     {
2143                         sample_coll_destroy(&ca);
2144                     }
2145                     if (cbc)
2146                     {
2147                         sample_coll_destroy(&cb);
2148                     }
2149                     return;
2150                 }
2151
2152                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2153                 dgs  += dgp;
2154                 dgs2 += dgp*dgp;
2155
2156                 partsum[npee*(npee_max+1)+p] += dgp;
2157
2158                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2159                 dsa  += sac;
2160                 dsa2 += sac*sac;
2161                 dsb  += sbc;
2162                 dsb2 += sbc*sbc;
2163                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2164
2165                 dstddev  += stddevc;
2166                 dstddev2 += stddevc*stddevc;
2167
2168                 sample_coll_destroy(&ca);
2169                 sample_coll_destroy(&cb);
2170             }
2171             dgs     /= npee;
2172             dgs2    /= npee;
2173             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2174
2175             dsa     /= npee;
2176             dsa2    /= npee;
2177             dsb     /= npee;
2178             dsb2    /= npee;
2179             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2180             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2181
2182             dstddev     /= npee;
2183             dstddev2    /= npee;
2184             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2185         }
2186         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2187         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2188         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2189         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2190     }
2191 }
2192
2193
2194 static double bar_err(int nbmin, int nbmax, const double *partsum)
2195 {
2196     int    nb, b;
2197     double svar, s, s2, dg;
2198
2199     svar = 0;
2200     for (nb = nbmin; nb <= nbmax; nb++)
2201     {
2202         s  = 0;
2203         s2 = 0;
2204         for (b = 0; b < nb; b++)
2205         {
2206             dg  = partsum[nb*(nbmax+1)+b];
2207             s  += dg;
2208             s2 += dg*dg;
2209         }
2210         s    /= nb;
2211         s2   /= nb;
2212         svar += (s2 - s*s)/(nb - 1);
2213     }
2214
2215     return sqrt(svar/(nbmax + 1 - nbmin));
2216 }
2217
2218
2219 /* Seek the end of an identifier (consecutive non-spaces), followed by
2220    an optional number of spaces or '='-signs. Returns a pointer to the
2221    first non-space value found after that. Returns NULL if the string
2222    ends before that.
2223  */
2224 static const char *find_value(const char *str)
2225 {
2226     gmx_bool name_end_found = FALSE;
2227
2228     /* if the string is a NULL pointer, return a NULL pointer. */
2229     if (str == NULL)
2230     {
2231         return NULL;
2232     }
2233     while (*str != '\0')
2234     {
2235         /* first find the end of the name */
2236         if (!name_end_found)
2237         {
2238             if (isspace(*str) || (*str == '=') )
2239             {
2240                 name_end_found = TRUE;
2241             }
2242         }
2243         else
2244         {
2245             if (!( isspace(*str) || (*str == '=') ))
2246             {
2247                 return str;
2248             }
2249         }
2250         str++;
2251     }
2252     return NULL;
2253 }
2254
2255
2256
2257 /* read a vector-notation description of a lambda vector */
2258 static gmx_bool read_lambda_compvec(const char                *str,
2259                                     lambda_vec_t              *lv,
2260                                     const lambda_components_t *lc_in,
2261                                     lambda_components_t       *lc_out,
2262                                     const char               **end,
2263                                     const char                *fn)
2264 {
2265     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2266                                           components, or to check them */
2267     gmx_bool    start_reached = FALSE; /* whether the start of component names
2268                                           has been reached */
2269     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2270     int         n             = 0;     /* current component number */
2271     const char *val_start     = NULL;  /* start of the component name, or NULL
2272                                           if not in a value */
2273     char       *strtod_end;
2274     gmx_bool    OK = TRUE;
2275
2276     if (end)
2277     {
2278         *end = str;
2279     }
2280
2281
2282     if (lc_out && lc_out->N == 0)
2283     {
2284         initialize_lc = TRUE;
2285     }
2286
2287     if (lc_in == NULL)
2288     {
2289         lc_in = lc_out;
2290     }
2291
2292     while (1)
2293     {
2294         if (!start_reached)
2295         {
2296             if (isalnum(*str))
2297             {
2298                 vector        = FALSE;
2299                 start_reached = TRUE;
2300                 val_start     = str;
2301             }
2302             else if (*str == '(')
2303             {
2304                 vector        = TRUE;
2305                 start_reached = TRUE;
2306             }
2307             else if (!isspace(*str))
2308             {
2309                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2310             }
2311         }
2312         else
2313         {
2314             if (val_start)
2315             {
2316                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2317                 {
2318                     /* end of value */
2319                     if (lv == NULL)
2320                     {
2321                         if (initialize_lc)
2322                         {
2323                             lambda_components_add(lc_out, val_start,
2324                                                   (str-val_start));
2325                         }
2326                         else
2327                         {
2328                             if (!lambda_components_check(lc_out, n, val_start,
2329                                                          (str-val_start)))
2330                             {
2331                                 return FALSE;
2332                             }
2333                         }
2334                     }
2335                     else
2336                     {
2337                         /* add a vector component to lv */
2338                         lv->val[n] = strtod(val_start, &strtod_end);
2339                         if (val_start == strtod_end)
2340                         {
2341                             gmx_fatal(FARGS,
2342                                       "Error reading lambda vector in %s", fn);
2343                         }
2344                     }
2345                     /* reset for the next identifier */
2346                     val_start = NULL;
2347                     n++;
2348                     if (!vector)
2349                     {
2350                         return OK;
2351                     }
2352                 }
2353             }
2354             else if (isalnum(*str))
2355             {
2356                 val_start = str;
2357             }
2358             if (*str == ')')
2359             {
2360                 str++;
2361                 if (end)
2362                 {
2363                     *end = str;
2364                 }
2365                 if (!vector)
2366                 {
2367                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2368                 }
2369                 else
2370                 {
2371                     if (n == lc_in->N)
2372                     {
2373                         return OK;
2374                     }
2375                     else if (lv == NULL)
2376                     {
2377                         return FALSE;
2378                     }
2379                     else
2380                     {
2381                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2382                                   fn);
2383                         return FALSE;
2384                     }
2385
2386                 }
2387             }
2388         }
2389         if (*str == '\0')
2390         {
2391             break;
2392         }
2393         str++;
2394         if (end)
2395         {
2396             *end = str;
2397         }
2398     }
2399     if (vector)
2400     {
2401         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2402         return FALSE;
2403     }
2404     return OK;
2405 }
2406
2407 /* read and check the component names from a string */
2408 static gmx_bool read_lambda_components(const char          *str,
2409                                        lambda_components_t *lc,
2410                                        const char         **end,
2411                                        const char          *fn)
2412 {
2413     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2414 }
2415
2416 /* read an initialized lambda vector from a string */
2417 static gmx_bool read_lambda_vector(const char   *str,
2418                                    lambda_vec_t *lv,
2419                                    const char  **end,
2420                                    const char   *fn)
2421 {
2422     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2423 }
2424
2425
2426
2427 /* deduce lambda value from legend.
2428     fn = the file name
2429     legend = the legend string
2430     ba = the xvg data
2431     lam = the initialized lambda vector
2432    returns whether to use the data in this set.
2433  */
2434 static gmx_bool legend2lambda(const char   *fn,
2435                               const char   *legend,
2436                               xvg_t        *ba,
2437                               lambda_vec_t *lam)
2438 {
2439     double        lambda = 0;
2440     const char   *ptr    = NULL, *ptr2 = NULL;
2441     gmx_bool      ok     = FALSE;
2442     gmx_bool      bdhdl  = FALSE;
2443     const char   *tostr  = " to ";
2444
2445     if (legend == NULL)
2446     {
2447         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2448     }
2449
2450     /* look for the last 'to': */
2451     ptr2 = legend;
2452     do
2453     {
2454         ptr2 = strstr(ptr2, tostr);
2455         if (ptr2 != NULL)
2456         {
2457             ptr = ptr2;
2458             ptr2++;
2459         }
2460     }
2461     while (ptr2 != NULL && *ptr2 != '\0');
2462
2463     if (ptr)
2464     {
2465         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2466     }
2467     else
2468     {
2469         /* look for the = sign */
2470         ptr = strrchr(legend, '=');
2471         if (!ptr)
2472         {
2473             /* otherwise look for the last space */
2474             ptr = strrchr(legend, ' ');
2475         }
2476     }
2477
2478     if (strstr(legend, "dH"))
2479     {
2480         ok    = TRUE;
2481         bdhdl = TRUE;
2482     }
2483     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2484     {
2485         ok    = TRUE;
2486         bdhdl = FALSE;
2487     }
2488     else /*if (strstr(legend, "pV"))*/
2489     {
2490         return FALSE;
2491     }
2492     if (!ptr)
2493     {
2494         ok = FALSE;
2495     }
2496
2497     if (!ok)
2498     {
2499         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2500     }
2501     if (!bdhdl)
2502     {
2503         ptr = find_value(ptr);
2504         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2505         {
2506             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2507         }
2508     }
2509     else
2510     {
2511         int         dhdl_index;
2512         const char *end;
2513         char        buf[STRLEN];
2514
2515         ptr = strrchr(legend, '=');
2516         end = ptr;
2517         if (ptr)
2518         {
2519             /* there must be a component name */
2520             ptr--;
2521             if (ptr < legend)
2522             {
2523                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2524             }
2525             /* now backtrack to the start of the identifier */
2526             while (isspace(*ptr))
2527             {
2528                 end = ptr;
2529                 ptr--;
2530                 if (ptr < legend)
2531                 {
2532                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2533                 }
2534             }
2535             while (!isspace(*ptr))
2536             {
2537                 ptr--;
2538                 if (ptr < legend)
2539                 {
2540                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2541                 }
2542             }
2543             ptr++;
2544             strncpy(buf, ptr, (end-ptr));
2545             buf[(end-ptr)] = '\0';
2546             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2547             if (dhdl_index < 0)
2548             {
2549                 char buf[STRLEN];
2550                 strncpy(buf, ptr, (end-ptr));
2551                 buf[(end-ptr)] = '\0';
2552                 gmx_fatal(FARGS,
2553                           "Did not find lambda component for '%s' in %s",
2554                           buf, fn);
2555             }
2556         }
2557         else
2558         {
2559             if (lam->lc->N > 1)
2560             {
2561                 gmx_fatal(FARGS,
2562                           "dhdl without component name with >1 lambda component in %s",
2563                           fn);
2564             }
2565             dhdl_index = 0;
2566         }
2567         lam->dhdl = dhdl_index;
2568     }
2569     return TRUE;
2570 }
2571
2572 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2573                                 lambda_components_t *lc)
2574 {
2575     gmx_bool    bFound;
2576     const char *ptr;
2577     char       *end;
2578     double      native_lambda;
2579
2580     bFound = FALSE;
2581
2582     /* first check for a state string */
2583     ptr = strstr(subtitle, "state");
2584     if (ptr)
2585     {
2586         int         index = -1;
2587         const char *val_end;
2588
2589         /* the new 4.6 style lambda vectors */
2590         ptr = find_value(ptr);
2591         if (ptr)
2592         {
2593             index = strtol(ptr, &end, 10);
2594             if (ptr == end)
2595             {
2596                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2597                 return FALSE;
2598             }
2599             ptr = end;
2600         }
2601         else
2602         {
2603             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2604             return FALSE;
2605         }
2606         /* now find the lambda vector component names */
2607         while (*ptr != '(' && !isalnum(*ptr))
2608         {
2609             ptr++;
2610             if (*ptr == '\0')
2611             {
2612                 gmx_fatal(FARGS,
2613                           "Incomplete lambda vector component data in %s", fn);
2614                 return FALSE;
2615             }
2616         }
2617         val_end = ptr;
2618         if (!read_lambda_components(ptr, lc, &val_end, fn))
2619         {
2620             gmx_fatal(FARGS,
2621                       "lambda vector components in %s don't match those previously read",
2622                       fn);
2623         }
2624         ptr = find_value(val_end);
2625         if (!ptr)
2626         {
2627             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2628             return FALSE;
2629         }
2630         lambda_vec_init(&(ba->native_lambda), lc);
2631         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2632         {
2633             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2634         }
2635         ba->native_lambda.index = index;
2636         bFound                  = TRUE;
2637     }
2638     else
2639     {
2640         /* compatibility mode: check for lambda in other ways. */
2641         /* plain text lambda string */
2642         ptr = strstr(subtitle, "lambda");
2643         if (ptr == NULL)
2644         {
2645             /* xmgrace formatted lambda string */
2646             ptr = strstr(subtitle, "\\xl\\f{}");
2647         }
2648         if (ptr == NULL)
2649         {
2650             /* xmgr formatted lambda string */
2651             ptr = strstr(subtitle, "\\8l\\4");
2652         }
2653         if (ptr != NULL)
2654         {
2655             ptr = strstr(ptr, "=");
2656         }
2657         if (ptr != NULL)
2658         {
2659             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2660             /* add the lambda component name as an empty string */
2661             if (lc->N > 0)
2662             {
2663                 if (!lambda_components_check(lc, 0, "", 0))
2664                 {
2665                     gmx_fatal(FARGS,
2666                               "lambda vector components in %s don't match those previously read",
2667                               fn);
2668                 }
2669             }
2670             else
2671             {
2672                 lambda_components_add(lc, "", 0);
2673             }
2674             lambda_vec_init(&(ba->native_lambda), lc);
2675             ba->native_lambda.val[0] = native_lambda;
2676         }
2677     }
2678
2679     return bFound;
2680 }
2681
2682 static void filename2lambda(const char *fn, xvg_t *ba)
2683 {
2684     double        lambda;
2685     const char   *ptr, *digitptr;
2686     char         *endptr;
2687     int           dirsep;
2688     ptr = fn;
2689     /* go to the end of the path string and search backward to find the last
2690        directory in the path which has to contain the value of lambda
2691      */
2692     while (ptr[1] != '\0')
2693     {
2694         ptr++;
2695     }
2696     /* searching backward to find the second directory separator */
2697     dirsep   = 0;
2698     digitptr = NULL;
2699     while (ptr >= fn)
2700     {
2701         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2702         {
2703             if (dirsep == 1)
2704             {
2705                 break;
2706             }
2707             dirsep++;
2708         }
2709         /* save the last position of a digit between the last two
2710            separators = in the last dirname */
2711         if (dirsep > 0 && isdigit(*ptr))
2712         {
2713             digitptr = ptr;
2714         }
2715         ptr--;
2716     }
2717     if (!digitptr)
2718     {
2719         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2720                   " last directory in the path '%s' does not contain a number", fn);
2721     }
2722     if (digitptr[-1] == '-')
2723     {
2724         digitptr--;
2725     }
2726     lambda = strtod(digitptr, &endptr);
2727     if (endptr == digitptr)
2728     {
2729         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2730     }
2731 }
2732
2733 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2734                                   lambda_components_t *lc)
2735 {
2736     int          i;
2737     char        *subtitle, **legend, *ptr;
2738     int          np;
2739     gmx_bool     native_lambda_read = FALSE;
2740     char         buf[STRLEN];
2741     lambda_vec_t lv;
2742
2743     xvg_init(ba);
2744
2745     ba->filename = fn;
2746
2747     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2748     if (!ba->y)
2749     {
2750         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2751     }
2752     /* Reorder the data */
2753     ba->t  = ba->y[0];
2754     for (i = 1; i < ba->nset; i++)
2755     {
2756         ba->y[i-1] = ba->y[i];
2757     }
2758     ba->nset--;
2759
2760     snew(ba->np, ba->nset);
2761     for (i = 0; i < ba->nset; i++)
2762     {
2763         ba->np[i] = np;
2764     }
2765
2766     ba->temp = -1;
2767     if (subtitle != NULL)
2768     {
2769         /* try to extract temperature */
2770         ptr = strstr(subtitle, "T =");
2771         if (ptr != NULL)
2772         {
2773             ptr += 3;
2774             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2775             {
2776                 if (ba->temp <= 0)
2777                 {
2778                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2779                               ba->temp, fn);
2780                 }
2781             }
2782         }
2783     }
2784     if (ba->temp < 0)
2785     {
2786         if (*temp <= 0)
2787         {
2788             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]", fn);
2789         }
2790         ba->temp = *temp;
2791     }
2792
2793     /* Try to deduce lambda from the subtitle */
2794     if (subtitle)
2795     {
2796         if (subtitle2lambda(subtitle, ba, fn, lc))
2797         {
2798             native_lambda_read = TRUE;
2799         }
2800     }
2801     snew(ba->lambda, ba->nset);
2802     if (legend == NULL)
2803     {
2804         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2805         if (ba->nset == 1)
2806         {
2807             if (!native_lambda_read)
2808             {
2809                 /* Deduce lambda from the file name */
2810                 filename2lambda(fn, ba);
2811                 native_lambda_read = TRUE;
2812             }
2813             ba->lambda[0] = ba->native_lambda;
2814         }
2815         else
2816         {
2817             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2818         }
2819     }
2820     else
2821     {
2822         for (i = 0; i < ba->nset; )
2823         {
2824             gmx_bool use = FALSE;
2825             /* Read lambda from the legend */
2826             lambda_vec_init( &(ba->lambda[i]), lc );
2827             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2828             use = legend2lambda(fn, legend[i], ba, &(ba->lambda[i]));
2829             if (use)
2830             {
2831                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2832                 i++;
2833             }
2834             else
2835             {
2836                 int j;
2837                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2838                 for (j = i+1; j < ba->nset; j++)
2839                 {
2840                     ba->y[j-1]  = ba->y[j];
2841                     legend[j-1] = legend[j];
2842                 }
2843                 ba->nset--;
2844             }
2845         }
2846     }
2847
2848     if (!native_lambda_read)
2849     {
2850         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2851     }
2852
2853     if (legend != NULL)
2854     {
2855         for (i = 0; i < ba->nset-1; i++)
2856         {
2857             sfree(legend[i]);
2858         }
2859         sfree(legend);
2860     }
2861 }
2862
2863 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2864 {
2865     xvg_t     *barsim;
2866     samples_t *s;
2867     int        i;
2868     double    *lambda;
2869
2870     snew(barsim, 1);
2871
2872     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2873
2874     if (barsim->nset < 1)
2875     {
2876         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2877     }
2878
2879     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2880     {
2881         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2882     }
2883     *temp = barsim->temp;
2884
2885     /* now create a series of samples_t */
2886     snew(s, barsim->nset);
2887     for (i = 0; i < barsim->nset; i++)
2888     {
2889         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2890                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2891                                                    &(barsim->lambda[i])),
2892                      fn);
2893         s[i].du  = barsim->y[i];
2894         s[i].ndu = barsim->np[i];
2895         s[i].t   = barsim->t;
2896
2897         lambda_data_list_insert_sample(sd->lb, s+i);
2898     }
2899     {
2900         char buf[STRLEN];
2901
2902         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2903         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2904                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2905         for (i = 0; i < barsim->nset; i++)
2906         {
2907             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2908             printf("        %s (%d pts)\n", buf, s[i].ndu);
2909         }
2910     }
2911     printf("\n\n");
2912 }
2913
2914 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2915                                  double start_time, double delta_time,
2916                                  lambda_vec_t *native_lambda, double temp,
2917                                  double *last_t, const char *filename)
2918 {
2919     int           i, j;
2920     gmx_bool      allocated;
2921     double        old_foreign_lambda;
2922     lambda_vec_t *foreign_lambda;
2923     int           type;
2924     samples_t    *s; /* convenience pointer */
2925     int           startj;
2926
2927     /* check the block types etc. */
2928     if ( (blk->nsub < 3) ||
2929          (blk->sub[0].type != xdr_datatype_int) ||
2930          (blk->sub[1].type != xdr_datatype_double) ||
2931          (
2932              (blk->sub[2].type != xdr_datatype_float) &&
2933              (blk->sub[2].type != xdr_datatype_double)
2934          ) ||
2935          (blk->sub[0].nr < 1) ||
2936          (blk->sub[1].nr < 1) )
2937     {
2938         gmx_fatal(FARGS,
2939                   "Unexpected/corrupted block data in file %s around time %f.",
2940                   filename, start_time);
2941     }
2942
2943     snew(foreign_lambda, 1);
2944     lambda_vec_init(foreign_lambda, native_lambda->lc);
2945     lambda_vec_copy(foreign_lambda, native_lambda);
2946     type = blk->sub[0].ival[0];
2947     if (type == dhbtDH)
2948     {
2949         for (i = 0; i < native_lambda->lc->N; i++)
2950         {
2951             foreign_lambda->val[i] = blk->sub[1].dval[i];
2952         }
2953     }
2954     else
2955     {
2956         if (blk->sub[0].nr > 1)
2957         {
2958             foreign_lambda->dhdl = blk->sub[0].ival[1];
2959         }
2960         else
2961         {
2962             foreign_lambda->dhdl = 0;
2963         }
2964     }
2965
2966     if (!*smp)
2967     {
2968         /* initialize the samples structure if it's empty. */
2969         snew(*smp, 1);
2970         samples_init(*smp, native_lambda, foreign_lambda, temp,
2971                      type == dhbtDHDL, filename);
2972         (*smp)->start_time = start_time;
2973         (*smp)->delta_time = delta_time;
2974     }
2975
2976     /* set convenience pointer */
2977     s = *smp;
2978
2979     /* now double check */
2980     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2981     {
2982         char buf[STRLEN], buf2[STRLEN];
2983         lambda_vec_print(foreign_lambda, buf, FALSE);
2984         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2985         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2986         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2987                   filename, start_time);
2988     }
2989
2990     /* make room for the data */
2991     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2992     {
2993         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2994             blk->sub[2].nr*2 : s->ndu_alloc;
2995         srenew(s->du_alloc, s->ndu_alloc);
2996         s->du = s->du_alloc;
2997     }
2998     startj   = s->ndu;
2999     s->ndu  += blk->sub[2].nr;
3000     s->ntot += blk->sub[2].nr;
3001     *ndu     = blk->sub[2].nr;
3002
3003     /* and copy the data*/
3004     for (j = 0; j < blk->sub[2].nr; j++)
3005     {
3006         if (blk->sub[2].type == xdr_datatype_float)
3007         {
3008             s->du[startj+j] = blk->sub[2].fval[j];
3009         }
3010         else
3011         {
3012             s->du[startj+j] = blk->sub[2].dval[j];
3013         }
3014     }
3015     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3016     {
3017         *last_t = start_time + blk->sub[2].nr*delta_time;
3018     }
3019 }
3020
3021 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3022                                       double start_time, double delta_time,
3023                                       lambda_vec_t *native_lambda, double temp,
3024                                       double *last_t, const char *filename)
3025 {
3026     int           i, j;
3027     samples_t    *s;
3028     int           nhist;
3029     double        old_foreign_lambda;
3030     lambda_vec_t *foreign_lambda;
3031     int           type;
3032     int           nbins[2];
3033
3034     /* check the block types etc. */
3035     if ( (blk->nsub < 2) ||
3036          (blk->sub[0].type != xdr_datatype_double) ||
3037          (blk->sub[1].type != xdr_datatype_large_int) ||
3038          (blk->sub[0].nr < 2)  ||
3039          (blk->sub[1].nr < 2) )
3040     {
3041         gmx_fatal(FARGS,
3042                   "Unexpected/corrupted block data in file %s around time %f",
3043                   filename, start_time);
3044     }
3045
3046     nhist = blk->nsub-2;
3047     if (nhist == 0)
3048     {
3049         return NULL;
3050     }
3051     if (nhist > 2)
3052     {
3053         gmx_fatal(FARGS,
3054                   "Unexpected/corrupted block data in file %s around time %f",
3055                   filename, start_time);
3056     }
3057
3058     snew(s, 1);
3059     *nsamples = 1;
3060
3061     snew(foreign_lambda, 1);
3062     lambda_vec_init(foreign_lambda, native_lambda->lc);
3063     lambda_vec_copy(foreign_lambda, native_lambda);
3064     type = (int)(blk->sub[1].lval[1]);
3065     if (type == dhbtDH)
3066     {
3067         double old_foreign_lambda;
3068
3069         old_foreign_lambda = blk->sub[0].dval[0];
3070         if (old_foreign_lambda >= 0)
3071         {
3072             foreign_lambda->val[0] = old_foreign_lambda;
3073             if (foreign_lambda->lc->N > 1)
3074             {
3075                 gmx_fatal(FARGS,
3076                           "Single-component lambda in multi-component file %s",
3077                           filename);
3078             }
3079         }
3080         else
3081         {
3082             for (i = 0; i < native_lambda->lc->N; i++)
3083             {
3084                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3085             }
3086         }
3087     }
3088     else
3089     {
3090         if (foreign_lambda->lc->N > 1)
3091         {
3092             if (blk->sub[1].nr < 3 + nhist)
3093             {
3094                 gmx_fatal(FARGS,
3095                           "Missing derivative coord in multi-component file %s",
3096                           filename);
3097             }
3098             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3099         }
3100         else
3101         {
3102             foreign_lambda->dhdl = 0;
3103         }
3104     }
3105
3106     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3107                  filename);
3108     snew(s->hist, 1);
3109
3110     for (i = 0; i < nhist; i++)
3111     {
3112         nbins[i] = blk->sub[i+2].nr;
3113     }
3114
3115     hist_init(s->hist, nhist, nbins);
3116
3117     for (i = 0; i < nhist; i++)
3118     {
3119         s->hist->x0[i] = blk->sub[1].lval[2+i];
3120         s->hist->dx[i] = blk->sub[0].dval[1];
3121         if (i == 1)
3122         {
3123             s->hist->dx[i] = -s->hist->dx[i];
3124         }
3125     }
3126
3127     s->hist->start_time = start_time;
3128     s->hist->delta_time = delta_time;
3129     s->start_time       = start_time;
3130     s->delta_time       = delta_time;
3131
3132     for (i = 0; i < nhist; i++)
3133     {
3134         int             nbin;
3135         gmx_large_int_t sum = 0;
3136
3137         for (j = 0; j < s->hist->nbin[i]; j++)
3138         {
3139             int binv = (int)(blk->sub[i+2].ival[j]);
3140
3141             s->hist->bin[i][j] = binv;
3142             sum               += binv;
3143
3144         }
3145         if (i == 0)
3146         {
3147             s->ntot      = sum;
3148             s->hist->sum = sum;
3149         }
3150         else
3151         {
3152             if (s->ntot != sum)
3153             {
3154                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3155                           filename);
3156             }
3157         }
3158     }
3159
3160     if (start_time + s->hist->sum*delta_time > *last_t)
3161     {
3162         *last_t = start_time + s->hist->sum*delta_time;
3163     }
3164     return s;
3165 }
3166
3167
3168 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3169 {
3170     int            i, j;
3171     ener_file_t    fp;
3172     t_enxframe    *fr;
3173     int            nre;
3174     gmx_enxnm_t   *enm           = NULL;
3175     double         first_t       = -1;
3176     double         last_t        = -1;
3177     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3178     int           *nhists        = NULL; /* array to keep count & print at end */
3179     int           *npts          = NULL; /* array to keep count & print at end */
3180     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3181     lambda_vec_t  *native_lambda;
3182     double         end_time;             /* the end time of the last batch of samples */
3183     int            nsamples = 0;
3184     lambda_vec_t   start_lambda;
3185
3186     fp = open_enx(fn, "r");
3187     do_enxnms(fp, &nre, &enm);
3188     snew(fr, 1);
3189
3190     snew(native_lambda, 1);
3191     start_lambda.lc = NULL;
3192
3193     while (do_enx(fp, fr))
3194     {
3195         /* count the data blocks */
3196         int    nblocks_raw  = 0;
3197         int    nblocks_hist = 0;
3198         int    nlam         = 0;
3199         int    k;
3200         /* DHCOLL block information: */
3201         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3202         double rtemp      = 0;
3203
3204         /* count the blocks and handle collection information: */
3205         for (i = 0; i < fr->nblock; i++)
3206         {
3207             if (fr->block[i].id == enxDHHIST)
3208             {
3209                 nblocks_hist++;
3210             }
3211             if (fr->block[i].id == enxDH)
3212             {
3213                 nblocks_raw++;
3214             }
3215             if (fr->block[i].id == enxDHCOLL)
3216             {
3217                 nlam++;
3218                 if ( (fr->block[i].nsub < 1) ||
3219                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3220                      (fr->block[i].sub[0].nr < 5))
3221                 {
3222                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3223                 }
3224
3225                 /* read the data from the DHCOLL block */
3226                 rtemp            =        fr->block[i].sub[0].dval[0];
3227                 start_time       =   fr->block[i].sub[0].dval[1];
3228                 delta_time       =   fr->block[i].sub[0].dval[2];
3229                 old_start_lambda = fr->block[i].sub[0].dval[3];
3230                 delta_lambda     = fr->block[i].sub[0].dval[4];
3231
3232                 if (delta_lambda > 0)
3233                 {
3234                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3235                 }
3236                 if ( ( *temp != rtemp) && (*temp > 0) )
3237                 {
3238                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3239                 }
3240                 *temp = rtemp;
3241
3242                 if (old_start_lambda >= 0)
3243                 {
3244                     if (sd->lc.N > 0)
3245                     {
3246                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3247                         {
3248                             gmx_fatal(FARGS,
3249                                       "lambda vector components in %s don't match those previously read",
3250                                       fn);
3251                         }
3252                     }
3253                     else
3254                     {
3255                         lambda_components_add(&(sd->lc), "", 0);
3256                     }
3257                     if (!start_lambda.lc)
3258                     {
3259                         lambda_vec_init(&start_lambda, &(sd->lc));
3260                     }
3261                     start_lambda.val[0] = old_start_lambda;
3262                 }
3263                 else
3264                 {
3265                     /* read lambda vector */
3266                     int      n_lambda_vec;
3267                     gmx_bool check = (sd->lc.N > 0);
3268                     if (fr->block[i].nsub < 2)
3269                     {
3270                         gmx_fatal(FARGS,
3271                                   "No lambda vector, but start_lambda=%f\n",
3272                                   old_start_lambda);
3273                     }
3274                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3275                     for (j = 0; j < n_lambda_vec; j++)
3276                     {
3277                         const char *name =
3278                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3279                         if (check)
3280                         {
3281                             /* check the components */
3282                             lambda_components_check(&(sd->lc), j, name,
3283                                                     strlen(name));
3284                         }
3285                         else
3286                         {
3287                             lambda_components_add(&(sd->lc), name,
3288                                                   strlen(name));
3289                         }
3290                     }
3291                     lambda_vec_init(&start_lambda, &(sd->lc));
3292                     start_lambda.index = fr->block[i].sub[1].ival[0];
3293                     for (j = 0; j < n_lambda_vec; j++)
3294                     {
3295                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3296                     }
3297                 }
3298                 if (first_t < 0)
3299                 {
3300                     first_t = start_time;
3301                 }
3302             }
3303         }
3304
3305         if (nlam != 1)
3306         {
3307             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3308         }
3309         if (nblocks_raw > 0 && nblocks_hist > 0)
3310         {
3311             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3312         }
3313
3314         if (nsamples > 0)
3315         {
3316             /* check the native lambda */
3317             if (!lambda_vec_same(&start_lambda, native_lambda) )
3318             {
3319                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3320                           fn, native_lambda, start_lambda, start_time);
3321             }
3322             /* check the number of samples against the previous number */
3323             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3324             {
3325                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3326                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3327             }
3328             /* check whether last iterations's end time matches with
3329                the currrent start time */
3330             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3331             {
3332                 /* it didn't. We need to store our samples and reallocate */
3333                 for (i = 0; i < nsamples; i++)
3334                 {
3335                     if (samples_rawdh[i])
3336                     {
3337                         /* insert it into the existing list */
3338                         lambda_data_list_insert_sample(sd->lb,
3339                                                        samples_rawdh[i]);
3340                         /* and make sure we'll allocate a new one this time
3341                            around */
3342                         samples_rawdh[i] = NULL;
3343                     }
3344                 }
3345             }
3346         }
3347         else
3348         {
3349             /* this is the first round; allocate the associated data
3350                structures */
3351             /*native_lambda=start_lambda;*/
3352             lambda_vec_init(native_lambda, &(sd->lc));
3353             lambda_vec_copy(native_lambda, &start_lambda);
3354             nsamples = nblocks_raw+nblocks_hist;
3355             snew(nhists, nsamples);
3356             snew(npts, nsamples);
3357             snew(lambdas, nsamples);
3358             snew(samples_rawdh, nsamples);
3359             for (i = 0; i < nsamples; i++)
3360             {
3361                 nhists[i]        = 0;
3362                 npts[i]          = 0;
3363                 lambdas[i]       = NULL;
3364                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3365                                             ones contain values */
3366             }
3367         }
3368
3369         /* and read them */
3370         k = 0; /* counter for the lambdas, etc. arrays */
3371         for (i = 0; i < fr->nblock; i++)
3372         {
3373             if (fr->block[i].id == enxDH)
3374             {
3375                 int type = (fr->block[i].sub[0].ival[0]);
3376                 if (type == dhbtDH || type == dhbtDHDL)
3377                 {
3378                     int ndu;
3379                     read_edr_rawdh_block(&(samples_rawdh[k]),
3380                                          &ndu,
3381                                          &(fr->block[i]),
3382                                          start_time, delta_time,
3383                                          native_lambda, rtemp,
3384                                          &last_t, fn);
3385                     npts[k] += ndu;
3386                     if (samples_rawdh[k])
3387                     {
3388                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3389                     }
3390                     k++;
3391                 }
3392             }
3393             else if (fr->block[i].id == enxDHHIST)
3394             {
3395                 int type = (int)(fr->block[i].sub[1].lval[1]);
3396                 if (type == dhbtDH || type == dhbtDHDL)
3397                 {
3398                     int        j;
3399                     int        nb = 0;
3400                     samples_t *s; /* this is where the data will go */
3401                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3402                                             start_time, delta_time,
3403                                             native_lambda, rtemp,
3404                                             &last_t, fn);
3405                     nhists[k] += nb;
3406                     if (nb > 0)
3407                     {
3408                         lambdas[k] = s->foreign_lambda;
3409                     }
3410                     k++;
3411                     /* and insert the new sample immediately */
3412                     for (j = 0; j < nb; j++)
3413                     {
3414                         lambda_data_list_insert_sample(sd->lb, s+j);
3415                     }
3416                 }
3417             }
3418         }
3419     }
3420     /* Now store all our extant sample collections */
3421     for (i = 0; i < nsamples; i++)
3422     {
3423         if (samples_rawdh[i])
3424         {
3425             /* insert it into the existing list */
3426             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3427         }
3428     }
3429
3430
3431     {
3432         char buf[STRLEN];
3433         printf("\n");
3434         lambda_vec_print(native_lambda, buf, FALSE);
3435         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3436                fn, first_t, last_t, buf);
3437         for (i = 0; i < nsamples; i++)
3438         {
3439             if (lambdas[i])
3440             {
3441                 lambda_vec_print(lambdas[i], buf, TRUE);
3442                 if (nhists[i] > 0)
3443                 {
3444                     printf("        %s (%d hists)\n", buf, nhists[i]);
3445                 }
3446                 else
3447                 {
3448                     printf("        %s (%d pts)\n", buf, npts[i]);
3449                 }
3450             }
3451         }
3452     }
3453     printf("\n\n");
3454     sfree(npts);
3455     sfree(nhists);
3456     sfree(lambdas);
3457 }
3458
3459
3460 int gmx_bar(int argc, char *argv[])
3461 {
3462     static const char *desc[] = {
3463         "[TT]g_bar[tt] calculates free energy difference estimates through ",
3464         "Bennett's acceptance ratio method (BAR). It also automatically",
3465         "adds series of individual free energies obtained with BAR into",
3466         "a combined free energy estimate.[PAR]",
3467
3468         "Every individual BAR free energy difference relies on two ",
3469         "simulations at different states: say state A and state B, as",
3470         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3471         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3472         "average of the Hamiltonian difference of state B given state A and",
3473         "vice versa.",
3474         "The energy differences to the other state must be calculated",
3475         "explicitly during the simulation. This can be done with",
3476         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3477
3478         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3479         "Two types of input files are supported:[BR]",
3480         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3481         "The files should have columns ",
3482         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3483         "The [GRK]lambda[grk] values are inferred ",
3484         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3485         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3486         "legends of Delta H",
3487         "[BR]",
3488         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3489         "[TT]-extp[tt] option for these files, it is assumed",
3490         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3491         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3492         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3493         "subtitle (if present), otherwise from a number in the subdirectory ",
3494         "in the file name.[PAR]",
3495
3496         "The [GRK]lambda[grk] of the simulation is parsed from ",
3497         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3498         "foreign [GRK]lambda[grk] values from the legend containing the ",
3499         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3500         "the legend line containing 'T ='.[PAR]",
3501
3502         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3503         "These can contain either lists of energy differences (see the ",
3504         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3505         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3506         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3507         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3508
3509         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3510         "the energy difference can also be extrapolated from the ",
3511         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3512         "option, which assumes that the system's Hamiltonian depends linearly",
3513         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3514
3515         "The free energy estimates are determined using BAR with bisection, ",
3516         "with the precision of the output set with [TT]-prec[tt]. ",
3517         "An error estimate taking into account time correlations ",
3518         "is made by splitting the data into blocks and determining ",
3519         "the free energy differences over those blocks and assuming ",
3520         "the blocks are independent. ",
3521         "The final error estimate is determined from the average variance ",
3522         "over 5 blocks. A range of block numbers for error estimation can ",
3523         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3524
3525         "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
3526         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3527         "samples. [BB]Note[bb] that when aggregating energy ",
3528         "differences/derivatives with different sampling intervals, this is ",
3529         "almost certainly not correct. Usually subsequent energies are ",
3530         "correlated and different time intervals mean different degrees ",
3531         "of correlation between samples.[PAR]",
3532
3533         "The results are split in two parts: the last part contains the final ",
3534         "results in kJ/mol, together with the error estimate for each part ",
3535         "and the total. The first part contains detailed free energy ",
3536         "difference estimates and phase space overlap measures in units of ",
3537         "kT (together with their computed error estimate). The printed ",
3538         "values are:[BR]",
3539         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3540         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3541         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3542         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3543         "[TT]*[tt]    s_B: an estimate of the relative entropy of A in B.[BR]",
3544         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3545
3546         "The relative entropy of both states in each other's ensemble can be ",
3547         "interpreted as a measure of phase space overlap: ",
3548         "the relative entropy s_A of the work samples of lambda_B in the ",
3549         "ensemble of lambda_A (and vice versa for s_B), is a ",
3550         "measure of the 'distance' between Boltzmann distributions of ",
3551         "the two states, that goes to zero for identical distributions. See ",
3552         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3553         "[PAR]",
3554         "The estimate of the expected per-sample standard deviation, as given ",
3555         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3556         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3557         "of the actual statistical error, because it assumes independent samples).[PAR]",
3558
3559         "To get a visual estimate of the phase space overlap, use the ",
3560         "[TT]-oh[tt] option to write series of histograms, together with the ",
3561         "[TT]-nbin[tt] option.[PAR]"
3562     };
3563     static real        begin    = 0, end = -1, temp = -1;
3564     int                nd       = 2, nbmin = 5, nbmax = 5;
3565     int                nbin     = 100;
3566     gmx_bool           use_dhdl = FALSE;
3567     gmx_bool           calc_s, calc_v;
3568     t_pargs            pa[] = {
3569         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3570         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3571         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3572         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3573         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3574         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3575         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3576         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3577     };
3578
3579     t_filenm           fnm[] = {
3580         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3581         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3582         { efXVG, "-o",  "bar",    ffOPTWR },
3583         { efXVG, "-oi", "barint", ffOPTWR },
3584         { efXVG, "-oh", "histogram", ffOPTWR }
3585     };
3586 #define NFILE asize(fnm)
3587
3588     int          f, i, j;
3589     int          nf = 0;    /* file counter */
3590     int          nbs;
3591     int          nfile_tot; /* total number of input files */
3592     int          nxvgfile = 0;
3593     int          nedrfile = 0;
3594     char       **fxvgnms;
3595     char       **fedrnms;
3596     sim_data_t   sim_data; /* the simulation data */
3597     barres_t    *results;  /* the results */
3598     int          nresults; /* number of results in results array */
3599
3600     double      *partsum;
3601     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3602     FILE        *fpb, *fpi;
3603     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3604     char         buf[STRLEN], buf2[STRLEN];
3605     char         ktformat[STRLEN], sktformat[STRLEN];
3606     char         kteformat[STRLEN], skteformat[STRLEN];
3607     output_env_t oenv;
3608     double       kT, beta;
3609     gmx_bool     result_OK = TRUE, bEE = TRUE;
3610
3611     gmx_bool     disc_err          = FALSE;
3612     double       sum_disc_err      = 0.; /* discretization error */
3613     gmx_bool     histrange_err     = FALSE;
3614     double       sum_histrange_err = 0.; /* histogram range error */
3615     double       stat_err          = 0.; /* statistical error */
3616
3617     parse_common_args(&argc, argv,
3618                       PCA_CAN_VIEW,
3619                       NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv);
3620
3621     if (opt2bSet("-f", NFILE, fnm))
3622     {
3623         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3624     }
3625     if (opt2bSet("-g", NFILE, fnm))
3626     {
3627         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3628     }
3629
3630     sim_data_init(&sim_data);
3631 #if 0
3632     /* make linked list */
3633     lb = &lambda_head;
3634     lambda_data_init(lb, 0, 0);
3635     lb->next = lb;
3636     lb->prev = lb;
3637 #endif
3638
3639
3640     nfile_tot = nxvgfile + nedrfile;
3641
3642     if (nfile_tot == 0)
3643     {
3644         gmx_fatal(FARGS, "No input files!");
3645     }
3646
3647     if (nd < 0)
3648     {
3649         gmx_fatal(FARGS, "Can not have negative number of digits");
3650     }
3651     prec = pow(10, -nd);
3652
3653     snew(partsum, (nbmax+1)*(nbmax+1));
3654     nf = 0;
3655
3656     /* read in all files. First xvg files */
3657     for (f = 0; f < nxvgfile; f++)
3658     {
3659         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3660         nf++;
3661     }
3662     /* then .edr files */
3663     for (f = 0; f < nedrfile; f++)
3664     {
3665         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3666         nf++;
3667     }
3668
3669     /* fix the times to allow for equilibration */
3670     sim_data_impose_times(&sim_data, begin, end);
3671
3672     if (opt2bSet("-oh", NFILE, fnm))
3673     {
3674         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3675     }
3676
3677     /* assemble the output structures from the lambdas */
3678     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3679
3680     sum_disc_err = barres_list_max_disc_err(results, nresults);
3681
3682     if (nresults == 0)
3683     {
3684         printf("\nNo results to calculate.\n");
3685         return 0;
3686     }
3687
3688     if (sum_disc_err > prec)
3689     {
3690         prec = sum_disc_err;
3691         nd   = ceil(-log10(prec));
3692         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3693     }
3694
3695
3696     /*sprintf(lamformat,"%%6.3f");*/
3697     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3698     /* the format strings of the results in kT */
3699     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3700     sprintf( sktformat, "%%%ds", 6+nd);
3701     /* the format strings of the errors in kT */
3702     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3703     sprintf( skteformat, "%%%ds", 4+nd);
3704     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3705     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3706
3707
3708
3709     fpb = NULL;
3710     if (opt2bSet("-o", NFILE, fnm))
3711     {
3712         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3713         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3714                             "\\lambda", buf, exvggtXYDY, oenv);
3715     }
3716
3717     fpi = NULL;
3718     if (opt2bSet("-oi", NFILE, fnm))
3719     {
3720         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3721         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3722                        "\\lambda", buf, oenv);
3723     }
3724
3725
3726
3727     if (nbmin > nbmax)
3728     {
3729         nbmin = nbmax;
3730     }
3731
3732     /* first calculate results */
3733     bEE      = TRUE;
3734     disc_err = FALSE;
3735     for (f = 0; f < nresults; f++)
3736     {
3737         /* Determine the free energy difference with a factor of 10
3738          * more accuracy than requested for printing.
3739          */
3740         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3741                  &bEE, partsum);
3742
3743         if (results[f].dg_disc_err > prec/10.)
3744         {
3745             disc_err = TRUE;
3746         }
3747         if (results[f].dg_histrange_err > prec/10.)
3748         {
3749             histrange_err = TRUE;
3750         }
3751     }
3752
3753     /* print results in kT */
3754     kT   = BOLTZ*temp;
3755     beta = 1/kT;
3756
3757     printf("\nTemperature: %g K\n", temp);
3758
3759     printf("\nDetailed results in kT (see help for explanation):\n\n");
3760     printf("%6s ", " lam_A");
3761     printf("%6s ", " lam_B");
3762     printf(sktformat,  "DG ");
3763     if (bEE)
3764     {
3765         printf(skteformat, "+/- ");
3766     }
3767     if (disc_err)
3768     {
3769         printf(skteformat, "disc ");
3770     }
3771     if (histrange_err)
3772     {
3773         printf(skteformat, "range ");
3774     }
3775     printf(sktformat,  "s_A ");
3776     if (bEE)
3777     {
3778         printf(skteformat, "+/- " );
3779     }
3780     printf(sktformat,  "s_B ");
3781     if (bEE)
3782     {
3783         printf(skteformat, "+/- " );
3784     }
3785     printf(sktformat,  "stdev ");
3786     if (bEE)
3787     {
3788         printf(skteformat, "+/- ");
3789     }
3790     printf("\n");
3791     for (f = 0; f < nresults; f++)
3792     {
3793         lambda_vec_print_short(results[f].a->native_lambda, buf);
3794         printf("%s ", buf);
3795         lambda_vec_print_short(results[f].b->native_lambda, buf);
3796         printf("%s ", buf);
3797         printf(ktformat,  results[f].dg);
3798         printf(" ");
3799         if (bEE)
3800         {
3801             printf(kteformat, results[f].dg_err);
3802             printf(" ");
3803         }
3804         if (disc_err)
3805         {
3806             printf(kteformat, results[f].dg_disc_err);
3807             printf(" ");
3808         }
3809         if (histrange_err)
3810         {
3811             printf(kteformat, results[f].dg_histrange_err);
3812             printf(" ");
3813         }
3814         printf(ktformat,  results[f].sa);
3815         printf(" ");
3816         if (bEE)
3817         {
3818             printf(kteformat, results[f].sa_err);
3819             printf(" ");
3820         }
3821         printf(ktformat,  results[f].sb);
3822         printf(" ");
3823         if (bEE)
3824         {
3825             printf(kteformat, results[f].sb_err);
3826             printf(" ");
3827         }
3828         printf(ktformat,  results[f].dg_stddev);
3829         printf(" ");
3830         if (bEE)
3831         {
3832             printf(kteformat, results[f].dg_stddev_err);
3833         }
3834         printf("\n");
3835
3836         /* Check for negative relative entropy with a 95% certainty. */
3837         if (results[f].sa < -2*results[f].sa_err ||
3838             results[f].sb < -2*results[f].sb_err)
3839         {
3840             result_OK = FALSE;
3841         }
3842     }
3843
3844     if (!result_OK)
3845     {
3846         printf("\nWARNING: Some of these results violate the Second Law of "
3847                "Thermodynamics: \n"
3848                "         This is can be the result of severe undersampling, or "
3849                "(more likely)\n"
3850                "         there is something wrong with the simulations.\n");
3851     }
3852
3853
3854     /* final results in kJ/mol */
3855     printf("\n\nFinal results in kJ/mol:\n\n");
3856     dg_tot  = 0;
3857     for (f = 0; f < nresults; f++)
3858     {
3859
3860         if (fpi != NULL)
3861         {
3862             lambda_vec_print_short(results[f].a->native_lambda, buf);
3863             fprintf(fpi, xvg2format, buf, dg_tot);
3864         }
3865
3866
3867         if (fpb != NULL)
3868         {
3869             lambda_vec_print_intermediate(results[f].a->native_lambda,
3870                                           results[f].b->native_lambda,
3871                                           buf);
3872
3873             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3874         }
3875
3876         printf("point ");
3877         lambda_vec_print_short(results[f].a->native_lambda, buf);
3878         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3879         printf("%s - %s", buf, buf2);
3880         printf(",   DG ");
3881
3882         printf(dgformat, results[f].dg*kT);
3883         if (bEE)
3884         {
3885             printf(" +/- ");
3886             printf(dgformat, results[f].dg_err*kT);
3887         }
3888         if (histrange_err)
3889         {
3890             printf(" (max. range err. = ");
3891             printf(dgformat, results[f].dg_histrange_err*kT);
3892             printf(")");
3893             sum_histrange_err += results[f].dg_histrange_err*kT;
3894         }
3895
3896         printf("\n");
3897         dg_tot += results[f].dg;
3898     }
3899     printf("\n");
3900     printf("total ");
3901     lambda_vec_print_short(results[0].a->native_lambda, buf);
3902     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3903     printf("%s - %s", buf, buf2);
3904     printf(",   DG ");
3905
3906     printf(dgformat, dg_tot*kT);
3907     if (bEE)
3908     {
3909         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3910         printf(" +/- ");
3911         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3912     }
3913     printf("\n");
3914     if (disc_err)
3915     {
3916         printf("\nmaximum discretization error = ");
3917         printf(dgformat, sum_disc_err);
3918         if (bEE && stat_err < sum_disc_err)
3919         {
3920             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3921         }
3922     }
3923     if (histrange_err)
3924     {
3925         printf("\nmaximum histogram range error = ");
3926         printf(dgformat, sum_histrange_err);
3927         if (bEE && stat_err < sum_histrange_err)
3928         {
3929             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3930         }
3931
3932     }
3933     printf("\n");
3934
3935
3936     if (fpi != NULL)
3937     {
3938         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3939         fprintf(fpi, xvg2format, buf, dg_tot);
3940         ffclose(fpi);
3941     }
3942
3943     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3944     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3945
3946     return 0;
3947 }