src/gromacs/gmxana/gmx_bar.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "config.h"
  36
  37 #include <ctype.h>
  38 #include <float.h>
  39 #include <math.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42
  43 #include "typedefs.h"
  44 #include "gromacs/utility/smalloc.h"
  45 #include "gromacs/utility/futil.h"
  46 #include "gromacs/commandline/pargs.h"
  47 #include "macros.h"
  48 #include "gromacs/fileio/enxio.h"
  49 #include "gromacs/math/units.h"
  50 #include "gromacs/utility/fatalerror.h"
  51 #include "gromacs/fileio/xvgr.h"
  52 #include "viewit.h"
  53 #include "gmx_ana.h"
  54 #include "gromacs/math/utilities.h"
  55 #include "gromacs/utility/cstringutil.h"
  56 #include "names.h"
  57 #include "mdebin.h"
  58
  59
  60 /* Structure for the names of lambda vector components */
  61 typedef struct lambda_components_t
  62 {
  63     char **names;  /* Array of strings with names for the lambda vector
  64                       components */
  65     int    N;      /* The number of components */
  66     int    Nalloc; /* The number of allocated components */
  67 } lambda_components_t;
  68
  69 /* Structure for a lambda vector or a dhdl derivative direction */
  70 typedef struct lambda_vec_t
  71 {
  72     double                    *val;   /* The lambda vector component values. Only valid if
  73                                          dhdl == -1 */
  74     int                        dhdl;  /* The coordinate index for the derivative described by this
  75                                          structure, or -1 */
  76     const lambda_components_t *lc;    /* the associated lambda_components
  77                                          structure */
  78     int                        index; /* The state number (init-lambda-state) of this lambda
  79                                          vector, if known. If not, it is set to -1 */
  80 } lambda_vec_t;
  81
  82 /* the dhdl.xvg data from a simulation */
  83 typedef struct xvg_t
  84 {
  85     const char   *filename;
  86     int           ftp;           /* file type */
  87     int           nset;          /* number of lambdas, including dhdl */
  88     int          *np;            /* number of data points (du or hists) per lambda */
  89     int           np_alloc;      /* number of points (du or hists) allocated */
  90     double        temp;          /* temperature */
  91     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  92     double       *t;             /* the times (of second index for y) */
  93     double      **y;             /* the dU values. y[0] holds the derivative, while
  94                                     further ones contain the energy differences between
  95                                     the native lambda and the 'foreign' lambdas. */
  96     lambda_vec_t  native_lambda; /* the native lambda */
  97
  98     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
  99 } xvg_t;
 100
 101
 102 typedef struct hist_t
 103 {
 104     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 105     double          dx[2];                  /* the histogram spacing. The reverse
 106                                                dx is the negative of the forward dx.*/
 107     gmx_int64_t     x0[2];                  /* the (forward + reverse) histogram start
 108                                                    point(s) as int */
 109
 110     int             nbin[2];                /* the (forward+reverse) number of bins */
 111     gmx_int64_t     sum;                    /* the total number of counts. Must be
 112                                                    the same for forward + reverse.  */
 113     int             nhist;                  /* number of hist datas (forward or reverse) */
 114
 115     double          start_time, delta_time; /* start time, end time of histogram */
 116 } hist_t;
 117
 118
 119 /* an aggregate of samples for partial free energy calculation */
 120 typedef struct samples_t
 121 {
 122     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 123     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 124     double        temp;           /* the temperature */
 125     gmx_bool      derivative;     /* whether this sample is a derivative */
 126
 127     /* The samples come either as either delta U lists: */
 128     int     ndu;                    /* the number of delta U samples */
 129     double *du;                     /* the delta u's */
 130     double *t;                      /* the times associated with those samples, or: */
 131     double  start_time, delta_time; /*start time and delta time for linear time*/
 132
 133     /* or as histograms: */
 134     hist_t *hist; /* a histogram */
 135
 136     /* allocation data: (not NULL for data 'owned' by this struct) */
 137     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 138     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 139     hist_t         *hist_alloc;          /* allocated hist */
 140
 141     gmx_int64_t     ntot;                /* total number of samples */
 142     const char     *filename;            /* the file name this sample comes from */
 143 } samples_t;
 144
 145 /* a sample range (start to end for du-style data, or boolean
 146     for both du-style data and histograms */
 147 typedef struct sample_range_t
 148 {
 149     int        start, end; /* start and end index for du style data */
 150     gmx_bool   use;        /* whether to use this sample */
 151
 152     samples_t *s;          /* the samples this range belongs to */
 153 } sample_range_t;
 154
 155
 156 /* a collection of samples for a partial free energy calculation
 157     (i.e. the collection of samples from one native lambda to one
 158     foreign lambda) */
 159 typedef struct sample_coll_t
 160 {
 161     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 162                                           in the histogram */
 163     lambda_vec_t   *foreign_lambda;    /* collection */
 164     double          temp;              /* the temperature */
 165
 166     int             nsamples;          /* the number of samples */
 167     samples_t     **s;                 /* the samples themselves */
 168     sample_range_t *r;                 /* the sample ranges */
 169     int             nsamples_alloc;    /* number of allocated samples */
 170
 171     gmx_int64_t     ntot;              /* total number of samples in the ranges of
 172                                               this collection */
 173
 174     struct sample_coll_t *next, *prev; /* next and previous in the list */
 175 } sample_coll_t;
 176
 177 /* all the samples associated with a lambda point */
 178 typedef struct lambda_data_t
 179 {
 180     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 181     double                temp;        /* temperature */
 182
 183     sample_coll_t        *sc;          /* the samples */
 184
 185     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 186
 187     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 188 } lambda_data_t;
 189
 190 /* Top-level data structure of simulation data */
 191 typedef struct sim_data_t
 192 {
 193     lambda_data_t      *lb;      /* a lambda data linked list */
 194     lambda_data_t       lb_head; /* The head element of the linked list */
 195
 196     lambda_components_t lc;      /* the allowed components of the lambda
 197                                     vectors */
 198 } sim_data_t;
 199
 200 /* Top-level data structure with calculated values. */
 201 typedef struct {
 202     sample_coll_t *a, *b;            /* the simulation data */
 203
 204     double         dg;               /* the free energy difference */
 205     double         dg_err;           /* the free energy difference */
 206
 207     double         dg_disc_err;      /* discretization error */
 208     double         dg_histrange_err; /* histogram range error */
 209
 210     double         sa;               /* relative entropy of b in state a */
 211     double         sa_err;           /* error in sa */
 212     double         sb;               /* relative entropy of a in state b */
 213     double         sb_err;           /* error in sb */
 214
 215     double         dg_stddev;        /* expected dg stddev per sample */
 216     double         dg_stddev_err;    /* error in dg_stddev */
 217 } barres_t;
 218
 219
 220 /* Initialize a lambda_components structure */
 221 static void lambda_components_init(lambda_components_t *lc)
 222 {
 223     lc->N      = 0;
 224     lc->Nalloc = 2;
 225     snew(lc->names, lc->Nalloc);
 226 }
 227
 228 /* Add a component to a lambda_components structure */
 229 static void lambda_components_add(lambda_components_t *lc,
 230                                   const char *name, size_t name_length)
 231 {
 232     while (lc->N + 1 > lc->Nalloc)
 233     {
 234         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 235         srenew(lc->names, lc->Nalloc);
 236     }
 237     snew(lc->names[lc->N], name_length+1);
 238     strncpy(lc->names[lc->N], name, name_length);
 239     lc->N++;
 240 }
 241
 242 /* check whether a component with index 'index' matches the given name, or
 243    is also NULL. Returns TRUE if this is the case.
 244    the string name does not need to end */
 245 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 246                                         int                        index,
 247                                         const char                *name,
 248                                         size_t                     name_length)
 249 {
 250     size_t len;
 251     if (index >= lc->N)
 252     {
 253         return FALSE;
 254     }
 255     if (name == NULL && lc->names[index] == NULL)
 256     {
 257         return TRUE;
 258     }
 259     if ((name == NULL) != (lc->names[index] == NULL))
 260     {
 261         return FALSE;
 262     }
 263     len = strlen(lc->names[index]);
 264     if (len != name_length)
 265     {
 266         return FALSE;
 267     }
 268     if (strncmp(lc->names[index], name, name_length) == 0)
 269     {
 270         return TRUE;
 271     }
 272     return FALSE;
 273 }
 274
 275 /* Find the index of a given lambda component name, or -1 if not found */
 276 static int lambda_components_find(const lambda_components_t *lc,
 277                                   const char                *name,
 278                                   size_t                     name_length)
 279 {
 280     int i;
 281
 282     for (i = 0; i < lc->N; i++)
 283     {
 284         if (strncmp(lc->names[i], name, name_length) == 0)
 285         {
 286             return i;
 287         }
 288     }
 289     return -1;
 290 }
 291
 292
 293
 294 /* initialize a lambda vector */
 295 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 296 {
 297     snew(lv->val, lc->N);
 298     lv->index = -1;
 299     lv->dhdl  = -1;
 300     lv->lc    = lc;
 301 }
 302
 303 static void lambda_vec_destroy(lambda_vec_t *lv)
 304 {
 305     sfree(lv->val);
 306 }
 307
 308 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 309 {
 310     int i;
 311
 312     lambda_vec_init(lv, orig->lc);
 313     lv->dhdl  = orig->dhdl;
 314     lv->index = orig->index;
 315     for (i = 0; i < lv->lc->N; i++)
 316     {
 317         lv->val[i] = orig->val[i];
 318     }
 319 }
 320
 321 /* write a lambda vec to a preallocated string */
 322 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 323 {
 324     int    i;
 325     size_t np;
 326
 327     str[0] = 0; /* reset the string */
 328     if (lv->dhdl < 0)
 329     {
 330         if (named)
 331         {
 332             str += sprintf(str, "delta H to ");
 333         }
 334         if (lv->lc->N > 1)
 335         {
 336             str += sprintf(str, "(");
 337         }
 338         for (i = 0; i < lv->lc->N; i++)
 339         {
 340             str += sprintf(str, "%g", lv->val[i]);
 341             if (i < lv->lc->N-1)
 342             {
 343                 str += sprintf(str, ", ");
 344             }
 345         }
 346         if (lv->lc->N > 1)
 347         {
 348             str += sprintf(str, ")");
 349         }
 350     }
 351     else
 352     {
 353         /* this lambda vector describes a derivative */
 354         str += sprintf(str, "dH/dl");
 355         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 356         {
 357             sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 358         }
 359     }
 360 }
 361
 362 /* write a shortened version of the lambda vec to a preallocated string */
 363 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 364 {
 365     int    i;
 366     size_t np;
 367
 368     if (lv->index >= 0)
 369     {
 370         sprintf(str, "%6d", lv->index);
 371     }
 372     else
 373     {
 374         if (lv->dhdl < 0)
 375         {
 376             sprintf(str, "%6.3f", lv->val[0]);
 377         }
 378         else
 379         {
 380             sprintf(str, "dH/dl[%d]", lv->dhdl);
 381         }
 382     }
 383 }
 384
 385 /* write an intermediate version of two lambda vecs to a preallocated string */
 386 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 387                                           const lambda_vec_t *b, char *str)
 388 {
 389     int    i;
 390     size_t np;
 391
 392     str[0] = 0;
 393     if ( (a->index >= 0) && (b->index >= 0) )
 394     {
 395         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 396     }
 397     else
 398     {
 399         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 400         {
 401             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 402         }
 403     }
 404 }
 405
 406
 407
 408 /* calculate the difference in lambda vectors: c = a-b.
 409    c must be initialized already, and a and b must describe non-derivative
 410    lambda points */
 411 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 412                             lambda_vec_t *c)
 413 {
 414     int i;
 415
 416     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 417     {
 418         gmx_fatal(FARGS,
 419                   "Trying to calculate the difference between derivatives instead of lambda points");
 420     }
 421     if ((a->lc != b->lc) || (a->lc != c->lc) )
 422     {
 423         gmx_fatal(FARGS,
 424                   "Trying to calculate the difference lambdas with differing basis set");
 425     }
 426     for (i = 0; i < a->lc->N; i++)
 427     {
 428         c->val[i] = a->val[i] - b->val[i];
 429     }
 430 }
 431
 432 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 433    a and b must describe non-derivative lambda points */
 434 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 435 {
 436     int    i;
 437     double ret = 0.;
 438
 439     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 440     {
 441         gmx_fatal(FARGS,
 442                   "Trying to calculate the difference between derivatives instead of lambda points");
 443     }
 444     if (a->lc != b->lc)
 445     {
 446         gmx_fatal(FARGS,
 447                   "Trying to calculate the difference lambdas with differing basis set");
 448     }
 449     for (i = 0; i < a->lc->N; i++)
 450     {
 451         double df = a->val[i] - b->val[i];
 452         ret += df*df;
 453     }
 454     return sqrt(ret);
 455 }
 456
 457
 458 /* check whether two lambda vectors are the same */
 459 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 460 {
 461     int i;
 462
 463     if (a->lc != b->lc)
 464     {
 465         return FALSE;
 466     }
 467     if (a->dhdl < 0)
 468     {
 469         for (i = 0; i < a->lc->N; i++)
 470         {
 471             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 472             {
 473                 return FALSE;
 474             }
 475         }
 476         return TRUE;
 477     }
 478     else
 479     {
 480         /* they're derivatives, so we check whether the indices match */
 481         return (a->dhdl == b->dhdl);
 482     }
 483 }
 484
 485 /* Compare the sort order of two foreign lambda vectors
 486
 487     returns 1 if a is 'bigger' than b,
 488     returns 0 if they're the same,
 489     returns -1 if a is 'smaller' than b.*/
 490 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 491                                        const lambda_vec_t *b)
 492 {
 493     int      i;
 494     double   norm_a    = 0, norm_b = 0;
 495     gmx_bool different = FALSE;
 496
 497     if (a->lc != b->lc)
 498     {
 499         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 500     }
 501     /* if either one has an index we sort based on that */
 502     if ((a->index >= 0) || (b->index >= 0))
 503     {
 504         if (a->index == b->index)
 505         {
 506             return 0;
 507         }
 508         return (a->index > b->index) ? 1 : -1;
 509     }
 510     if (a->dhdl >= 0 || b->dhdl >= 0)
 511     {
 512         /* lambda vectors that are derivatives always sort higher than those
 513            without derivatives */
 514         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 515         {
 516             return (a->dhdl >= 0) ? 1 : -1;
 517         }
 518         return a->dhdl > b->dhdl;
 519     }
 520
 521     /* neither has an index, so we can only sort on the lambda components,
 522        which is only valid if there is one component */
 523     for (i = 0; i < a->lc->N; i++)
 524     {
 525         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 526         {
 527             different = TRUE;
 528         }
 529         norm_a += a->val[i]*a->val[i];
 530         norm_b += b->val[i]*b->val[i];
 531     }
 532     if (!different)
 533     {
 534         return 0;
 535     }
 536     return norm_a > norm_b;
 537 }
 538
 539 /* Compare the sort order of two native lambda vectors
 540
 541     returns 1 if a is 'bigger' than b,
 542     returns 0 if they're the same,
 543     returns -1 if a is 'smaller' than b.*/
 544 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 545                                       const lambda_vec_t *b)
 546 {
 547     int i;
 548
 549     if (a->lc != b->lc)
 550     {
 551         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 552     }
 553     /* if either one has an index we sort based on that */
 554     if ((a->index >= 0) || (b->index >= 0))
 555     {
 556         if (a->index == b->index)
 557         {
 558             return 0;
 559         }
 560         return (a->index > b->index) ? 1 : -1;
 561     }
 562     /* neither has an index, so we can only sort on the lambda components,
 563        which is only valid if there is one component */
 564     if (a->lc->N > 1)
 565     {
 566         gmx_fatal(FARGS,
 567                   "Can't compare lambdas with no index and > 1 component");
 568     }
 569     if (a->dhdl >= 0 || b->dhdl >= 0)
 570     {
 571         gmx_fatal(FARGS,
 572                   "Can't compare native lambdas that are derivatives");
 573     }
 574     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 575     {
 576         return 0;
 577     }
 578     return a->val[0] > b->val[0] ? 1 : -1;
 579 }
 580
 581
 582
 583
 584 static void hist_init(hist_t *h, int nhist, int *nbin)
 585 {
 586     int i;
 587     if (nhist > 2)
 588     {
 589         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 590     }
 591     for (i = 0; i < nhist; i++)
 592     {
 593         snew(h->bin[i], nbin[i]);
 594         h->x0[i]      = 0;
 595         h->nbin[i]    = nbin[i];
 596         h->start_time = h->delta_time = 0;
 597         h->dx[i]      = 0;
 598     }
 599     h->sum   = 0;
 600     h->nhist = nhist;
 601 }
 602
 603 static void hist_destroy(hist_t *h)
 604 {
 605     sfree(h->bin);
 606 }
 607
 608
 609 static void xvg_init(xvg_t *ba)
 610 {
 611     ba->filename = NULL;
 612     ba->nset     = 0;
 613     ba->np_alloc = 0;
 614     ba->np       = NULL;
 615     ba->y        = NULL;
 616 }
 617
 618 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 619                          lambda_vec_t *foreign_lambda, double temp,
 620                          gmx_bool derivative, const char *filename)
 621 {
 622     s->native_lambda  = native_lambda;
 623     s->foreign_lambda = foreign_lambda;
 624     s->temp           = temp;
 625     s->derivative     = derivative;
 626
 627     s->ndu        = 0;
 628     s->du         = NULL;
 629     s->t          = NULL;
 630     s->start_time = s->delta_time = 0;
 631     s->hist       = NULL;
 632     s->du_alloc   = NULL;
 633     s->t_alloc    = NULL;
 634     s->hist_alloc = NULL;
 635     s->ndu_alloc  = 0;
 636     s->nt_alloc   = 0;
 637
 638     s->ntot     = 0;
 639     s->filename = filename;
 640 }
 641
 642 static void sample_range_init(sample_range_t *r, samples_t *s)
 643 {
 644     r->start = 0;
 645     r->end   = s->ndu;
 646     r->use   = TRUE;
 647     r->s     = NULL;
 648 }
 649
 650 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 651                              lambda_vec_t *foreign_lambda, double temp)
 652 {
 653     sc->native_lambda  = native_lambda;
 654     sc->foreign_lambda = foreign_lambda;
 655     sc->temp           = temp;
 656
 657     sc->nsamples       = 0;
 658     sc->s              = NULL;
 659     sc->r              = NULL;
 660     sc->nsamples_alloc = 0;
 661
 662     sc->ntot = 0;
 663     sc->next = sc->prev = NULL;
 664 }
 665
 666 static void sample_coll_destroy(sample_coll_t *sc)
 667 {
 668     /* don't free the samples themselves */
 669     sfree(sc->r);
 670     sfree(sc->s);
 671 }
 672
 673
 674 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 675                              double temp)
 676 {
 677     l->lambda = native_lambda;
 678     l->temp   = temp;
 679
 680     l->next = NULL;
 681     l->prev = NULL;
 682
 683     l->sc = &(l->sc_head);
 684
 685     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 686     l->sc->next = l->sc;
 687     l->sc->prev = l->sc;
 688 }
 689
 690 static void barres_init(barres_t *br)
 691 {
 692     br->dg            = 0;
 693     br->dg_err        = 0;
 694     br->sa            = 0;
 695     br->sa_err        = 0;
 696     br->sb            = 0;
 697     br->sb_err        = 0;
 698     br->dg_stddev     = 0;
 699     br->dg_stddev_err = 0;
 700
 701     br->a = NULL;
 702     br->b = NULL;
 703 }
 704
 705
 706 /* calculate the total number of samples in a sample collection */
 707 static void sample_coll_calc_ntot(sample_coll_t *sc)
 708 {
 709     int i;
 710
 711     sc->ntot = 0;
 712     for (i = 0; i < sc->nsamples; i++)
 713     {
 714         if (sc->r[i].use)
 715         {
 716             if (sc->s[i]->hist)
 717             {
 718                 sc->ntot += sc->s[i]->ntot;
 719             }
 720             else
 721             {
 722                 sc->ntot += sc->r[i].end - sc->r[i].start;
 723             }
 724         }
 725     }
 726 }
 727
 728
 729 /* find the barsamples_t associated with a lambda that corresponds to
 730    a specific foreign lambda */
 731 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 732                                                    lambda_vec_t  *foreign_lambda)
 733 {
 734     sample_coll_t *sc = l->sc->next;
 735
 736     while (sc != l->sc)
 737     {
 738         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 739         {
 740             return sc;
 741         }
 742         sc = sc->next;
 743     }
 744
 745     return NULL;
 746 }
 747
 748 /* insert li into an ordered list of lambda_colls */
 749 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 750 {
 751     sample_coll_t *scn = l->sc->next;
 752     while ( (scn != l->sc) )
 753     {
 754         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 755         {
 756             break;
 757         }
 758         scn = scn->next;
 759     }
 760     /* now insert it before the found scn */
 761     sc->next        = scn;
 762     sc->prev        = scn->prev;
 763     scn->prev->next = sc;
 764     scn->prev       = sc;
 765 }
 766
 767 /* insert li into an ordered list of lambdas */
 768 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 769 {
 770     lambda_data_t *lc = head->next;
 771     while (lc != head)
 772     {
 773         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 774         {
 775             break;
 776         }
 777         lc = lc->next;
 778     }
 779     /* now insert ourselves before the found lc */
 780     li->next       = lc;
 781     li->prev       = lc->prev;
 782     lc->prev->next = li;
 783     lc->prev       = li;
 784 }
 785
 786 /* insert a sample and a sample_range into a sample_coll. The
 787     samples are stored as a pointer, the range is copied. */
 788 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 789                                       sample_range_t *r)
 790 {
 791     /* first check if it belongs here */
 792     if (sc->temp != s->temp)
 793     {
 794         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 795                   s->filename, sc->next->s[0]->filename);
 796     }
 797     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 798     {
 799         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 800                   s->filename, sc->next->s[0]->filename);
 801     }
 802     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 803     {
 804         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 805                   s->filename, sc->next->s[0]->filename);
 806     }
 807
 808     /* check if there's room */
 809     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 810     {
 811         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 812         srenew(sc->s, sc->nsamples_alloc);
 813         srenew(sc->r, sc->nsamples_alloc);
 814     }
 815     sc->s[sc->nsamples] = s;
 816     sc->r[sc->nsamples] = *r;
 817     sc->nsamples++;
 818
 819     sample_coll_calc_ntot(sc);
 820 }
 821
 822 /* insert a sample into a lambda_list, creating the right sample_coll if
 823    neccesary */
 824 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 825 {
 826     gmx_bool       found = FALSE;
 827     sample_coll_t *sc;
 828     sample_range_t r;
 829
 830     lambda_data_t *l = head->next;
 831
 832     /* first search for the right lambda_data_t */
 833     while (l != head)
 834     {
 835         if (lambda_vec_same(l->lambda, s->native_lambda) )
 836         {
 837             found = TRUE;
 838             break;
 839         }
 840         l = l->next;
 841     }
 842
 843     if (!found)
 844     {
 845         snew(l, 1);                                     /* allocate a new one */
 846         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 847         lambda_data_insert_lambda(head, l);             /* add it to the list */
 848     }
 849
 850     /* now look for a sample collection */
 851     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 852     if (!sc)
 853     {
 854         snew(sc, 1); /* allocate a new one */
 855         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 856         lambda_data_insert_sample_coll(l, sc);
 857     }
 858
 859     /* now insert the samples into the sample coll */
 860     sample_range_init(&r, s);
 861     sample_coll_insert_sample(sc, s, &r);
 862 }
 863
 864
 865 /* make a histogram out of a sample collection */
 866 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 867                                   int *nbin_alloc, int *nbin,
 868                                   double *dx, double *xmin, int nbin_default)
 869 {
 870     int      i, j, k;
 871     gmx_bool dx_set   = FALSE;
 872     gmx_bool xmin_set = FALSE;
 873
 874     gmx_bool xmax_set      = FALSE;
 875     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 876                                        limits of a histogram */
 877     double   xmax          = -1;
 878
 879     /* first determine dx and xmin; try the histograms */
 880     for (i = 0; i < sc->nsamples; i++)
 881     {
 882         if (sc->s[i]->hist)
 883         {
 884             hist_t *hist = sc->s[i]->hist;
 885             for (k = 0; k < hist->nhist; k++)
 886             {
 887                 double hdx      = hist->dx[k];
 888                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 889
 890                 /* we use the biggest dx*/
 891                 if ( (!dx_set) || hist->dx[0] > *dx)
 892                 {
 893                     dx_set = TRUE;
 894                     *dx    = hist->dx[0];
 895                 }
 896                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 897                 {
 898                     xmin_set = TRUE;
 899                     *xmin    = (hist->x0[k]*hdx);
 900                 }
 901
 902                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 903                 {
 904                     xmax_set = TRUE;
 905                     xmax     = xmax_now;
 906                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 907                     {
 908                         xmax_set_hard = TRUE;
 909                     }
 910                 }
 911                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 912                 {
 913                     xmax_set_hard = TRUE;
 914                     xmax          = xmax_now;
 915                 }
 916             }
 917         }
 918     }
 919     /* and the delta us */
 920     for (i = 0; i < sc->nsamples; i++)
 921     {
 922         if (sc->s[i]->ndu > 0)
 923         {
 924             /* determine min and max */
 925             int    starti  = sc->r[i].start;
 926             int    endi    = sc->r[i].end;
 927             double du_xmin = sc->s[i]->du[starti];
 928             double du_xmax = sc->s[i]->du[starti];
 929             for (j = starti+1; j < endi; j++)
 930             {
 931                 if (sc->s[i]->du[j] < du_xmin)
 932                 {
 933                     du_xmin = sc->s[i]->du[j];
 934                 }
 935                 if (sc->s[i]->du[j] > du_xmax)
 936                 {
 937                     du_xmax = sc->s[i]->du[j];
 938                 }
 939             }
 940
 941             /* and now change the limits */
 942             if ( (!xmin_set) || (du_xmin < *xmin) )
 943             {
 944                 xmin_set = TRUE;
 945                 *xmin    = du_xmin;
 946             }
 947             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 948             {
 949                 xmax_set = TRUE;
 950                 xmax     = du_xmax;
 951             }
 952         }
 953     }
 954
 955     if (!xmax_set || !xmin_set)
 956     {
 957         *nbin = 0;
 958         return;
 959     }
 960
 961
 962     if (!dx_set)
 963     {
 964         *nbin = nbin_default;
 965         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 966                                                be 0, and we count from 0 */
 967     }
 968     else
 969     {
 970         *nbin = (xmax-(*xmin))/(*dx);
 971     }
 972
 973     if (*nbin > *nbin_alloc)
 974     {
 975         *nbin_alloc = *nbin;
 976         srenew(*bin, *nbin_alloc);
 977     }
 978
 979     /* reset the histogram */
 980     for (i = 0; i < (*nbin); i++)
 981     {
 982         (*bin)[i] = 0;
 983     }
 984
 985     /* now add the actual data */
 986     for (i = 0; i < sc->nsamples; i++)
 987     {
 988         if (sc->s[i]->hist)
 989         {
 990             hist_t *hist = sc->s[i]->hist;
 991             for (k = 0; k < hist->nhist; k++)
 992             {
 993                 double hdx       = hist->dx[k];
 994                 double xmin_hist = hist->x0[k]*hdx;
 995                 for (j = 0; j < hist->nbin[k]; j++)
 996                 {
 997                     /* calculate the bin corresponding to the middle of the
 998                        original bin */
 999                     double x     = hdx*(j+0.5) + xmin_hist;
1000                     int    binnr = (int)((x-(*xmin))/(*dx));
1001
1002                     if (binnr >= *nbin || binnr < 0)
1003                     {
1004                         binnr = (*nbin)-1;
1005                     }
1006
1007                     (*bin)[binnr] += hist->bin[k][j];
1008                 }
1009             }
1010         }
1011         else
1012         {
1013             int starti = sc->r[i].start;
1014             int endi   = sc->r[i].end;
1015             for (j = starti; j < endi; j++)
1016             {
1017                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1018                 if (binnr >= *nbin || binnr < 0)
1019                 {
1020                     binnr = (*nbin)-1;
1021                 }
1022
1023                 (*bin)[binnr]++;
1024             }
1025         }
1026     }
1027 }
1028
1029 /* write a collection of histograms to a file */
1030 void sim_data_histogram(sim_data_t *sd, const char *filename,
1031                         int nbin_default, const output_env_t oenv)
1032 {
1033     char           label_x[STRLEN];
1034     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1035     const char    *title   = "N(\\DeltaH)";
1036     const char    *label_y = "Samples";
1037     FILE          *fp;
1038     lambda_data_t *bl;
1039     int            nsets     = 0;
1040     char         **setnames  = NULL;
1041     gmx_bool       first_set = FALSE;
1042     /* histogram data: */
1043     int           *hist       = NULL;
1044     int            nbin       = 0;
1045     int            nbin_alloc = 0;
1046     double         dx         = 0;
1047     double         min        = 0;
1048     int            i;
1049     lambda_data_t *bl_head = sd->lb;
1050
1051     printf("\nWriting histogram to %s\n", filename);
1052     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1053
1054     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1055
1056     /* first get all the set names */
1057     bl = bl_head->next;
1058     /* iterate over all lambdas */
1059     while (bl != bl_head)
1060     {
1061         sample_coll_t *sc = bl->sc->next;
1062
1063         /* iterate over all samples */
1064         while (sc != bl->sc)
1065         {
1066             char buf[STRLEN], buf2[STRLEN];
1067
1068             nsets++;
1069             srenew(setnames, nsets);
1070             snew(setnames[nsets-1], STRLEN);
1071             if (sc->foreign_lambda->dhdl < 0)
1072             {
1073                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1074                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1075                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1076                         deltag, lambda, buf2, lambda, buf);
1077             }
1078             else
1079             {
1080                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1081                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1082                         dhdl, lambda, buf);
1083             }
1084             sc = sc->next;
1085         }
1086
1087         bl = bl->next;
1088     }
1089     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1090
1091
1092     /* now make the histograms */
1093     bl = bl_head->next;
1094     /* iterate over all lambdas */
1095     while (bl != bl_head)
1096     {
1097         sample_coll_t *sc = bl->sc->next;
1098
1099         /* iterate over all samples */
1100         while (sc != bl->sc)
1101         {
1102             if (!first_set)
1103             {
1104                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1105             }
1106
1107             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1108                                   nbin_default);
1109
1110             for (i = 0; i < nbin; i++)
1111             {
1112                 double xmin = i*dx + min;
1113                 double xmax = (i+1)*dx + min;
1114
1115                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1116             }
1117
1118             first_set = FALSE;
1119             sc        = sc->next;
1120         }
1121
1122         bl = bl->next;
1123     }
1124
1125     if (hist)
1126     {
1127         sfree(hist);
1128     }
1129
1130     xvgrclose(fp);
1131 }
1132
1133 /* create a collection (array) of barres_t object given a ordered linked list
1134    of barlamda_t sample collections */
1135 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1136                                     gmx_bool use_dhdl)
1137 {
1138     lambda_data_t *bl;
1139     int            nlambda = 0;
1140     barres_t      *res;
1141     int            i;
1142     gmx_bool       dhdl    = FALSE;
1143     gmx_bool       first   = TRUE;
1144     lambda_data_t *bl_head = sd->lb;
1145
1146     /* first count the lambdas */
1147     bl = bl_head->next;
1148     while (bl != bl_head)
1149     {
1150         nlambda++;
1151         bl = bl->next;
1152     }
1153     snew(res, nlambda-1);
1154
1155     /* next put the right samples in the res */
1156     *nres = 0;
1157     bl    = bl_head->next->next; /* we start with the second one. */
1158     while (bl != bl_head)
1159     {
1160         sample_coll_t *sc, *scprev;
1161         barres_t      *br = &(res[*nres]);
1162         /* there is always a previous one. we search for that as a foreign
1163            lambda: */
1164         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1165         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1166
1167         barres_init(br);
1168
1169         if (use_dhdl)
1170         {
1171             /* we use dhdl */
1172
1173             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1174             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1175
1176             if (first)
1177             {
1178                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1179                 dhdl = TRUE;
1180             }
1181             if (!dhdl)
1182             {
1183                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1184             }
1185         }
1186         else if (!scprev && !sc)
1187         {
1188             gmx_fatal(FARGS, "There is no path from lambda=%f -> %f that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1189         }
1190
1191         /* normal delta H */
1192         if (!scprev)
1193         {
1194             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->lambda, bl->prev->lambda);
1195         }
1196         if (!sc)
1197         {
1198             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->prev->lambda, bl->lambda);
1199         }
1200         br->a = scprev;
1201         br->b = sc;
1202
1203         first = FALSE;
1204         (*nres)++;
1205         bl = bl->next;
1206     }
1207     return res;
1208 }
1209
1210 /* estimate the maximum discretization error */
1211 static double barres_list_max_disc_err(barres_t *res, int nres)
1212 {
1213     int    i, j;
1214     double disc_err = 0.;
1215     double delta_lambda;
1216
1217     for (i = 0; i < nres; i++)
1218     {
1219         barres_t *br = &(res[i]);
1220
1221         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1222                                            br->a->native_lambda);
1223
1224         for (j = 0; j < br->a->nsamples; j++)
1225         {
1226             if (br->a->s[j]->hist)
1227             {
1228                 double Wfac = 1.;
1229                 if (br->a->s[j]->derivative)
1230                 {
1231                     Wfac =  delta_lambda;
1232                 }
1233
1234                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1235             }
1236         }
1237         for (j = 0; j < br->b->nsamples; j++)
1238         {
1239             if (br->b->s[j]->hist)
1240             {
1241                 double Wfac = 1.;
1242                 if (br->b->s[j]->derivative)
1243                 {
1244                     Wfac =  delta_lambda;
1245                 }
1246                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1247             }
1248         }
1249     }
1250     return disc_err;
1251 }
1252
1253
1254 /* impose start and end times on a sample collection, updating sample_ranges */
1255 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1256                                      double end_t)
1257 {
1258     int i;
1259     for (i = 0; i < sc->nsamples; i++)
1260     {
1261         samples_t      *s = sc->s[i];
1262         sample_range_t *r = &(sc->r[i]);
1263         if (s->hist)
1264         {
1265             double end_time = s->hist->delta_time*s->hist->sum +
1266                 s->hist->start_time;
1267             if (s->hist->start_time < begin_t || end_time > end_t)
1268             {
1269                 r->use = FALSE;
1270             }
1271         }
1272         else
1273         {
1274             if (!s->t)
1275             {
1276                 double end_time;
1277                 if (s->start_time < begin_t)
1278                 {
1279                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1280                 }
1281                 end_time = s->delta_time*s->ndu + s->start_time;
1282                 if (end_time > end_t)
1283                 {
1284                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1285                 }
1286             }
1287             else
1288             {
1289                 int j;
1290                 for (j = 0; j < s->ndu; j++)
1291                 {
1292                     if (s->t[j] < begin_t)
1293                     {
1294                         r->start = j;
1295                     }
1296
1297                     if (s->t[j] >= end_t)
1298                     {
1299                         r->end = j;
1300                         break;
1301                     }
1302                 }
1303             }
1304             if (r->start > r->end)
1305             {
1306                 r->use = FALSE;
1307             }
1308         }
1309     }
1310     sample_coll_calc_ntot(sc);
1311 }
1312
1313 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1314 {
1315     double         first_t, last_t;
1316     double         begin_t, end_t;
1317     lambda_data_t *lc;
1318     lambda_data_t *head = sd->lb;
1319     int            j;
1320
1321     if (begin <= 0 && end < 0)
1322     {
1323         return;
1324     }
1325
1326     /* first determine the global start and end times */
1327     first_t = -1;
1328     last_t  = -1;
1329     lc      = head->next;
1330     while (lc != head)
1331     {
1332         sample_coll_t *sc = lc->sc->next;
1333         while (sc != lc->sc)
1334         {
1335             for (j = 0; j < sc->nsamples; j++)
1336             {
1337                 double start_t, end_t;
1338
1339                 start_t = sc->s[j]->start_time;
1340                 end_t   =   sc->s[j]->start_time;
1341                 if (sc->s[j]->hist)
1342                 {
1343                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1344                 }
1345                 else
1346                 {
1347                     if (sc->s[j]->t)
1348                     {
1349                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1350                     }
1351                     else
1352                     {
1353                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1354                     }
1355                 }
1356
1357                 if (start_t < first_t || first_t < 0)
1358                 {
1359                     first_t = start_t;
1360                 }
1361                 if (end_t > last_t)
1362                 {
1363                     last_t = end_t;
1364                 }
1365             }
1366             sc = sc->next;
1367         }
1368         lc = lc->next;
1369     }
1370
1371     /* calculate the actual times */
1372     if (begin > 0)
1373     {
1374         begin_t = begin;
1375     }
1376     else
1377     {
1378         begin_t = first_t;
1379     }
1380
1381     if (end > 0)
1382     {
1383         end_t = end;
1384     }
1385     else
1386     {
1387         end_t = last_t;
1388     }
1389     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1390
1391     if (begin_t > end_t)
1392     {
1393         return;
1394     }
1395     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1396
1397     /* then impose them */
1398     lc = head->next;
1399     while (lc != head)
1400     {
1401         sample_coll_t *sc = lc->sc->next;
1402         while (sc != lc->sc)
1403         {
1404             sample_coll_impose_times(sc, begin_t, end_t);
1405             sc = sc->next;
1406         }
1407         lc = lc->next;
1408     }
1409 }
1410
1411
1412 /* create subsample i out of ni from an existing sample_coll */
1413 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1414                                              sample_coll_t *sc_orig,
1415                                              int i, int ni)
1416 {
1417     int             j;
1418     int             hist_start, hist_end;
1419
1420     gmx_int64_t     ntot_start;
1421     gmx_int64_t     ntot_end;
1422     gmx_int64_t     ntot_so_far;
1423
1424     *sc = *sc_orig; /* just copy all fields */
1425
1426     /* allocate proprietary memory */
1427     snew(sc->s, sc_orig->nsamples);
1428     snew(sc->r, sc_orig->nsamples);
1429
1430     /* copy the samples */
1431     for (j = 0; j < sc_orig->nsamples; j++)
1432     {
1433         sc->s[j] = sc_orig->s[j];
1434         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1435     }
1436
1437     /* now fix start and end fields */
1438     /* the casts avoid possible overflows */
1439     ntot_start  = (gmx_int64_t)(sc_orig->ntot*(double)i/(double)ni);
1440     ntot_end    = (gmx_int64_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1441     ntot_so_far = 0;
1442     for (j = 0; j < sc->nsamples; j++)
1443     {
1444         gmx_int64_t ntot_add;
1445         gmx_int64_t new_start, new_end;
1446
1447         if (sc->r[j].use)
1448         {
1449             if (sc->s[j]->hist)
1450             {
1451                 ntot_add = sc->s[j]->hist->sum;
1452             }
1453             else
1454             {
1455                 ntot_add = sc->r[j].end - sc->r[j].start;
1456             }
1457         }
1458         else
1459         {
1460             ntot_add = 0;
1461         }
1462
1463         if (!sc->s[j]->hist)
1464         {
1465             if (ntot_so_far < ntot_start)
1466             {
1467                 /* adjust starting point */
1468                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1469             }
1470             else
1471             {
1472                 new_start = sc->r[j].start;
1473             }
1474             /* adjust end point */
1475             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1476             if (new_end > sc->r[j].end)
1477             {
1478                 new_end = sc->r[j].end;
1479             }
1480
1481             /* check if we're in range at all */
1482             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1483             {
1484                 new_start = 0;
1485                 new_end   = 0;
1486             }
1487             /* and write the new range */
1488             sc->r[j].start = (int)new_start;
1489             sc->r[j].end   = (int)new_end;
1490         }
1491         else
1492         {
1493             if (sc->r[j].use)
1494             {
1495                 double overlap;
1496                 double ntot_start_norm, ntot_end_norm;
1497                 /* calculate the amount of overlap of the
1498                    desired range (ntot_start -- ntot_end) onto
1499                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1500
1501                 /* first calculate normalized bounds
1502                    (where 0 is the start of the hist range, and 1 the end) */
1503                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1504                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1505
1506                 /* now fix the boundaries */
1507                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1508                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1509
1510                 /* and calculate the overlap */
1511                 overlap = ntot_end_norm - ntot_start_norm;
1512
1513                 if (overlap > 0.95) /* we allow for 5% slack */
1514                 {
1515                     sc->r[j].use = TRUE;
1516                 }
1517                 else if (overlap < 0.05)
1518                 {
1519                     sc->r[j].use = FALSE;
1520                 }
1521                 else
1522                 {
1523                     return FALSE;
1524                 }
1525             }
1526         }
1527         ntot_so_far += ntot_add;
1528     }
1529     sample_coll_calc_ntot(sc);
1530
1531     return TRUE;
1532 }
1533
1534 /* calculate minimum and maximum work values in sample collection */
1535 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1536                                 double *Wmin, double *Wmax)
1537 {
1538     int i, j;
1539
1540     *Wmin = FLT_MAX;
1541     *Wmax = -FLT_MAX;
1542
1543     for (i = 0; i < sc->nsamples; i++)
1544     {
1545         samples_t      *s = sc->s[i];
1546         sample_range_t *r = &(sc->r[i]);
1547         if (r->use)
1548         {
1549             if (!s->hist)
1550             {
1551                 for (j = r->start; j < r->end; j++)
1552                 {
1553                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1554                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1555                 }
1556             }
1557             else
1558             {
1559                 int    hd = 0; /* determine the histogram direction: */
1560                 double dx;
1561                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1562                 {
1563                     hd = 1;
1564                 }
1565                 dx = s->hist->dx[hd];
1566
1567                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1568                 {
1569                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1570                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1571                     /* look for the highest value bin with values */
1572                     if (s->hist->bin[hd][j] > 0)
1573                     {
1574                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1575                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1576                         break;
1577                     }
1578                 }
1579             }
1580         }
1581     }
1582 }
1583
1584 /* Initialize a sim_data structure */
1585 static void sim_data_init(sim_data_t *sd)
1586 {
1587     /* make linked list */
1588     sd->lb       = &(sd->lb_head);
1589     sd->lb->next = sd->lb;
1590     sd->lb->prev = sd->lb;
1591
1592     lambda_components_init(&(sd->lc));
1593 }
1594
1595
1596 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1597 {
1598     int    i;
1599     double sum;
1600
1601     sum = 0;
1602
1603     for (i = 0; i < n; i++)
1604     {
1605         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1606     }
1607
1608     return sum;
1609 }
1610
1611 /* calculate the BAR average given a histogram
1612
1613     if type== 0, calculate the best estimate for the average,
1614     if type==-1, calculate the minimum possible value given the histogram
1615     if type== 1, calculate the maximum possible value given the histogram */
1616 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1617                                 int type)
1618 {
1619     double sum = 0.;
1620     int    i;
1621     int    max;
1622     /* normalization factor multiplied with bin width and
1623        number of samples (we normalize through M): */
1624     double normdx = 1.;
1625     int    hd     = 0; /* determine the histogram direction: */
1626     double dx;
1627
1628     if ( (hist->nhist > 1) && (Wfac < 0) )
1629     {
1630         hd = 1;
1631     }
1632     dx  = hist->dx[hd];
1633     max = hist->nbin[hd]-1;
1634     if (type == 1)
1635     {
1636         max = hist->nbin[hd]; /* we also add whatever was out of range */
1637     }
1638
1639     for (i = 0; i < max; i++)
1640     {
1641         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1642         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1643
1644         sum += pxdx/(1. + exp(x + sbMmDG));
1645     }
1646
1647     return sum;
1648 }
1649
1650 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1651                                 double temp, double tol, int type)
1652 {
1653     double kT, beta, M;
1654     double DG;
1655     int    i, j;
1656     double Wfac1, Wfac2, Wmin, Wmax;
1657     double DG0, DG1, DG2, dDG1;
1658     double sum1, sum2;
1659     double n1, n2; /* numbers of samples as doubles */
1660
1661     kT   = BOLTZ*temp;
1662     beta = 1/kT;
1663
1664     /* count the numbers of samples */
1665     n1 = ca->ntot;
1666     n2 = cb->ntot;
1667
1668     M = log(n1/n2);
1669
1670     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1671     if (ca->foreign_lambda->dhdl < 0)
1672     {
1673         /* this is the case when the delta U were calculated directly
1674            (i.e. we're not scaling dhdl) */
1675         Wfac1 = beta;
1676         Wfac2 = beta;
1677     }
1678     else
1679     {
1680         /* we're using dhdl, so delta_lambda needs to be a
1681            multiplication factor.  */
1682         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1683         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1684                                                   ca->native_lambda);
1685         if (cb->native_lambda->lc->N > 1)
1686         {
1687             gmx_fatal(FARGS,
1688                       "Can't (yet) do multi-component dhdl interpolation");
1689         }
1690
1691         Wfac1 =  beta*delta_lambda;
1692         Wfac2 = -beta*delta_lambda;
1693     }
1694
1695     if (beta < 1)
1696     {
1697         /* We print the output both in kT and kJ/mol.
1698          * Here we determine DG in kT, so when beta < 1
1699          * the precision has to be increased.
1700          */
1701         tol *= beta;
1702     }
1703
1704     /* Calculate minimum and maximum work to give an initial estimate of
1705      * delta G  as their average.
1706      */
1707     {
1708         double Wmin1, Wmin2, Wmax1, Wmax2;
1709         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1710         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1711
1712         Wmin = min(Wmin1, Wmin2);
1713         Wmax = max(Wmax1, Wmax2);
1714     }
1715
1716     DG0 = Wmin;
1717     DG2 = Wmax;
1718
1719     if (debug)
1720     {
1721         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1722     }
1723     /* We approximate by bisection: given our initial estimates
1724        we keep checking whether the halfway point is greater or
1725        smaller than what we get out of the BAR averages.
1726
1727        For the comparison we can use twice the tolerance. */
1728     while (DG2 - DG0 > 2*tol)
1729     {
1730         DG1 = 0.5*(DG0 + DG2);
1731
1732         /* calculate the BAR averages */
1733         dDG1 = 0.;
1734
1735         for (i = 0; i < ca->nsamples; i++)
1736         {
1737             samples_t      *s = ca->s[i];
1738             sample_range_t *r = &(ca->r[i]);
1739             if (r->use)
1740             {
1741                 if (s->hist)
1742                 {
1743                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1744                 }
1745                 else
1746                 {
1747                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1748                                          Wfac1, (M-DG1));
1749                 }
1750             }
1751         }
1752         for (i = 0; i < cb->nsamples; i++)
1753         {
1754             samples_t      *s = cb->s[i];
1755             sample_range_t *r = &(cb->r[i]);
1756             if (r->use)
1757             {
1758                 if (s->hist)
1759                 {
1760                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1761                 }
1762                 else
1763                 {
1764                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1765                                          Wfac2, -(M-DG1));
1766                 }
1767             }
1768         }
1769
1770         if (dDG1 < 0)
1771         {
1772             DG0 = DG1;
1773         }
1774         else
1775         {
1776             DG2 = DG1;
1777         }
1778         if (debug)
1779         {
1780             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1781         }
1782     }
1783
1784     return 0.5*(DG0 + DG2);
1785 }
1786
1787 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1788                              double temp, double dg, double *sa, double *sb)
1789 {
1790     int    i, j;
1791     double W_ab = 0.;
1792     double W_ba = 0.;
1793     double kT, beta;
1794     double Wfac1, Wfac2;
1795     double n1, n2;
1796
1797     kT   = BOLTZ*temp;
1798     beta = 1/kT;
1799
1800     /* count the numbers of samples */
1801     n1 = ca->ntot;
1802     n2 = cb->ntot;
1803
1804     /* to ensure the work values are the same as during the delta_G */
1805     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1806     if (ca->foreign_lambda->dhdl < 0)
1807     {
1808         /* this is the case when the delta U were calculated directly
1809            (i.e. we're not scaling dhdl) */
1810         Wfac1 = beta;
1811         Wfac2 = beta;
1812     }
1813     else
1814     {
1815         /* we're using dhdl, so delta_lambda needs to be a
1816            multiplication factor.  */
1817         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1818                                                   ca->native_lambda);
1819         Wfac1 =  beta*delta_lambda;
1820         Wfac2 = -beta*delta_lambda;
1821     }
1822
1823     /* first calculate the average work in both directions */
1824     for (i = 0; i < ca->nsamples; i++)
1825     {
1826         samples_t      *s = ca->s[i];
1827         sample_range_t *r = &(ca->r[i]);
1828         if (r->use)
1829         {
1830             if (!s->hist)
1831             {
1832                 for (j = r->start; j < r->end; j++)
1833                 {
1834                     W_ab += Wfac1*s->du[j];
1835                 }
1836             }
1837             else
1838             {
1839                 /* normalization factor multiplied with bin width and
1840                    number of samples (we normalize through M): */
1841                 double normdx = 1.;
1842                 double dx;
1843                 int    hd = 0; /* histogram direction */
1844                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1845                 {
1846                     hd = 1;
1847                 }
1848                 dx = s->hist->dx[hd];
1849
1850                 for (j = 0; j < s->hist->nbin[0]; j++)
1851                 {
1852                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1853                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1854                     W_ab += pxdx*x;
1855                 }
1856             }
1857         }
1858     }
1859     W_ab /= n1;
1860
1861     for (i = 0; i < cb->nsamples; i++)
1862     {
1863         samples_t      *s = cb->s[i];
1864         sample_range_t *r = &(cb->r[i]);
1865         if (r->use)
1866         {
1867             if (!s->hist)
1868             {
1869                 for (j = r->start; j < r->end; j++)
1870                 {
1871                     W_ba += Wfac1*s->du[j];
1872                 }
1873             }
1874             else
1875             {
1876                 /* normalization factor multiplied with bin width and
1877                    number of samples (we normalize through M): */
1878                 double normdx = 1.;
1879                 double dx;
1880                 int    hd = 0; /* histogram direction */
1881                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1882                 {
1883                     hd = 1;
1884                 }
1885                 dx = s->hist->dx[hd];
1886
1887                 for (j = 0; j < s->hist->nbin[0]; j++)
1888                 {
1889                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1890                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1891                     W_ba += pxdx*x;
1892                 }
1893             }
1894         }
1895     }
1896     W_ba /= n2;
1897
1898     /* then calculate the relative entropies */
1899     *sa = (W_ab - dg);
1900     *sb = (W_ba + dg);
1901 }
1902
1903 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1904                            double temp, double dg, double *stddev)
1905 {
1906     int    i, j;
1907     double M;
1908     double sigmafact = 0.;
1909     double kT, beta;
1910     double Wfac1, Wfac2;
1911     double n1, n2;
1912
1913     kT   = BOLTZ*temp;
1914     beta = 1/kT;
1915
1916     /* count the numbers of samples */
1917     n1 = ca->ntot;
1918     n2 = cb->ntot;
1919
1920     /* to ensure the work values are the same as during the delta_G */
1921     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1922     if (ca->foreign_lambda->dhdl < 0)
1923     {
1924         /* this is the case when the delta U were calculated directly
1925            (i.e. we're not scaling dhdl) */
1926         Wfac1 = beta;
1927         Wfac2 = beta;
1928     }
1929     else
1930     {
1931         /* we're using dhdl, so delta_lambda needs to be a
1932            multiplication factor.  */
1933         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1934                                                   ca->native_lambda);
1935         Wfac1 =  beta*delta_lambda;
1936         Wfac2 = -beta*delta_lambda;
1937     }
1938
1939     M = log(n1/n2);
1940
1941
1942     /* calculate average in both directions */
1943     for (i = 0; i < ca->nsamples; i++)
1944     {
1945         samples_t      *s = ca->s[i];
1946         sample_range_t *r = &(ca->r[i]);
1947         if (r->use)
1948         {
1949             if (!s->hist)
1950             {
1951                 for (j = r->start; j < r->end; j++)
1952                 {
1953                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1954                 }
1955             }
1956             else
1957             {
1958                 /* normalization factor multiplied with bin width and
1959                    number of samples (we normalize through M): */
1960                 double normdx = 1.;
1961                 double dx;
1962                 int    hd = 0; /* histogram direction */
1963                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1964                 {
1965                     hd = 1;
1966                 }
1967                 dx = s->hist->dx[hd];
1968
1969                 for (j = 0; j < s->hist->nbin[0]; j++)
1970                 {
1971                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1972                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1973
1974                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1975                 }
1976             }
1977         }
1978     }
1979     for (i = 0; i < cb->nsamples; i++)
1980     {
1981         samples_t      *s = cb->s[i];
1982         sample_range_t *r = &(cb->r[i]);
1983         if (r->use)
1984         {
1985             if (!s->hist)
1986             {
1987                 for (j = r->start; j < r->end; j++)
1988                 {
1989                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1990                 }
1991             }
1992             else
1993             {
1994                 /* normalization factor multiplied with bin width and
1995                    number of samples (we normalize through M): */
1996                 double normdx = 1.;
1997                 double dx;
1998                 int    hd = 0; /* histogram direction */
1999                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2000                 {
2001                     hd = 1;
2002                 }
2003                 dx = s->hist->dx[hd];
2004
2005                 for (j = 0; j < s->hist->nbin[0]; j++)
2006                 {
2007                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2008                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2009
2010                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2011                 }
2012             }
2013         }
2014     }
2015
2016     sigmafact /= (n1 + n2);
2017
2018
2019     /* Eq. 10 from
2020        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2021     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2022 }
2023
2024
2025
2026 static void calc_bar(barres_t *br, double tol,
2027                      int npee_min, int npee_max, gmx_bool *bEE,
2028                      double *partsum)
2029 {
2030     int      npee, p;
2031     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2032                                                         for calculated quantities */
2033     int      nsample1, nsample2;
2034     double   temp = br->a->temp;
2035     int      i, j;
2036     double   dg_min, dg_max;
2037     gmx_bool have_hist = FALSE;
2038
2039     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2040
2041     br->dg_disc_err      = 0.;
2042     br->dg_histrange_err = 0.;
2043
2044     /* check if there are histograms */
2045     for (i = 0; i < br->a->nsamples; i++)
2046     {
2047         if (br->a->r[i].use && br->a->s[i]->hist)
2048         {
2049             have_hist = TRUE;
2050             break;
2051         }
2052     }
2053     if (!have_hist)
2054     {
2055         for (i = 0; i < br->b->nsamples; i++)
2056         {
2057             if (br->b->r[i].use && br->b->s[i]->hist)
2058             {
2059                 have_hist = TRUE;
2060                 break;
2061             }
2062         }
2063     }
2064
2065     /* calculate histogram-specific errors */
2066     if (have_hist)
2067     {
2068         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2069         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2070
2071         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2072         {
2073             /* the histogram range  error is the biggest of the differences
2074                between the best estimate and the extremes */
2075             br->dg_histrange_err = fabs(dg_max - dg_min);
2076         }
2077         br->dg_disc_err = 0.;
2078         for (i = 0; i < br->a->nsamples; i++)
2079         {
2080             if (br->a->s[i]->hist)
2081             {
2082                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2083             }
2084         }
2085         for (i = 0; i < br->b->nsamples; i++)
2086         {
2087             if (br->b->s[i]->hist)
2088             {
2089                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2090             }
2091         }
2092     }
2093     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2094
2095     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2096
2097     dg_sig2     = 0;
2098     sa_sig2     = 0;
2099     sb_sig2     = 0;
2100     stddev_sig2 = 0;
2101
2102     *bEE = TRUE;
2103     {
2104         sample_coll_t ca, cb;
2105
2106         /* initialize the samples */
2107         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2108                          br->a->temp);
2109         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2110                          br->b->temp);
2111
2112         for (npee = npee_min; npee <= npee_max; npee++)
2113         {
2114             double dgs      = 0;
2115             double dgs2     = 0;
2116             double dsa      = 0;
2117             double dsb      = 0;
2118             double dsa2     = 0;
2119             double dsb2     = 0;
2120             double dstddev  = 0;
2121             double dstddev2 = 0;
2122
2123
2124             for (p = 0; p < npee; p++)
2125             {
2126                 double   dgp;
2127                 double   stddevc;
2128                 double   sac, sbc;
2129                 gmx_bool cac, cbc;
2130
2131                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2132                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2133
2134                 if (!cac || !cbc)
2135                 {
2136                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2137                     *bEE = FALSE;
2138                     if (cac)
2139                     {
2140                         sample_coll_destroy(&ca);
2141                     }
2142                     if (cbc)
2143                     {
2144                         sample_coll_destroy(&cb);
2145                     }
2146                     return;
2147                 }
2148
2149                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2150                 dgs  += dgp;
2151                 dgs2 += dgp*dgp;
2152
2153                 partsum[npee*(npee_max+1)+p] += dgp;
2154
2155                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2156                 dsa  += sac;
2157                 dsa2 += sac*sac;
2158                 dsb  += sbc;
2159                 dsb2 += sbc*sbc;
2160                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2161
2162                 dstddev  += stddevc;
2163                 dstddev2 += stddevc*stddevc;
2164
2165                 sample_coll_destroy(&ca);
2166                 sample_coll_destroy(&cb);
2167             }
2168             dgs     /= npee;
2169             dgs2    /= npee;
2170             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2171
2172             dsa     /= npee;
2173             dsa2    /= npee;
2174             dsb     /= npee;
2175             dsb2    /= npee;
2176             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2177             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2178
2179             dstddev     /= npee;
2180             dstddev2    /= npee;
2181             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2182         }
2183         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2184         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2185         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2186         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2187     }
2188 }
2189
2190
2191 static double bar_err(int nbmin, int nbmax, const double *partsum)
2192 {
2193     int    nb, b;
2194     double svar, s, s2, dg;
2195
2196     svar = 0;
2197     for (nb = nbmin; nb <= nbmax; nb++)
2198     {
2199         s  = 0;
2200         s2 = 0;
2201         for (b = 0; b < nb; b++)
2202         {
2203             dg  = partsum[nb*(nbmax+1)+b];
2204             s  += dg;
2205             s2 += dg*dg;
2206         }
2207         s    /= nb;
2208         s2   /= nb;
2209         svar += (s2 - s*s)/(nb - 1);
2210     }
2211
2212     return sqrt(svar/(nbmax + 1 - nbmin));
2213 }
2214
2215
2216 /* Seek the end of an identifier (consecutive non-spaces), followed by
2217    an optional number of spaces or '='-signs. Returns a pointer to the
2218    first non-space value found after that. Returns NULL if the string
2219    ends before that.
2220  */
2221 static const char *find_value(const char *str)
2222 {
2223     gmx_bool name_end_found = FALSE;
2224
2225     /* if the string is a NULL pointer, return a NULL pointer. */
2226     if (str == NULL)
2227     {
2228         return NULL;
2229     }
2230     while (*str != '\0')
2231     {
2232         /* first find the end of the name */
2233         if (!name_end_found)
2234         {
2235             if (isspace(*str) || (*str == '=') )
2236             {
2237                 name_end_found = TRUE;
2238             }
2239         }
2240         else
2241         {
2242             if (!( isspace(*str) || (*str == '=') ))
2243             {
2244                 return str;
2245             }
2246         }
2247         str++;
2248     }
2249     return NULL;
2250 }
2251
2252
2253
2254 /* read a vector-notation description of a lambda vector */
2255 static gmx_bool read_lambda_compvec(const char                *str,
2256                                     lambda_vec_t              *lv,
2257                                     const lambda_components_t *lc_in,
2258                                     lambda_components_t       *lc_out,
2259                                     const char               **end,
2260                                     const char                *fn)
2261 {
2262     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2263                                           components, or to check them */
2264     gmx_bool    start_reached = FALSE; /* whether the start of component names
2265                                           has been reached */
2266     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2267     int         n             = 0;     /* current component number */
2268     const char *val_start     = NULL;  /* start of the component name, or NULL
2269                                           if not in a value */
2270     char       *strtod_end;
2271     gmx_bool    OK = TRUE;
2272
2273     if (end)
2274     {
2275         *end = str;
2276     }
2277
2278
2279     if (lc_out && lc_out->N == 0)
2280     {
2281         initialize_lc = TRUE;
2282     }
2283
2284     if (lc_in == NULL)
2285     {
2286         lc_in = lc_out;
2287     }
2288
2289     while (1)
2290     {
2291         if (!start_reached)
2292         {
2293             if (isalnum(*str))
2294             {
2295                 vector        = FALSE;
2296                 start_reached = TRUE;
2297                 val_start     = str;
2298             }
2299             else if (*str == '(')
2300             {
2301                 vector        = TRUE;
2302                 start_reached = TRUE;
2303             }
2304             else if (!isspace(*str))
2305             {
2306                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2307             }
2308         }
2309         else
2310         {
2311             if (val_start)
2312             {
2313                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2314                 {
2315                     /* end of value */
2316                     if (lv == NULL)
2317                     {
2318                         if (initialize_lc)
2319                         {
2320                             lambda_components_add(lc_out, val_start,
2321                                                   (str-val_start));
2322                         }
2323                         else
2324                         {
2325                             if (!lambda_components_check(lc_out, n, val_start,
2326                                                          (str-val_start)))
2327                             {
2328                                 return FALSE;
2329                             }
2330                         }
2331                     }
2332                     else
2333                     {
2334                         /* add a vector component to lv */
2335                         lv->val[n] = strtod(val_start, &strtod_end);
2336                         if (val_start == strtod_end)
2337                         {
2338                             gmx_fatal(FARGS,
2339                                       "Error reading lambda vector in %s", fn);
2340                         }
2341                     }
2342                     /* reset for the next identifier */
2343                     val_start = NULL;
2344                     n++;
2345                     if (!vector)
2346                     {
2347                         return OK;
2348                     }
2349                 }
2350             }
2351             else if (isalnum(*str))
2352             {
2353                 val_start = str;
2354             }
2355             if (*str == ')')
2356             {
2357                 str++;
2358                 if (end)
2359                 {
2360                     *end = str;
2361                 }
2362                 if (!vector)
2363                 {
2364                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2365                 }
2366                 else
2367                 {
2368                     if (n == lc_in->N)
2369                     {
2370                         return OK;
2371                     }
2372                     else if (lv == NULL)
2373                     {
2374                         return FALSE;
2375                     }
2376                     else
2377                     {
2378                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2379                                   fn);
2380                         return FALSE;
2381                     }
2382
2383                 }
2384             }
2385         }
2386         if (*str == '\0')
2387         {
2388             break;
2389         }
2390         str++;
2391         if (end)
2392         {
2393             *end = str;
2394         }
2395     }
2396     if (vector)
2397     {
2398         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2399         return FALSE;
2400     }
2401     return OK;
2402 }
2403
2404 /* read and check the component names from a string */
2405 static gmx_bool read_lambda_components(const char          *str,
2406                                        lambda_components_t *lc,
2407                                        const char         **end,
2408                                        const char          *fn)
2409 {
2410     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2411 }
2412
2413 /* read an initialized lambda vector from a string */
2414 static gmx_bool read_lambda_vector(const char   *str,
2415                                    lambda_vec_t *lv,
2416                                    const char  **end,
2417                                    const char   *fn)
2418 {
2419     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2420 }
2421
2422
2423
2424 /* deduce lambda value from legend.
2425     fn = the file name
2426     legend = the legend string
2427     ba = the xvg data
2428     lam = the initialized lambda vector
2429    returns whether to use the data in this set.
2430  */
2431 static gmx_bool legend2lambda(const char   *fn,
2432                               const char   *legend,
2433                               lambda_vec_t *lam)
2434 {
2435     double        lambda = 0;
2436     const char   *ptr    = NULL, *ptr2 = NULL;
2437     gmx_bool      ok     = FALSE;
2438     gmx_bool      bdhdl  = FALSE;
2439     const char   *tostr  = " to ";
2440
2441     if (legend == NULL)
2442     {
2443         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2444     }
2445
2446     /* look for the last 'to': */
2447     ptr2 = legend;
2448     do
2449     {
2450         ptr2 = strstr(ptr2, tostr);
2451         if (ptr2 != NULL)
2452         {
2453             ptr = ptr2;
2454             ptr2++;
2455         }
2456     }
2457     while (ptr2 != NULL && *ptr2 != '\0');
2458
2459     if (ptr)
2460     {
2461         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2462     }
2463     else
2464     {
2465         /* look for the = sign */
2466         ptr = strrchr(legend, '=');
2467         if (!ptr)
2468         {
2469             /* otherwise look for the last space */
2470             ptr = strrchr(legend, ' ');
2471         }
2472     }
2473
2474     if (strstr(legend, "dH"))
2475     {
2476         ok    = TRUE;
2477         bdhdl = TRUE;
2478     }
2479     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2480     {
2481         ok    = TRUE;
2482         bdhdl = FALSE;
2483     }
2484     else /*if (strstr(legend, "pV"))*/
2485     {
2486         return FALSE;
2487     }
2488     if (!ptr)
2489     {
2490         ok = FALSE;
2491     }
2492
2493     if (!ok)
2494     {
2495         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2496     }
2497     if (!bdhdl)
2498     {
2499         ptr = find_value(ptr);
2500         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2501         {
2502             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2503         }
2504     }
2505     else
2506     {
2507         int         dhdl_index;
2508         const char *end;
2509         char        buf[STRLEN];
2510
2511         ptr = strrchr(legend, '=');
2512         end = ptr;
2513         if (ptr)
2514         {
2515             /* there must be a component name */
2516             ptr--;
2517             if (ptr < legend)
2518             {
2519                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2520             }
2521             /* now backtrack to the start of the identifier */
2522             while (isspace(*ptr))
2523             {
2524                 end = ptr;
2525                 ptr--;
2526                 if (ptr < legend)
2527                 {
2528                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2529                 }
2530             }
2531             while (!isspace(*ptr))
2532             {
2533                 ptr--;
2534                 if (ptr < legend)
2535                 {
2536                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2537                 }
2538             }
2539             ptr++;
2540             strncpy(buf, ptr, (end-ptr));
2541             buf[(end-ptr)] = '\0';
2542             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2543             if (dhdl_index < 0)
2544             {
2545                 char buf[STRLEN];
2546                 strncpy(buf, ptr, (end-ptr));
2547                 buf[(end-ptr)] = '\0';
2548                 gmx_fatal(FARGS,
2549                           "Did not find lambda component for '%s' in %s",
2550                           buf, fn);
2551             }
2552         }
2553         else
2554         {
2555             if (lam->lc->N > 1)
2556             {
2557                 gmx_fatal(FARGS,
2558                           "dhdl without component name with >1 lambda component in %s",
2559                           fn);
2560             }
2561             dhdl_index = 0;
2562         }
2563         lam->dhdl = dhdl_index;
2564     }
2565     return TRUE;
2566 }
2567
2568 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2569                                 lambda_components_t *lc)
2570 {
2571     gmx_bool    bFound;
2572     const char *ptr;
2573     char       *end;
2574     double      native_lambda;
2575
2576     bFound = FALSE;
2577
2578     /* first check for a state string */
2579     ptr = strstr(subtitle, "state");
2580     if (ptr)
2581     {
2582         int         index = -1;
2583         const char *val_end;
2584
2585         /* the new 4.6 style lambda vectors */
2586         ptr = find_value(ptr);
2587         if (ptr)
2588         {
2589             index = strtol(ptr, &end, 10);
2590             if (ptr == end)
2591             {
2592                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2593                 return FALSE;
2594             }
2595             ptr = end;
2596         }
2597         else
2598         {
2599             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2600             return FALSE;
2601         }
2602         /* now find the lambda vector component names */
2603         while (*ptr != '(' && !isalnum(*ptr))
2604         {
2605             ptr++;
2606             if (*ptr == '\0')
2607             {
2608                 gmx_fatal(FARGS,
2609                           "Incomplete lambda vector component data in %s", fn);
2610                 return FALSE;
2611             }
2612         }
2613         val_end = ptr;
2614         if (!read_lambda_components(ptr, lc, &val_end, fn))
2615         {
2616             gmx_fatal(FARGS,
2617                       "lambda vector components in %s don't match those previously read",
2618                       fn);
2619         }
2620         ptr = find_value(val_end);
2621         if (!ptr)
2622         {
2623             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2624             return FALSE;
2625         }
2626         lambda_vec_init(&(ba->native_lambda), lc);
2627         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2628         {
2629             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2630         }
2631         ba->native_lambda.index = index;
2632         bFound                  = TRUE;
2633     }
2634     else
2635     {
2636         /* compatibility mode: check for lambda in other ways. */
2637         /* plain text lambda string */
2638         ptr = strstr(subtitle, "lambda");
2639         if (ptr == NULL)
2640         {
2641             /* xmgrace formatted lambda string */
2642             ptr = strstr(subtitle, "\\xl\\f{}");
2643         }
2644         if (ptr == NULL)
2645         {
2646             /* xmgr formatted lambda string */
2647             ptr = strstr(subtitle, "\\8l\\4");
2648         }
2649         if (ptr != NULL)
2650         {
2651             ptr = strstr(ptr, "=");
2652         }
2653         if (ptr != NULL)
2654         {
2655             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2656             /* add the lambda component name as an empty string */
2657             if (lc->N > 0)
2658             {
2659                 if (!lambda_components_check(lc, 0, "", 0))
2660                 {
2661                     gmx_fatal(FARGS,
2662                               "lambda vector components in %s don't match those previously read",
2663                               fn);
2664                 }
2665             }
2666             else
2667             {
2668                 lambda_components_add(lc, "", 0);
2669             }
2670             lambda_vec_init(&(ba->native_lambda), lc);
2671             ba->native_lambda.val[0] = native_lambda;
2672         }
2673     }
2674
2675     return bFound;
2676 }
2677
2678 static void filename2lambda(const char *fn)
2679 {
2680     double        lambda;
2681     const char   *ptr, *digitptr;
2682     char         *endptr;
2683     int           dirsep;
2684     ptr = fn;
2685     /* go to the end of the path string and search backward to find the last
2686        directory in the path which has to contain the value of lambda
2687      */
2688     while (ptr[1] != '\0')
2689     {
2690         ptr++;
2691     }
2692     /* searching backward to find the second directory separator */
2693     dirsep   = 0;
2694     digitptr = NULL;
2695     while (ptr >= fn)
2696     {
2697         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2698         {
2699             if (dirsep == 1)
2700             {
2701                 break;
2702             }
2703             dirsep++;
2704         }
2705         /* save the last position of a digit between the last two
2706            separators = in the last dirname */
2707         if (dirsep > 0 && isdigit(*ptr))
2708         {
2709             digitptr = ptr;
2710         }
2711         ptr--;
2712     }
2713     if (!digitptr)
2714     {
2715         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2716                   " last directory in the path '%s' does not contain a number", fn);
2717     }
2718     if (digitptr[-1] == '-')
2719     {
2720         digitptr--;
2721     }
2722     lambda = strtod(digitptr, &endptr);
2723     if (endptr == digitptr)
2724     {
2725         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2726     }
2727 }
2728
2729 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2730                                   lambda_components_t *lc)
2731 {
2732     int          i;
2733     char        *subtitle, **legend, *ptr;
2734     int          np;
2735     gmx_bool     native_lambda_read = FALSE;
2736     char         buf[STRLEN];
2737     lambda_vec_t lv;
2738
2739     xvg_init(ba);
2740
2741     ba->filename = fn;
2742
2743     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2744     if (!ba->y)
2745     {
2746         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2747     }
2748     /* Reorder the data */
2749     ba->t  = ba->y[0];
2750     for (i = 1; i < ba->nset; i++)
2751     {
2752         ba->y[i-1] = ba->y[i];
2753     }
2754     ba->nset--;
2755
2756     snew(ba->np, ba->nset);
2757     for (i = 0; i < ba->nset; i++)
2758     {
2759         ba->np[i] = np;
2760     }
2761
2762     ba->temp = -1;
2763     if (subtitle != NULL)
2764     {
2765         /* try to extract temperature */
2766         ptr = strstr(subtitle, "T =");
2767         if (ptr != NULL)
2768         {
2769             ptr += 3;
2770             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2771             {
2772                 if (ba->temp <= 0)
2773                 {
2774                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2775                               ba->temp, fn);
2776                 }
2777             }
2778         }
2779     }
2780     if (ba->temp < 0)
2781     {
2782         if (*temp <= 0)
2783         {
2784             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2785         }
2786         ba->temp = *temp;
2787     }
2788
2789     /* Try to deduce lambda from the subtitle */
2790     if (subtitle)
2791     {
2792         if (subtitle2lambda(subtitle, ba, fn, lc))
2793         {
2794             native_lambda_read = TRUE;
2795         }
2796     }
2797     snew(ba->lambda, ba->nset);
2798     if (legend == NULL)
2799     {
2800         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2801         if (ba->nset == 1)
2802         {
2803             if (!native_lambda_read)
2804             {
2805                 /* Deduce lambda from the file name */
2806                 filename2lambda(fn);
2807                 native_lambda_read = TRUE;
2808             }
2809             ba->lambda[0] = ba->native_lambda;
2810         }
2811         else
2812         {
2813             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2814         }
2815     }
2816     else
2817     {
2818         for (i = 0; i < ba->nset; )
2819         {
2820             gmx_bool use = FALSE;
2821             /* Read lambda from the legend */
2822             lambda_vec_init( &(ba->lambda[i]), lc );
2823             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2824             use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2825             if (use)
2826             {
2827                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2828                 i++;
2829             }
2830             else
2831             {
2832                 int j;
2833                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2834                 for (j = i+1; j < ba->nset; j++)
2835                 {
2836                     ba->y[j-1]  = ba->y[j];
2837                     legend[j-1] = legend[j];
2838                 }
2839                 ba->nset--;
2840             }
2841         }
2842     }
2843
2844     if (!native_lambda_read)
2845     {
2846         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2847     }
2848
2849     if (legend != NULL)
2850     {
2851         for (i = 0; i < ba->nset-1; i++)
2852         {
2853             sfree(legend[i]);
2854         }
2855         sfree(legend);
2856     }
2857 }
2858
2859 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2860 {
2861     xvg_t     *barsim;
2862     samples_t *s;
2863     int        i;
2864     double    *lambda;
2865
2866     snew(barsim, 1);
2867
2868     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2869
2870     if (barsim->nset < 1)
2871     {
2872         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2873     }
2874
2875     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2876     {
2877         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2878     }
2879     *temp = barsim->temp;
2880
2881     /* now create a series of samples_t */
2882     snew(s, barsim->nset);
2883     for (i = 0; i < barsim->nset; i++)
2884     {
2885         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2886                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2887                                                    &(barsim->lambda[i])),
2888                      fn);
2889         s[i].du  = barsim->y[i];
2890         s[i].ndu = barsim->np[i];
2891         s[i].t   = barsim->t;
2892
2893         lambda_data_list_insert_sample(sd->lb, s+i);
2894     }
2895     {
2896         char buf[STRLEN];
2897
2898         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2899         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2900                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2901         for (i = 0; i < barsim->nset; i++)
2902         {
2903             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2904             printf("        %s (%d pts)\n", buf, s[i].ndu);
2905         }
2906     }
2907     printf("\n\n");
2908 }
2909
2910 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2911                                  double start_time, double delta_time,
2912                                  lambda_vec_t *native_lambda, double temp,
2913                                  double *last_t, const char *filename)
2914 {
2915     int           i, j;
2916     gmx_bool      allocated;
2917     double        old_foreign_lambda;
2918     lambda_vec_t *foreign_lambda;
2919     int           type;
2920     samples_t    *s; /* convenience pointer */
2921     int           startj;
2922
2923     /* check the block types etc. */
2924     if ( (blk->nsub < 3) ||
2925          (blk->sub[0].type != xdr_datatype_int) ||
2926          (blk->sub[1].type != xdr_datatype_double) ||
2927          (
2928              (blk->sub[2].type != xdr_datatype_float) &&
2929              (blk->sub[2].type != xdr_datatype_double)
2930          ) ||
2931          (blk->sub[0].nr < 1) ||
2932          (blk->sub[1].nr < 1) )
2933     {
2934         gmx_fatal(FARGS,
2935                   "Unexpected/corrupted block data in file %s around time %f.",
2936                   filename, start_time);
2937     }
2938
2939     snew(foreign_lambda, 1);
2940     lambda_vec_init(foreign_lambda, native_lambda->lc);
2941     lambda_vec_copy(foreign_lambda, native_lambda);
2942     type = blk->sub[0].ival[0];
2943     if (type == dhbtDH)
2944     {
2945         for (i = 0; i < native_lambda->lc->N; i++)
2946         {
2947             foreign_lambda->val[i] = blk->sub[1].dval[i];
2948         }
2949     }
2950     else
2951     {
2952         if (blk->sub[0].nr > 1)
2953         {
2954             foreign_lambda->dhdl = blk->sub[0].ival[1];
2955         }
2956         else
2957         {
2958             foreign_lambda->dhdl = 0;
2959         }
2960     }
2961
2962     if (!*smp)
2963     {
2964         /* initialize the samples structure if it's empty. */
2965         snew(*smp, 1);
2966         samples_init(*smp, native_lambda, foreign_lambda, temp,
2967                      type == dhbtDHDL, filename);
2968         (*smp)->start_time = start_time;
2969         (*smp)->delta_time = delta_time;
2970     }
2971
2972     /* set convenience pointer */
2973     s = *smp;
2974
2975     /* now double check */
2976     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2977     {
2978         char buf[STRLEN], buf2[STRLEN];
2979         lambda_vec_print(foreign_lambda, buf, FALSE);
2980         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2981         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2982         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2983                   filename, start_time);
2984     }
2985
2986     /* make room for the data */
2987     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2988     {
2989         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2990             blk->sub[2].nr*2 : s->ndu_alloc;
2991         srenew(s->du_alloc, s->ndu_alloc);
2992         s->du = s->du_alloc;
2993     }
2994     startj   = s->ndu;
2995     s->ndu  += blk->sub[2].nr;
2996     s->ntot += blk->sub[2].nr;
2997     *ndu     = blk->sub[2].nr;
2998
2999     /* and copy the data*/
3000     for (j = 0; j < blk->sub[2].nr; j++)
3001     {
3002         if (blk->sub[2].type == xdr_datatype_float)
3003         {
3004             s->du[startj+j] = blk->sub[2].fval[j];
3005         }
3006         else
3007         {
3008             s->du[startj+j] = blk->sub[2].dval[j];
3009         }
3010     }
3011     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3012     {
3013         *last_t = start_time + blk->sub[2].nr*delta_time;
3014     }
3015 }
3016
3017 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3018                                       double start_time, double delta_time,
3019                                       lambda_vec_t *native_lambda, double temp,
3020                                       double *last_t, const char *filename)
3021 {
3022     int           i, j;
3023     samples_t    *s;
3024     int           nhist;
3025     double        old_foreign_lambda;
3026     lambda_vec_t *foreign_lambda;
3027     int           type;
3028     int           nbins[2];
3029
3030     /* check the block types etc. */
3031     if ( (blk->nsub < 2) ||
3032          (blk->sub[0].type != xdr_datatype_double) ||
3033          (blk->sub[1].type != xdr_datatype_int64) ||
3034          (blk->sub[0].nr < 2)  ||
3035          (blk->sub[1].nr < 2) )
3036     {
3037         gmx_fatal(FARGS,
3038                   "Unexpected/corrupted block data in file %s around time %f",
3039                   filename, start_time);
3040     }
3041
3042     nhist = blk->nsub-2;
3043     if (nhist == 0)
3044     {
3045         return NULL;
3046     }
3047     if (nhist > 2)
3048     {
3049         gmx_fatal(FARGS,
3050                   "Unexpected/corrupted block data in file %s around time %f",
3051                   filename, start_time);
3052     }
3053
3054     snew(s, 1);
3055     *nsamples = 1;
3056
3057     snew(foreign_lambda, 1);
3058     lambda_vec_init(foreign_lambda, native_lambda->lc);
3059     lambda_vec_copy(foreign_lambda, native_lambda);
3060     type = (int)(blk->sub[1].lval[1]);
3061     if (type == dhbtDH)
3062     {
3063         double old_foreign_lambda;
3064
3065         old_foreign_lambda = blk->sub[0].dval[0];
3066         if (old_foreign_lambda >= 0)
3067         {
3068             foreign_lambda->val[0] = old_foreign_lambda;
3069             if (foreign_lambda->lc->N > 1)
3070             {
3071                 gmx_fatal(FARGS,
3072                           "Single-component lambda in multi-component file %s",
3073                           filename);
3074             }
3075         }
3076         else
3077         {
3078             for (i = 0; i < native_lambda->lc->N; i++)
3079             {
3080                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3081             }
3082         }
3083     }
3084     else
3085     {
3086         if (foreign_lambda->lc->N > 1)
3087         {
3088             if (blk->sub[1].nr < 3 + nhist)
3089             {
3090                 gmx_fatal(FARGS,
3091                           "Missing derivative coord in multi-component file %s",
3092                           filename);
3093             }
3094             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3095         }
3096         else
3097         {
3098             foreign_lambda->dhdl = 0;
3099         }
3100     }
3101
3102     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3103                  filename);
3104     snew(s->hist, 1);
3105
3106     for (i = 0; i < nhist; i++)
3107     {
3108         nbins[i] = blk->sub[i+2].nr;
3109     }
3110
3111     hist_init(s->hist, nhist, nbins);
3112
3113     for (i = 0; i < nhist; i++)
3114     {
3115         s->hist->x0[i] = blk->sub[1].lval[2+i];
3116         s->hist->dx[i] = blk->sub[0].dval[1];
3117         if (i == 1)
3118         {
3119             s->hist->dx[i] = -s->hist->dx[i];
3120         }
3121     }
3122
3123     s->hist->start_time = start_time;
3124     s->hist->delta_time = delta_time;
3125     s->start_time       = start_time;
3126     s->delta_time       = delta_time;
3127
3128     for (i = 0; i < nhist; i++)
3129     {
3130         int             nbin;
3131         gmx_int64_t     sum = 0;
3132
3133         for (j = 0; j < s->hist->nbin[i]; j++)
3134         {
3135             int binv = (int)(blk->sub[i+2].ival[j]);
3136
3137             s->hist->bin[i][j] = binv;
3138             sum               += binv;
3139
3140         }
3141         if (i == 0)
3142         {
3143             s->ntot      = sum;
3144             s->hist->sum = sum;
3145         }
3146         else
3147         {
3148             if (s->ntot != sum)
3149             {
3150                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3151                           filename);
3152             }
3153         }
3154     }
3155
3156     if (start_time + s->hist->sum*delta_time > *last_t)
3157     {
3158         *last_t = start_time + s->hist->sum*delta_time;
3159     }
3160     return s;
3161 }
3162
3163
3164 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3165 {
3166     int            i, j;
3167     ener_file_t    fp;
3168     t_enxframe    *fr;
3169     int            nre;
3170     gmx_enxnm_t   *enm           = NULL;
3171     double         first_t       = -1;
3172     double         last_t        = -1;
3173     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3174     int           *nhists        = NULL; /* array to keep count & print at end */
3175     int           *npts          = NULL; /* array to keep count & print at end */
3176     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3177     lambda_vec_t  *native_lambda;
3178     double         end_time;             /* the end time of the last batch of samples */
3179     int            nsamples = 0;
3180     lambda_vec_t   start_lambda;
3181
3182     fp = open_enx(fn, "r");
3183     do_enxnms(fp, &nre, &enm);
3184     snew(fr, 1);
3185
3186     snew(native_lambda, 1);
3187     start_lambda.lc = NULL;
3188
3189     while (do_enx(fp, fr))
3190     {
3191         /* count the data blocks */
3192         int    nblocks_raw  = 0;
3193         int    nblocks_hist = 0;
3194         int    nlam         = 0;
3195         int    k;
3196         /* DHCOLL block information: */
3197         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3198         double rtemp      = 0;
3199
3200         /* count the blocks and handle collection information: */
3201         for (i = 0; i < fr->nblock; i++)
3202         {
3203             if (fr->block[i].id == enxDHHIST)
3204             {
3205                 nblocks_hist++;
3206             }
3207             if (fr->block[i].id == enxDH)
3208             {
3209                 nblocks_raw++;
3210             }
3211             if (fr->block[i].id == enxDHCOLL)
3212             {
3213                 nlam++;
3214                 if ( (fr->block[i].nsub < 1) ||
3215                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3216                      (fr->block[i].sub[0].nr < 5))
3217                 {
3218                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3219                 }
3220
3221                 /* read the data from the DHCOLL block */
3222                 rtemp            =        fr->block[i].sub[0].dval[0];
3223                 start_time       =   fr->block[i].sub[0].dval[1];
3224                 delta_time       =   fr->block[i].sub[0].dval[2];
3225                 old_start_lambda = fr->block[i].sub[0].dval[3];
3226                 delta_lambda     = fr->block[i].sub[0].dval[4];
3227
3228                 if (delta_lambda > 0)
3229                 {
3230                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3231                 }
3232                 if ( ( *temp != rtemp) && (*temp > 0) )
3233                 {
3234                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3235                 }
3236                 *temp = rtemp;
3237
3238                 if (old_start_lambda >= 0)
3239                 {
3240                     if (sd->lc.N > 0)
3241                     {
3242                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3243                         {
3244                             gmx_fatal(FARGS,
3245                                       "lambda vector components in %s don't match those previously read",
3246                                       fn);
3247                         }
3248                     }
3249                     else
3250                     {
3251                         lambda_components_add(&(sd->lc), "", 0);
3252                     }
3253                     if (!start_lambda.lc)
3254                     {
3255                         lambda_vec_init(&start_lambda, &(sd->lc));
3256                     }
3257                     start_lambda.val[0] = old_start_lambda;
3258                 }
3259                 else
3260                 {
3261                     /* read lambda vector */
3262                     int      n_lambda_vec;
3263                     gmx_bool check = (sd->lc.N > 0);
3264                     if (fr->block[i].nsub < 2)
3265                     {
3266                         gmx_fatal(FARGS,
3267                                   "No lambda vector, but start_lambda=%f\n",
3268                                   old_start_lambda);
3269                     }
3270                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3271                     for (j = 0; j < n_lambda_vec; j++)
3272                     {
3273                         const char *name =
3274                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3275                         if (check)
3276                         {
3277                             /* check the components */
3278                             lambda_components_check(&(sd->lc), j, name,
3279                                                     strlen(name));
3280                         }
3281                         else
3282                         {
3283                             lambda_components_add(&(sd->lc), name,
3284                                                   strlen(name));
3285                         }
3286                     }
3287                     lambda_vec_init(&start_lambda, &(sd->lc));
3288                     start_lambda.index = fr->block[i].sub[1].ival[0];
3289                     for (j = 0; j < n_lambda_vec; j++)
3290                     {
3291                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3292                     }
3293                 }
3294                 if (first_t < 0)
3295                 {
3296                     first_t = start_time;
3297                 }
3298             }
3299         }
3300
3301         if (nlam != 1)
3302         {
3303             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3304         }
3305         if (nblocks_raw > 0 && nblocks_hist > 0)
3306         {
3307             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3308         }
3309
3310         if (nsamples > 0)
3311         {
3312             /* check the native lambda */
3313             if (!lambda_vec_same(&start_lambda, native_lambda) )
3314             {
3315                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3316                           fn, native_lambda, start_lambda, start_time);
3317             }
3318             /* check the number of samples against the previous number */
3319             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3320             {
3321                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3322                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3323             }
3324             /* check whether last iterations's end time matches with
3325                the currrent start time */
3326             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3327             {
3328                 /* it didn't. We need to store our samples and reallocate */
3329                 for (i = 0; i < nsamples; i++)
3330                 {
3331                     if (samples_rawdh[i])
3332                     {
3333                         /* insert it into the existing list */
3334                         lambda_data_list_insert_sample(sd->lb,
3335                                                        samples_rawdh[i]);
3336                         /* and make sure we'll allocate a new one this time
3337                            around */
3338                         samples_rawdh[i] = NULL;
3339                     }
3340                 }
3341             }
3342         }
3343         else
3344         {
3345             /* this is the first round; allocate the associated data
3346                structures */
3347             /*native_lambda=start_lambda;*/
3348             lambda_vec_init(native_lambda, &(sd->lc));
3349             lambda_vec_copy(native_lambda, &start_lambda);
3350             nsamples = nblocks_raw+nblocks_hist;
3351             snew(nhists, nsamples);
3352             snew(npts, nsamples);
3353             snew(lambdas, nsamples);
3354             snew(samples_rawdh, nsamples);
3355             for (i = 0; i < nsamples; i++)
3356             {
3357                 nhists[i]        = 0;
3358                 npts[i]          = 0;
3359                 lambdas[i]       = NULL;
3360                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3361                                             ones contain values */
3362             }
3363         }
3364
3365         /* and read them */
3366         k = 0; /* counter for the lambdas, etc. arrays */
3367         for (i = 0; i < fr->nblock; i++)
3368         {
3369             if (fr->block[i].id == enxDH)
3370             {
3371                 int type = (fr->block[i].sub[0].ival[0]);
3372                 if (type == dhbtDH || type == dhbtDHDL)
3373                 {
3374                     int ndu;
3375                     read_edr_rawdh_block(&(samples_rawdh[k]),
3376                                          &ndu,
3377                                          &(fr->block[i]),
3378                                          start_time, delta_time,
3379                                          native_lambda, rtemp,
3380                                          &last_t, fn);
3381                     npts[k] += ndu;
3382                     if (samples_rawdh[k])
3383                     {
3384                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3385                     }
3386                     k++;
3387                 }
3388             }
3389             else if (fr->block[i].id == enxDHHIST)
3390             {
3391                 int type = (int)(fr->block[i].sub[1].lval[1]);
3392                 if (type == dhbtDH || type == dhbtDHDL)
3393                 {
3394                     int        j;
3395                     int        nb = 0;
3396                     samples_t *s; /* this is where the data will go */
3397                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3398                                             start_time, delta_time,
3399                                             native_lambda, rtemp,
3400                                             &last_t, fn);
3401                     nhists[k] += nb;
3402                     if (nb > 0)
3403                     {
3404                         lambdas[k] = s->foreign_lambda;
3405                     }
3406                     k++;
3407                     /* and insert the new sample immediately */
3408                     for (j = 0; j < nb; j++)
3409                     {
3410                         lambda_data_list_insert_sample(sd->lb, s+j);
3411                     }
3412                 }
3413             }
3414         }
3415     }
3416     /* Now store all our extant sample collections */
3417     for (i = 0; i < nsamples; i++)
3418     {
3419         if (samples_rawdh[i])
3420         {
3421             /* insert it into the existing list */
3422             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3423         }
3424     }
3425
3426
3427     {
3428         char buf[STRLEN];
3429         printf("\n");
3430         lambda_vec_print(native_lambda, buf, FALSE);
3431         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3432                fn, first_t, last_t, buf);
3433         for (i = 0; i < nsamples; i++)
3434         {
3435             if (lambdas[i])
3436             {
3437                 lambda_vec_print(lambdas[i], buf, TRUE);
3438                 if (nhists[i] > 0)
3439                 {
3440                     printf("        %s (%d hists)\n", buf, nhists[i]);
3441                 }
3442                 else
3443                 {
3444                     printf("        %s (%d pts)\n", buf, npts[i]);
3445                 }
3446             }
3447         }
3448     }
3449     printf("\n\n");
3450     sfree(npts);
3451     sfree(nhists);
3452     sfree(lambdas);
3453 }
3454
3455
3456 int gmx_bar(int argc, char *argv[])
3457 {
3458     static const char *desc[] = {
3459         "[THISMODULE] calculates free energy difference estimates through ",
3460         "Bennett's acceptance ratio method (BAR). It also automatically",
3461         "adds series of individual free energies obtained with BAR into",
3462         "a combined free energy estimate.[PAR]",
3463
3464         "Every individual BAR free energy difference relies on two ",
3465         "simulations at different states: say state A and state B, as",
3466         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3467         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3468         "average of the Hamiltonian difference of state B given state A and",
3469         "vice versa.",
3470         "The energy differences to the other state must be calculated",
3471         "explicitly during the simulation. This can be done with",
3472         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3473
3474         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3475         "Two types of input files are supported:[BR]",
3476         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3477         "The files should have columns ",
3478         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3479         "The [GRK]lambda[grk] values are inferred ",
3480         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3481         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3482         "legends of Delta H",
3483         "[BR]",
3484         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3485         "[TT]-extp[tt] option for these files, it is assumed",
3486         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3487         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3488         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3489         "subtitle (if present), otherwise from a number in the subdirectory ",
3490         "in the file name.[PAR]",
3491
3492         "The [GRK]lambda[grk] of the simulation is parsed from ",
3493         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3494         "foreign [GRK]lambda[grk] values from the legend containing the ",
3495         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3496         "the legend line containing 'T ='.[PAR]",
3497
3498         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3499         "These can contain either lists of energy differences (see the ",
3500         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3501         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3502         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3503         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3504
3505         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3506         "the energy difference can also be extrapolated from the ",
3507         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3508         "option, which assumes that the system's Hamiltonian depends linearly",
3509         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3510
3511         "The free energy estimates are determined using BAR with bisection, ",
3512         "with the precision of the output set with [TT]-prec[tt]. ",
3513         "An error estimate taking into account time correlations ",
3514         "is made by splitting the data into blocks and determining ",
3515         "the free energy differences over those blocks and assuming ",
3516         "the blocks are independent. ",
3517         "The final error estimate is determined from the average variance ",
3518         "over 5 blocks. A range of block numbers for error estimation can ",
3519         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3520
3521         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3522         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3523         "samples. [BB]Note[bb] that when aggregating energy ",
3524         "differences/derivatives with different sampling intervals, this is ",
3525         "almost certainly not correct. Usually subsequent energies are ",
3526         "correlated and different time intervals mean different degrees ",
3527         "of correlation between samples.[PAR]",
3528
3529         "The results are split in two parts: the last part contains the final ",
3530         "results in kJ/mol, together with the error estimate for each part ",
3531         "and the total. The first part contains detailed free energy ",
3532         "difference estimates and phase space overlap measures in units of ",
3533         "kT (together with their computed error estimate). The printed ",
3534         "values are:[BR]",
3535         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3536         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3537         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3538         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3539         "[TT]*[tt]    s_B: an estimate of the relative entropy of A in B.[BR]",
3540         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3541
3542         "The relative entropy of both states in each other's ensemble can be ",
3543         "interpreted as a measure of phase space overlap: ",
3544         "the relative entropy s_A of the work samples of lambda_B in the ",
3545         "ensemble of lambda_A (and vice versa for s_B), is a ",
3546         "measure of the 'distance' between Boltzmann distributions of ",
3547         "the two states, that goes to zero for identical distributions. See ",
3548         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3549         "[PAR]",
3550         "The estimate of the expected per-sample standard deviation, as given ",
3551         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3552         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3553         "of the actual statistical error, because it assumes independent samples).[PAR]",
3554
3555         "To get a visual estimate of the phase space overlap, use the ",
3556         "[TT]-oh[tt] option to write series of histograms, together with the ",
3557         "[TT]-nbin[tt] option.[PAR]"
3558     };
3559     static real        begin    = 0, end = -1, temp = -1;
3560     int                nd       = 2, nbmin = 5, nbmax = 5;
3561     int                nbin     = 100;
3562     gmx_bool           use_dhdl = FALSE;
3563     gmx_bool           calc_s, calc_v;
3564     t_pargs            pa[] = {
3565         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3566         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3567         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3568         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3569         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3570         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3571         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3572         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3573     };
3574
3575     t_filenm           fnm[] = {
3576         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3577         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3578         { efXVG, "-o",  "bar",    ffOPTWR },
3579         { efXVG, "-oi", "barint", ffOPTWR },
3580         { efXVG, "-oh", "histogram", ffOPTWR }
3581     };
3582 #define NFILE asize(fnm)
3583
3584     int          f, i, j;
3585     int          nf = 0;    /* file counter */
3586     int          nbs;
3587     int          nfile_tot; /* total number of input files */
3588     int          nxvgfile = 0;
3589     int          nedrfile = 0;
3590     char       **fxvgnms;
3591     char       **fedrnms;
3592     sim_data_t   sim_data; /* the simulation data */
3593     barres_t    *results;  /* the results */
3594     int          nresults; /* number of results in results array */
3595
3596     double      *partsum;
3597     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3598     FILE        *fpb, *fpi;
3599     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3600     char         buf[STRLEN], buf2[STRLEN];
3601     char         ktformat[STRLEN], sktformat[STRLEN];
3602     char         kteformat[STRLEN], skteformat[STRLEN];
3603     output_env_t oenv;
3604     double       kT, beta;
3605     gmx_bool     result_OK = TRUE, bEE = TRUE;
3606
3607     gmx_bool     disc_err          = FALSE;
3608     double       sum_disc_err      = 0.; /* discretization error */
3609     gmx_bool     histrange_err     = FALSE;
3610     double       sum_histrange_err = 0.; /* histogram range error */
3611     double       stat_err          = 0.; /* statistical error */
3612
3613     if (!parse_common_args(&argc, argv,
3614                            PCA_CAN_VIEW,
3615                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3616     {
3617         return 0;
3618     }
3619
3620     if (opt2bSet("-f", NFILE, fnm))
3621     {
3622         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3623     }
3624     if (opt2bSet("-g", NFILE, fnm))
3625     {
3626         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3627     }
3628
3629     sim_data_init(&sim_data);
3630 #if 0
3631     /* make linked list */
3632     lb = &lambda_head;
3633     lambda_data_init(lb, 0, 0);
3634     lb->next = lb;
3635     lb->prev = lb;
3636 #endif
3637
3638
3639     nfile_tot = nxvgfile + nedrfile;
3640
3641     if (nfile_tot == 0)
3642     {
3643         gmx_fatal(FARGS, "No input files!");
3644     }
3645
3646     if (nd < 0)
3647     {
3648         gmx_fatal(FARGS, "Can not have negative number of digits");
3649     }
3650     prec = pow(10, -nd);
3651
3652     snew(partsum, (nbmax+1)*(nbmax+1));
3653     nf = 0;
3654
3655     /* read in all files. First xvg files */
3656     for (f = 0; f < nxvgfile; f++)
3657     {
3658         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3659         nf++;
3660     }
3661     /* then .edr files */
3662     for (f = 0; f < nedrfile; f++)
3663     {
3664         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3665         nf++;
3666     }
3667
3668     /* fix the times to allow for equilibration */
3669     sim_data_impose_times(&sim_data, begin, end);
3670
3671     if (opt2bSet("-oh", NFILE, fnm))
3672     {
3673         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3674     }
3675
3676     /* assemble the output structures from the lambdas */
3677     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3678
3679     sum_disc_err = barres_list_max_disc_err(results, nresults);
3680
3681     if (nresults == 0)
3682     {
3683         printf("\nNo results to calculate.\n");
3684         return 0;
3685     }
3686
3687     if (sum_disc_err > prec)
3688     {
3689         prec = sum_disc_err;
3690         nd   = ceil(-log10(prec));
3691         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3692     }
3693
3694
3695     /*sprintf(lamformat,"%%6.3f");*/
3696     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3697     /* the format strings of the results in kT */
3698     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3699     sprintf( sktformat, "%%%ds", 6+nd);
3700     /* the format strings of the errors in kT */
3701     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3702     sprintf( skteformat, "%%%ds", 4+nd);
3703     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3704     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3705
3706
3707
3708     fpb = NULL;
3709     if (opt2bSet("-o", NFILE, fnm))
3710     {
3711         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3712         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3713                             "\\lambda", buf, exvggtXYDY, oenv);
3714     }
3715
3716     fpi = NULL;
3717     if (opt2bSet("-oi", NFILE, fnm))
3718     {
3719         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3720         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3721                        "\\lambda", buf, oenv);
3722     }
3723
3724
3725
3726     if (nbmin > nbmax)
3727     {
3728         nbmin = nbmax;
3729     }
3730
3731     /* first calculate results */
3732     bEE      = TRUE;
3733     disc_err = FALSE;
3734     for (f = 0; f < nresults; f++)
3735     {
3736         /* Determine the free energy difference with a factor of 10
3737          * more accuracy than requested for printing.
3738          */
3739         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3740                  &bEE, partsum);
3741
3742         if (results[f].dg_disc_err > prec/10.)
3743         {
3744             disc_err = TRUE;
3745         }
3746         if (results[f].dg_histrange_err > prec/10.)
3747         {
3748             histrange_err = TRUE;
3749         }
3750     }
3751
3752     /* print results in kT */
3753     kT   = BOLTZ*temp;
3754     beta = 1/kT;
3755
3756     printf("\nTemperature: %g K\n", temp);
3757
3758     printf("\nDetailed results in kT (see help for explanation):\n\n");
3759     printf("%6s ", " lam_A");
3760     printf("%6s ", " lam_B");
3761     printf(sktformat,  "DG ");
3762     if (bEE)
3763     {
3764         printf(skteformat, "+/- ");
3765     }
3766     if (disc_err)
3767     {
3768         printf(skteformat, "disc ");
3769     }
3770     if (histrange_err)
3771     {
3772         printf(skteformat, "range ");
3773     }
3774     printf(sktformat,  "s_A ");
3775     if (bEE)
3776     {
3777         printf(skteformat, "+/- " );
3778     }
3779     printf(sktformat,  "s_B ");
3780     if (bEE)
3781     {
3782         printf(skteformat, "+/- " );
3783     }
3784     printf(sktformat,  "stdev ");
3785     if (bEE)
3786     {
3787         printf(skteformat, "+/- ");
3788     }
3789     printf("\n");
3790     for (f = 0; f < nresults; f++)
3791     {
3792         lambda_vec_print_short(results[f].a->native_lambda, buf);
3793         printf("%s ", buf);
3794         lambda_vec_print_short(results[f].b->native_lambda, buf);
3795         printf("%s ", buf);
3796         printf(ktformat,  results[f].dg);
3797         printf(" ");
3798         if (bEE)
3799         {
3800             printf(kteformat, results[f].dg_err);
3801             printf(" ");
3802         }
3803         if (disc_err)
3804         {
3805             printf(kteformat, results[f].dg_disc_err);
3806             printf(" ");
3807         }
3808         if (histrange_err)
3809         {
3810             printf(kteformat, results[f].dg_histrange_err);
3811             printf(" ");
3812         }
3813         printf(ktformat,  results[f].sa);
3814         printf(" ");
3815         if (bEE)
3816         {
3817             printf(kteformat, results[f].sa_err);
3818             printf(" ");
3819         }
3820         printf(ktformat,  results[f].sb);
3821         printf(" ");
3822         if (bEE)
3823         {
3824             printf(kteformat, results[f].sb_err);
3825             printf(" ");
3826         }
3827         printf(ktformat,  results[f].dg_stddev);
3828         printf(" ");
3829         if (bEE)
3830         {
3831             printf(kteformat, results[f].dg_stddev_err);
3832         }
3833         printf("\n");
3834
3835         /* Check for negative relative entropy with a 95% certainty. */
3836         if (results[f].sa < -2*results[f].sa_err ||
3837             results[f].sb < -2*results[f].sb_err)
3838         {
3839             result_OK = FALSE;
3840         }
3841     }
3842
3843     if (!result_OK)
3844     {
3845         printf("\nWARNING: Some of these results violate the Second Law of "
3846                "Thermodynamics: \n"
3847                "         This is can be the result of severe undersampling, or "
3848                "(more likely)\n"
3849                "         there is something wrong with the simulations.\n");
3850     }
3851
3852
3853     /* final results in kJ/mol */
3854     printf("\n\nFinal results in kJ/mol:\n\n");
3855     dg_tot  = 0;
3856     for (f = 0; f < nresults; f++)
3857     {
3858
3859         if (fpi != NULL)
3860         {
3861             lambda_vec_print_short(results[f].a->native_lambda, buf);
3862             fprintf(fpi, xvg2format, buf, dg_tot);
3863         }
3864
3865
3866         if (fpb != NULL)
3867         {
3868             lambda_vec_print_intermediate(results[f].a->native_lambda,
3869                                           results[f].b->native_lambda,
3870                                           buf);
3871
3872             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3873         }
3874
3875         printf("point ");
3876         lambda_vec_print_short(results[f].a->native_lambda, buf);
3877         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3878         printf("%s - %s", buf, buf2);
3879         printf(",   DG ");
3880
3881         printf(dgformat, results[f].dg*kT);
3882         if (bEE)
3883         {
3884             printf(" +/- ");
3885             printf(dgformat, results[f].dg_err*kT);
3886         }
3887         if (histrange_err)
3888         {
3889             printf(" (max. range err. = ");
3890             printf(dgformat, results[f].dg_histrange_err*kT);
3891             printf(")");
3892             sum_histrange_err += results[f].dg_histrange_err*kT;
3893         }
3894
3895         printf("\n");
3896         dg_tot += results[f].dg;
3897     }
3898     printf("\n");
3899     printf("total ");
3900     lambda_vec_print_short(results[0].a->native_lambda, buf);
3901     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3902     printf("%s - %s", buf, buf2);
3903     printf(",   DG ");
3904
3905     printf(dgformat, dg_tot*kT);
3906     if (bEE)
3907     {
3908         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3909         printf(" +/- ");
3910         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3911     }
3912     printf("\n");
3913     if (disc_err)
3914     {
3915         printf("\nmaximum discretization error = ");
3916         printf(dgformat, sum_disc_err);
3917         if (bEE && stat_err < sum_disc_err)
3918         {
3919             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3920         }
3921     }
3922     if (histrange_err)
3923     {
3924         printf("\nmaximum histogram range error = ");
3925         printf(dgformat, sum_histrange_err);
3926         if (bEE && stat_err < sum_histrange_err)
3927         {
3928             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3929         }
3930
3931     }
3932     printf("\n");
3933
3934
3935     if (fpi != NULL)
3936     {
3937         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3938         fprintf(fpi, xvg2format, buf, dg_tot);
3939         gmx_ffclose(fpi);
3940     }
3941
3942     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3943     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3944
3945     return 0;
3946 }