src/tools/gmx_bar.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41 #include <math.h>
  42 #include <string.h>
  43 #include <ctype.h>
  44 #include <math.h>
  45 #include <float.h>
  46
  47 #include "sysstuff.h"
  48 #include "typedefs.h"
  49 #include "smalloc.h"
  50 #include "futil.h"
  51 #include "statutil.h"
  52 #include "copyrite.h"
  53 #include "macros.h"
  54 #include "enxio.h"
  55 #include "physics.h"
  56 #include "gmx_fatal.h"
  57 #include "xvgr.h"
  58 #include "gmx_ana.h"
  59 #include "maths.h"
  60 #include "names.h"
  61 #include "mdebin.h"
  62
  63 /* Suppress Cygwin compiler warnings from using newlib version of
  64  * ctype.h */
  65 #ifdef GMX_CYGWIN
  66 #undef isdigit
  67 #endif
  68
  69
  70 /* Structure for the names of lambda vector components */
  71 typedef struct lambda_components_t
  72 {
  73     char **names;  /* Array of strings with names for the lambda vector
  74                       components */
  75     int    N;      /* The number of components */
  76     int    Nalloc; /* The number of allocated components */
  77 } lambda_components_t;
  78
  79 /* Structure for a lambda vector or a dhdl derivative direction */
  80 typedef struct lambda_vec_t
  81 {
  82     double                    *val;   /* The lambda vector component values. Only valid if
  83                                          dhdl == -1 */
  84     int                        dhdl;  /* The coordinate index for the derivative described by this
  85                                          structure, or -1 */
  86     const lambda_components_t *lc;    /* the associated lambda_components
  87                                          structure */
  88     int                        index; /* The state number (init-lambda-state) of this lambda
  89                                          vector, if known. If not, it is set to -1 */
  90 } lambda_vec_t;
  91
  92 /* the dhdl.xvg data from a simulation */
  93 typedef struct xvg_t
  94 {
  95     const char   *filename;
  96     int           ftp;           /* file type */
  97     int           nset;          /* number of lambdas, including dhdl */
  98     int          *np;            /* number of data points (du or hists) per lambda */
  99     int           np_alloc;      /* number of points (du or hists) allocated */
 100     double        temp;          /* temperature */
 101     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
 102     double       *t;             /* the times (of second index for y) */
 103     double      **y;             /* the dU values. y[0] holds the derivative, while
 104                                     further ones contain the energy differences between
 105                                     the native lambda and the 'foreign' lambdas. */
 106     lambda_vec_t  native_lambda; /* the native lambda */
 107
 108     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 109 } xvg_t;
 110
 111
 112 typedef struct hist_t
 113 {
 114     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 115     double          dx[2];                  /* the histogram spacing. The reverse
 116                                                dx is the negative of the forward dx.*/
 117     gmx_large_int_t x0[2];                  /* the (forward + reverse) histogram start
 118                                                point(s) as int */
 119
 120     int             nbin[2];                /* the (forward+reverse) number of bins */
 121     gmx_large_int_t sum;                    /* the total number of counts. Must be
 122                                                the same for forward + reverse.  */
 123     int             nhist;                  /* number of hist datas (forward or reverse) */
 124
 125     double          start_time, delta_time; /* start time, end time of histogram */
 126 } hist_t;
 127
 128
 129 /* an aggregate of samples for partial free energy calculation */
 130 typedef struct samples_t
 131 {
 132     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 133     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 134     double        temp;           /* the temperature */
 135     gmx_bool      derivative;     /* whether this sample is a derivative */
 136
 137     /* The samples come either as either delta U lists: */
 138     int     ndu;                    /* the number of delta U samples */
 139     double *du;                     /* the delta u's */
 140     double *t;                      /* the times associated with those samples, or: */
 141     double  start_time, delta_time; /*start time and delta time for linear time*/
 142
 143     /* or as histograms: */
 144     hist_t *hist; /* a histogram */
 145
 146     /* allocation data: (not NULL for data 'owned' by this struct) */
 147     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 148     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 149     hist_t         *hist_alloc;          /* allocated hist */
 150
 151     gmx_large_int_t ntot;                /* total number of samples */
 152     const char     *filename;            /* the file name this sample comes from */
 153 } samples_t;
 154
 155 /* a sample range (start to end for du-style data, or boolean
 156     for both du-style data and histograms */
 157 typedef struct sample_range_t
 158 {
 159     int        start, end; /* start and end index for du style data */
 160     gmx_bool   use;        /* whether to use this sample */
 161
 162     samples_t *s;          /* the samples this range belongs to */
 163 } sample_range_t;
 164
 165
 166 /* a collection of samples for a partial free energy calculation
 167     (i.e. the collection of samples from one native lambda to one
 168     foreign lambda) */
 169 typedef struct sample_coll_t
 170 {
 171     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 172                                           in the histogram */
 173     lambda_vec_t   *foreign_lambda;    /* collection */
 174     double          temp;              /* the temperature */
 175
 176     int             nsamples;          /* the number of samples */
 177     samples_t     **s;                 /* the samples themselves */
 178     sample_range_t *r;                 /* the sample ranges */
 179     int             nsamples_alloc;    /* number of allocated samples */
 180
 181     gmx_large_int_t ntot;              /* total number of samples in the ranges of
 182                                           this collection */
 183
 184     struct sample_coll_t *next, *prev; /* next and previous in the list */
 185 } sample_coll_t;
 186
 187 /* all the samples associated with a lambda point */
 188 typedef struct lambda_data_t
 189 {
 190     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 191     double                temp;        /* temperature */
 192
 193     sample_coll_t        *sc;          /* the samples */
 194
 195     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 196
 197     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 198 } lambda_data_t;
 199
 200 /* Top-level data structure of simulation data */
 201 typedef struct sim_data_t
 202 {
 203     lambda_data_t      *lb;      /* a lambda data linked list */
 204     lambda_data_t       lb_head; /* The head element of the linked list */
 205
 206     lambda_components_t lc;      /* the allowed components of the lambda
 207                                     vectors */
 208 } sim_data_t;
 209
 210 /* Top-level data structure with calculated values. */
 211 typedef struct {
 212     sample_coll_t *a, *b;            /* the simulation data */
 213
 214     double         dg;               /* the free energy difference */
 215     double         dg_err;           /* the free energy difference */
 216
 217     double         dg_disc_err;      /* discretization error */
 218     double         dg_histrange_err; /* histogram range error */
 219
 220     double         sa;               /* relative entropy of b in state a */
 221     double         sa_err;           /* error in sa */
 222     double         sb;               /* relative entropy of a in state b */
 223     double         sb_err;           /* error in sb */
 224
 225     double         dg_stddev;        /* expected dg stddev per sample */
 226     double         dg_stddev_err;    /* error in dg_stddev */
 227 } barres_t;
 228
 229
 230 /* Initialize a lambda_components structure */
 231 static void lambda_components_init(lambda_components_t *lc)
 232 {
 233     lc->N      = 0;
 234     lc->Nalloc = 2;
 235     snew(lc->names, lc->Nalloc);
 236 }
 237
 238 /* Add a component to a lambda_components structure */
 239 static void lambda_components_add(lambda_components_t *lc,
 240                                   const char *name, size_t name_length)
 241 {
 242     while (lc->N + 1 > lc->Nalloc)
 243     {
 244         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 245         srealloc( lc->names, lc->Nalloc );
 246     }
 247     snew(lc->names[lc->N], name_length+1);
 248     strncpy(lc->names[lc->N], name, name_length);
 249     lc->N++;
 250 }
 251
 252 /* check whether a component with index 'index' matches the given name, or
 253    is also NULL. Returns TRUE if this is the case.
 254    the string name does not need to end */
 255 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 256                                         int                        index,
 257                                         const char                *name,
 258                                         size_t                     name_length)
 259 {
 260     size_t len;
 261     if (index >= lc->N)
 262     {
 263         return FALSE;
 264     }
 265     if (name == NULL && lc->names[index] == NULL)
 266     {
 267         return TRUE;
 268     }
 269     if ((name == NULL) != (lc->names[index] == NULL))
 270     {
 271         return FALSE;
 272     }
 273     len = strlen(lc->names[index]);
 274     if (len != name_length)
 275     {
 276         return FALSE;
 277     }
 278     if (strncmp(lc->names[index], name, name_length) == 0)
 279     {
 280         return TRUE;
 281     }
 282     return FALSE;
 283 }
 284
 285 /* Find the index of a given lambda component name, or -1 if not found */
 286 static int lambda_components_find(const lambda_components_t *lc,
 287                                   const char                *name,
 288                                   size_t                     name_length)
 289 {
 290     int i;
 291
 292     for (i = 0; i < lc->N; i++)
 293     {
 294         if (strncmp(lc->names[i], name, name_length) == 0)
 295         {
 296             return i;
 297         }
 298     }
 299     return -1;
 300 }
 301
 302
 303
 304 /* initialize a lambda vector */
 305 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 306 {
 307     snew(lv->val, lc->N);
 308     lv->index = -1;
 309     lv->dhdl  = -1;
 310     lv->lc    = lc;
 311 }
 312
 313 static void lambda_vec_destroy(lambda_vec_t *lv)
 314 {
 315     sfree(lv->val);
 316 }
 317
 318 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 319 {
 320     int i;
 321
 322     lambda_vec_init(lv, orig->lc);
 323     lv->dhdl  = orig->dhdl;
 324     lv->index = orig->index;
 325     for (i = 0; i < lv->lc->N; i++)
 326     {
 327         lv->val[i] = orig->val[i];
 328     }
 329 }
 330
 331 /* write a lambda vec to a preallocated string */
 332 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 333 {
 334     int    i;
 335     size_t np;
 336
 337     str[0] = 0; /* reset the string */
 338     if (lv->dhdl < 0)
 339     {
 340         if (named)
 341         {
 342             str += sprintf(str, "delta H to ");
 343         }
 344         if (lv->lc->N > 1)
 345         {
 346             str += sprintf(str, "(");
 347         }
 348         for (i = 0; i < lv->lc->N; i++)
 349         {
 350             str += sprintf(str, "%g", lv->val[i]);
 351             if (i < lv->lc->N-1)
 352             {
 353                 str += sprintf(str, ", ");
 354             }
 355         }
 356         if (lv->lc->N > 1)
 357         {
 358             str += sprintf(str, ")");
 359         }
 360     }
 361     else
 362     {
 363         /* this lambda vector describes a derivative */
 364         str += sprintf(str, "dH/dl");
 365         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 366         {
 367             str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 368         }
 369     }
 370 }
 371
 372 /* write a shortened version of the lambda vec to a preallocated string */
 373 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 374 {
 375     int    i;
 376     size_t np;
 377
 378     if (lv->index >= 0)
 379     {
 380         sprintf(str, "%6d", lv->index);
 381     }
 382     else
 383     {
 384         if (lv->dhdl < 0)
 385         {
 386             sprintf(str, "%6.3f", lv->val[0]);
 387         }
 388         else
 389         {
 390             sprintf(str, "dH/dl[%d]", lv->dhdl);
 391         }
 392     }
 393 }
 394
 395 /* write an intermediate version of two lambda vecs to a preallocated string */
 396 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 397                                           const lambda_vec_t *b, char *str)
 398 {
 399     int    i;
 400     size_t np;
 401
 402     str[0] = 0;
 403     if ( (a->index >= 0) && (b->index >= 0) )
 404     {
 405         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 406     }
 407     else
 408     {
 409         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 410         {
 411             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 412         }
 413     }
 414 }
 415
 416
 417
 418 /* calculate the difference in lambda vectors: c = a-b.
 419    c must be initialized already, and a and b must describe non-derivative
 420    lambda points */
 421 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 422                             lambda_vec_t *c)
 423 {
 424     int i;
 425
 426     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 427     {
 428         gmx_fatal(FARGS,
 429                   "Trying to calculate the difference between derivatives instead of lambda points");
 430     }
 431     if ((a->lc != b->lc) || (a->lc != c->lc) )
 432     {
 433         gmx_fatal(FARGS,
 434                   "Trying to calculate the difference lambdas with differing basis set");
 435     }
 436     for (i = 0; i < a->lc->N; i++)
 437     {
 438         c->val[i] = a->val[i] - b->val[i];
 439     }
 440 }
 441
 442 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 443    a and b must describe non-derivative lambda points */
 444 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 445 {
 446     int    i;
 447     double ret = 0.;
 448
 449     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 450     {
 451         gmx_fatal(FARGS,
 452                   "Trying to calculate the difference between derivatives instead of lambda points");
 453     }
 454     if (a->lc != b->lc)
 455     {
 456         gmx_fatal(FARGS,
 457                   "Trying to calculate the difference lambdas with differing basis set");
 458     }
 459     for (i = 0; i < a->lc->N; i++)
 460     {
 461         double df = a->val[i] - b->val[i];
 462         ret += df*df;
 463     }
 464     return sqrt(ret);
 465 }
 466
 467
 468 /* check whether two lambda vectors are the same */
 469 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 470 {
 471     int i;
 472
 473     if (a->lc != b->lc)
 474     {
 475         return FALSE;
 476     }
 477     if (a->dhdl < 0)
 478     {
 479         for (i = 0; i < a->lc->N; i++)
 480         {
 481             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 482             {
 483                 return FALSE;
 484             }
 485         }
 486         return TRUE;
 487     }
 488     else
 489     {
 490         /* they're derivatives, so we check whether the indices match */
 491         return (a->dhdl == b->dhdl);
 492     }
 493 }
 494
 495 /* Compare the sort order of two foreign lambda vectors
 496
 497     returns 1 if a is 'bigger' than b,
 498     returns 0 if they're the same,
 499     returns -1 if a is 'smaller' than b.*/
 500 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 501                                        const lambda_vec_t *b)
 502 {
 503     int      i;
 504     double   norm_a    = 0, norm_b = 0;
 505     gmx_bool different = FALSE;
 506
 507     if (a->lc != b->lc)
 508     {
 509         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 510     }
 511     /* if either one has an index we sort based on that */
 512     if ((a->index >= 0) || (b->index >= 0))
 513     {
 514         if (a->index == b->index)
 515         {
 516             return 0;
 517         }
 518         return (a->index > b->index) ? 1 : -1;
 519     }
 520     if (a->dhdl >= 0 || b->dhdl >= 0)
 521     {
 522         /* lambda vectors that are derivatives always sort higher than those
 523            without derivatives */
 524         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 525         {
 526             return (a->dhdl >= 0) ? 1 : -1;
 527         }
 528         return a->dhdl > b->dhdl;
 529     }
 530
 531     /* neither has an index, so we can only sort on the lambda components,
 532        which is only valid if there is one component */
 533     for (i = 0; i < a->lc->N; i++)
 534     {
 535         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 536         {
 537             different = TRUE;
 538         }
 539         norm_a += a->val[i]*a->val[i];
 540         norm_b += b->val[i]*b->val[i];
 541     }
 542     if (!different)
 543     {
 544         return 0;
 545     }
 546     return norm_a > norm_b;
 547 }
 548
 549 /* Compare the sort order of two native lambda vectors
 550
 551     returns 1 if a is 'bigger' than b,
 552     returns 0 if they're the same,
 553     returns -1 if a is 'smaller' than b.*/
 554 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 555                                       const lambda_vec_t *b)
 556 {
 557     int i;
 558
 559     if (a->lc != b->lc)
 560     {
 561         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 562     }
 563     /* if either one has an index we sort based on that */
 564     if ((a->index >= 0) || (b->index >= 0))
 565     {
 566         if (a->index == b->index)
 567         {
 568             return 0;
 569         }
 570         return (a->index > b->index) ? 1 : -1;
 571     }
 572     /* neither has an index, so we can only sort on the lambda components,
 573        which is only valid if there is one component */
 574     if (a->lc->N > 1)
 575     {
 576         gmx_fatal(FARGS,
 577                   "Can't compare lambdas with no index and > 1 component");
 578     }
 579     if (a->dhdl >= 0 || b->dhdl >= 0)
 580     {
 581         gmx_fatal(FARGS,
 582                   "Can't compare native lambdas that are derivatives");
 583     }
 584     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 585     {
 586         return 0;
 587     }
 588     return a->val[0] > b->val[0] ? 1 : -1;
 589 }
 590
 591
 592
 593
 594 static void hist_init(hist_t *h, int nhist, int *nbin)
 595 {
 596     int i;
 597     if (nhist > 2)
 598     {
 599         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 600     }
 601     for (i = 0; i < nhist; i++)
 602     {
 603         snew(h->bin[i], nbin[i]);
 604         h->x0[i]      = 0;
 605         h->nbin[i]    = nbin[i];
 606         h->start_time = h->delta_time = 0;
 607         h->dx[i]      = 0;
 608     }
 609     h->sum   = 0;
 610     h->nhist = nhist;
 611 }
 612
 613 static void hist_destroy(hist_t *h)
 614 {
 615     sfree(h->bin);
 616 }
 617
 618
 619 static void xvg_init(xvg_t *ba)
 620 {
 621     ba->filename = NULL;
 622     ba->nset     = 0;
 623     ba->np_alloc = 0;
 624     ba->np       = NULL;
 625     ba->y        = NULL;
 626 }
 627
 628 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 629                          lambda_vec_t *foreign_lambda, double temp,
 630                          gmx_bool derivative, const char *filename)
 631 {
 632     s->native_lambda  = native_lambda;
 633     s->foreign_lambda = foreign_lambda;
 634     s->temp           = temp;
 635     s->derivative     = derivative;
 636
 637     s->ndu        = 0;
 638     s->du         = NULL;
 639     s->t          = NULL;
 640     s->start_time = s->delta_time = 0;
 641     s->hist       = NULL;
 642     s->du_alloc   = NULL;
 643     s->t_alloc    = NULL;
 644     s->hist_alloc = NULL;
 645     s->ndu_alloc  = 0;
 646     s->nt_alloc   = 0;
 647
 648     s->ntot     = 0;
 649     s->filename = filename;
 650 }
 651
 652 static void sample_range_init(sample_range_t *r, samples_t *s)
 653 {
 654     r->start = 0;
 655     r->end   = s->ndu;
 656     r->use   = TRUE;
 657     r->s     = NULL;
 658 }
 659
 660 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 661                              lambda_vec_t *foreign_lambda, double temp)
 662 {
 663     sc->native_lambda  = native_lambda;
 664     sc->foreign_lambda = foreign_lambda;
 665     sc->temp           = temp;
 666
 667     sc->nsamples       = 0;
 668     sc->s              = NULL;
 669     sc->r              = NULL;
 670     sc->nsamples_alloc = 0;
 671
 672     sc->ntot = 0;
 673     sc->next = sc->prev = NULL;
 674 }
 675
 676 static void sample_coll_destroy(sample_coll_t *sc)
 677 {
 678     /* don't free the samples themselves */
 679     sfree(sc->r);
 680     sfree(sc->s);
 681 }
 682
 683
 684 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 685                              double temp)
 686 {
 687     l->lambda = native_lambda;
 688     l->temp   = temp;
 689
 690     l->next = NULL;
 691     l->prev = NULL;
 692
 693     l->sc = &(l->sc_head);
 694
 695     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 696     l->sc->next = l->sc;
 697     l->sc->prev = l->sc;
 698 }
 699
 700 static void barres_init(barres_t *br)
 701 {
 702     br->dg            = 0;
 703     br->dg_err        = 0;
 704     br->sa            = 0;
 705     br->sa_err        = 0;
 706     br->sb            = 0;
 707     br->sb_err        = 0;
 708     br->dg_stddev     = 0;
 709     br->dg_stddev_err = 0;
 710
 711     br->a = NULL;
 712     br->b = NULL;
 713 }
 714
 715
 716 /* calculate the total number of samples in a sample collection */
 717 static void sample_coll_calc_ntot(sample_coll_t *sc)
 718 {
 719     int i;
 720
 721     sc->ntot = 0;
 722     for (i = 0; i < sc->nsamples; i++)
 723     {
 724         if (sc->r[i].use)
 725         {
 726             if (sc->s[i]->hist)
 727             {
 728                 sc->ntot += sc->s[i]->ntot;
 729             }
 730             else
 731             {
 732                 sc->ntot += sc->r[i].end - sc->r[i].start;
 733             }
 734         }
 735     }
 736 }
 737
 738
 739 /* find the barsamples_t associated with a lambda that corresponds to
 740    a specific foreign lambda */
 741 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 742                                                    lambda_vec_t  *foreign_lambda)
 743 {
 744     sample_coll_t *sc = l->sc->next;
 745
 746     while (sc != l->sc)
 747     {
 748         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 749         {
 750             return sc;
 751         }
 752         sc = sc->next;
 753     }
 754
 755     return NULL;
 756 }
 757
 758 /* insert li into an ordered list of lambda_colls */
 759 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 760 {
 761     sample_coll_t *scn = l->sc->next;
 762     while ( (scn != l->sc) )
 763     {
 764         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 765         {
 766             break;
 767         }
 768         scn = scn->next;
 769     }
 770     /* now insert it before the found scn */
 771     sc->next        = scn;
 772     sc->prev        = scn->prev;
 773     scn->prev->next = sc;
 774     scn->prev       = sc;
 775 }
 776
 777 /* insert li into an ordered list of lambdas */
 778 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 779 {
 780     lambda_data_t *lc = head->next;
 781     while (lc != head)
 782     {
 783         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 784         {
 785             break;
 786         }
 787         lc = lc->next;
 788     }
 789     /* now insert ourselves before the found lc */
 790     li->next       = lc;
 791     li->prev       = lc->prev;
 792     lc->prev->next = li;
 793     lc->prev       = li;
 794 }
 795
 796 /* insert a sample and a sample_range into a sample_coll. The
 797     samples are stored as a pointer, the range is copied. */
 798 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 799                                       sample_range_t *r)
 800 {
 801     /* first check if it belongs here */
 802     if (sc->temp != s->temp)
 803     {
 804         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 805                   s->filename, sc->next->s[0]->filename);
 806     }
 807     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 808     {
 809         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 810                   s->filename, sc->next->s[0]->filename);
 811     }
 812     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 813     {
 814         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 815                   s->filename, sc->next->s[0]->filename);
 816     }
 817
 818     /* check if there's room */
 819     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 820     {
 821         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 822         srenew(sc->s, sc->nsamples_alloc);
 823         srenew(sc->r, sc->nsamples_alloc);
 824     }
 825     sc->s[sc->nsamples] = s;
 826     sc->r[sc->nsamples] = *r;
 827     sc->nsamples++;
 828
 829     sample_coll_calc_ntot(sc);
 830 }
 831
 832 /* insert a sample into a lambda_list, creating the right sample_coll if
 833    neccesary */
 834 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 835 {
 836     gmx_bool       found = FALSE;
 837     sample_coll_t *sc;
 838     sample_range_t r;
 839
 840     lambda_data_t *l = head->next;
 841
 842     /* first search for the right lambda_data_t */
 843     while (l != head)
 844     {
 845         if (lambda_vec_same(l->lambda, s->native_lambda) )
 846         {
 847             found = TRUE;
 848             break;
 849         }
 850         l = l->next;
 851     }
 852
 853     if (!found)
 854     {
 855         snew(l, 1);                                     /* allocate a new one */
 856         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 857         lambda_data_insert_lambda(head, l);             /* add it to the list */
 858     }
 859
 860     /* now look for a sample collection */
 861     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 862     if (!sc)
 863     {
 864         snew(sc, 1); /* allocate a new one */
 865         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 866         lambda_data_insert_sample_coll(l, sc);
 867     }
 868
 869     /* now insert the samples into the sample coll */
 870     sample_range_init(&r, s);
 871     sample_coll_insert_sample(sc, s, &r);
 872 }
 873
 874
 875 /* make a histogram out of a sample collection */
 876 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 877                                   int *nbin_alloc, int *nbin,
 878                                   double *dx, double *xmin, int nbin_default)
 879 {
 880     int      i, j, k;
 881     gmx_bool dx_set   = FALSE;
 882     gmx_bool xmin_set = FALSE;
 883
 884     gmx_bool xmax_set      = FALSE;
 885     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 886                                        limits of a histogram */
 887     double   xmax          = -1;
 888
 889     /* first determine dx and xmin; try the histograms */
 890     for (i = 0; i < sc->nsamples; i++)
 891     {
 892         if (sc->s[i]->hist)
 893         {
 894             hist_t *hist = sc->s[i]->hist;
 895             for (k = 0; k < hist->nhist; k++)
 896             {
 897                 double hdx      = hist->dx[k];
 898                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 899
 900                 /* we use the biggest dx*/
 901                 if ( (!dx_set) || hist->dx[0] > *dx)
 902                 {
 903                     dx_set = TRUE;
 904                     *dx    = hist->dx[0];
 905                 }
 906                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 907                 {
 908                     xmin_set = TRUE;
 909                     *xmin    = (hist->x0[k]*hdx);
 910                 }
 911
 912                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 913                 {
 914                     xmax_set = TRUE;
 915                     xmax     = xmax_now;
 916                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 917                     {
 918                         xmax_set_hard = TRUE;
 919                     }
 920                 }
 921                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 922                 {
 923                     xmax_set_hard = TRUE;
 924                     xmax          = xmax_now;
 925                 }
 926             }
 927         }
 928     }
 929     /* and the delta us */
 930     for (i = 0; i < sc->nsamples; i++)
 931     {
 932         if (sc->s[i]->ndu > 0)
 933         {
 934             /* determine min and max */
 935             int    starti  = sc->r[i].start;
 936             int    endi    = sc->r[i].end;
 937             double du_xmin = sc->s[i]->du[starti];
 938             double du_xmax = sc->s[i]->du[starti];
 939             for (j = starti+1; j < endi; j++)
 940             {
 941                 if (sc->s[i]->du[j] < du_xmin)
 942                 {
 943                     du_xmin = sc->s[i]->du[j];
 944                 }
 945                 if (sc->s[i]->du[j] > du_xmax)
 946                 {
 947                     du_xmax = sc->s[i]->du[j];
 948                 }
 949             }
 950
 951             /* and now change the limits */
 952             if ( (!xmin_set) || (du_xmin < *xmin) )
 953             {
 954                 xmin_set = TRUE;
 955                 *xmin    = du_xmin;
 956             }
 957             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 958             {
 959                 xmax_set = TRUE;
 960                 xmax     = du_xmax;
 961             }
 962         }
 963     }
 964
 965     if (!xmax_set || !xmin_set)
 966     {
 967         *nbin = 0;
 968         return;
 969     }
 970
 971
 972     if (!dx_set)
 973     {
 974         *nbin = nbin_default;
 975         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 976                                                be 0, and we count from 0 */
 977     }
 978     else
 979     {
 980         *nbin = (xmax-(*xmin))/(*dx);
 981     }
 982
 983     if (*nbin > *nbin_alloc)
 984     {
 985         *nbin_alloc = *nbin;
 986         srenew(*bin, *nbin_alloc);
 987     }
 988
 989     /* reset the histogram */
 990     for (i = 0; i < (*nbin); i++)
 991     {
 992         (*bin)[i] = 0;
 993     }
 994
 995     /* now add the actual data */
 996     for (i = 0; i < sc->nsamples; i++)
 997     {
 998         if (sc->s[i]->hist)
 999         {
1000             hist_t *hist = sc->s[i]->hist;
1001             for (k = 0; k < hist->nhist; k++)
1002             {
1003                 double hdx       = hist->dx[k];
1004                 double xmin_hist = hist->x0[k]*hdx;
1005                 for (j = 0; j < hist->nbin[k]; j++)
1006                 {
1007                     /* calculate the bin corresponding to the middle of the
1008                        original bin */
1009                     double x     = hdx*(j+0.5) + xmin_hist;
1010                     int    binnr = (int)((x-(*xmin))/(*dx));
1011
1012                     if (binnr >= *nbin || binnr < 0)
1013                     {
1014                         binnr = (*nbin)-1;
1015                     }
1016
1017                     (*bin)[binnr] += hist->bin[k][j];
1018                 }
1019             }
1020         }
1021         else
1022         {
1023             int starti = sc->r[i].start;
1024             int endi   = sc->r[i].end;
1025             for (j = starti; j < endi; j++)
1026             {
1027                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1028                 if (binnr >= *nbin || binnr < 0)
1029                 {
1030                     binnr = (*nbin)-1;
1031                 }
1032
1033                 (*bin)[binnr]++;
1034             }
1035         }
1036     }
1037 }
1038
1039 /* write a collection of histograms to a file */
1040 void sim_data_histogram(sim_data_t *sd, const char *filename,
1041                         int nbin_default, const output_env_t oenv)
1042 {
1043     char           label_x[STRLEN];
1044     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1045     const char    *title   = "N(\\DeltaH)";
1046     const char    *label_y = "Samples";
1047     FILE          *fp;
1048     lambda_data_t *bl;
1049     int            nsets     = 0;
1050     char         **setnames  = NULL;
1051     gmx_bool       first_set = FALSE;
1052     /* histogram data: */
1053     int           *hist       = NULL;
1054     int            nbin       = 0;
1055     int            nbin_alloc = 0;
1056     double         dx         = 0;
1057     double         min        = 0;
1058     int            i;
1059     lambda_data_t *bl_head = sd->lb;
1060
1061     printf("\nWriting histogram to %s\n", filename);
1062     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1063
1064     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1065
1066     /* first get all the set names */
1067     bl = bl_head->next;
1068     /* iterate over all lambdas */
1069     while (bl != bl_head)
1070     {
1071         sample_coll_t *sc = bl->sc->next;
1072
1073         /* iterate over all samples */
1074         while (sc != bl->sc)
1075         {
1076             char buf[STRLEN], buf2[STRLEN];
1077
1078             nsets++;
1079             srenew(setnames, nsets);
1080             snew(setnames[nsets-1], STRLEN);
1081             if (sc->foreign_lambda->dhdl < 0)
1082             {
1083                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1084                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1085                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1086                         deltag, lambda, buf2, lambda, buf);
1087             }
1088             else
1089             {
1090                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1091                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1092                         dhdl, lambda, buf);
1093             }
1094             sc = sc->next;
1095         }
1096
1097         bl = bl->next;
1098     }
1099     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1100
1101
1102     /* now make the histograms */
1103     bl = bl_head->next;
1104     /* iterate over all lambdas */
1105     while (bl != bl_head)
1106     {
1107         sample_coll_t *sc = bl->sc->next;
1108
1109         /* iterate over all samples */
1110         while (sc != bl->sc)
1111         {
1112             if (!first_set)
1113             {
1114                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1115             }
1116
1117             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1118                                   nbin_default);
1119
1120             for (i = 0; i < nbin; i++)
1121             {
1122                 double xmin = i*dx + min;
1123                 double xmax = (i+1)*dx + min;
1124
1125                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1126             }
1127
1128             first_set = FALSE;
1129             sc        = sc->next;
1130         }
1131
1132         bl = bl->next;
1133     }
1134
1135     if (hist)
1136     {
1137         sfree(hist);
1138     }
1139
1140     xvgrclose(fp);
1141 }
1142
1143 /* create a collection (array) of barres_t object given a ordered linked list
1144    of barlamda_t sample collections */
1145 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1146                                     gmx_bool use_dhdl)
1147 {
1148     lambda_data_t *bl;
1149     int            nlambda = 0;
1150     barres_t      *res;
1151     int            i;
1152     gmx_bool       dhdl    = FALSE;
1153     gmx_bool       first   = TRUE;
1154     lambda_data_t *bl_head = sd->lb;
1155
1156     /* first count the lambdas */
1157     bl = bl_head->next;
1158     while (bl != bl_head)
1159     {
1160         nlambda++;
1161         bl = bl->next;
1162     }
1163     snew(res, nlambda-1);
1164
1165     /* next put the right samples in the res */
1166     *nres = 0;
1167     bl    = bl_head->next->next; /* we start with the second one. */
1168     while (bl != bl_head)
1169     {
1170         sample_coll_t *sc, *scprev;
1171         barres_t      *br = &(res[*nres]);
1172         /* there is always a previous one. we search for that as a foreign
1173            lambda: */
1174         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1175         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1176
1177         barres_init(br);
1178
1179         if (use_dhdl)
1180         {
1181             /* we use dhdl */
1182
1183             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1184             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1185
1186             if (first)
1187             {
1188                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1189                 dhdl = TRUE;
1190             }
1191             if (!dhdl)
1192             {
1193                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1194             }
1195         }
1196         else if (!scprev && !sc)
1197         {
1198             gmx_fatal(FARGS, "There is no path from lambda=%g -> %g that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1199         }
1200
1201         /* normal delta H */
1202         if (!scprev)
1203         {
1204             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %g\nin the files for lambda = %g", bl->lambda, bl->prev->lambda);
1205         }
1206         if (!sc)
1207         {
1208             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %g\nin the files for lambda = %g", bl->prev->lambda, bl->lambda);
1209         }
1210         br->a = scprev;
1211         br->b = sc;
1212
1213         first = FALSE;
1214         (*nres)++;
1215         bl = bl->next;
1216     }
1217     return res;
1218 }
1219
1220 /* estimate the maximum discretization error */
1221 static double barres_list_max_disc_err(barres_t *res, int nres)
1222 {
1223     int    i, j;
1224     double disc_err = 0.;
1225     double delta_lambda;
1226
1227     for (i = 0; i < nres; i++)
1228     {
1229         barres_t *br = &(res[i]);
1230
1231         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1232                                            br->a->native_lambda);
1233
1234         for (j = 0; j < br->a->nsamples; j++)
1235         {
1236             if (br->a->s[j]->hist)
1237             {
1238                 double Wfac = 1.;
1239                 if (br->a->s[j]->derivative)
1240                 {
1241                     Wfac =  delta_lambda;
1242                 }
1243
1244                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1245             }
1246         }
1247         for (j = 0; j < br->b->nsamples; j++)
1248         {
1249             if (br->b->s[j]->hist)
1250             {
1251                 double Wfac = 1.;
1252                 if (br->b->s[j]->derivative)
1253                 {
1254                     Wfac =  delta_lambda;
1255                 }
1256                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1257             }
1258         }
1259     }
1260     return disc_err;
1261 }
1262
1263
1264 /* impose start and end times on a sample collection, updating sample_ranges */
1265 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1266                                      double end_t)
1267 {
1268     int i;
1269     for (i = 0; i < sc->nsamples; i++)
1270     {
1271         samples_t      *s = sc->s[i];
1272         sample_range_t *r = &(sc->r[i]);
1273         if (s->hist)
1274         {
1275             double end_time = s->hist->delta_time*s->hist->sum +
1276                 s->hist->start_time;
1277             if (s->hist->start_time < begin_t || end_time > end_t)
1278             {
1279                 r->use = FALSE;
1280             }
1281         }
1282         else
1283         {
1284             if (!s->t)
1285             {
1286                 double end_time;
1287                 if (s->start_time < begin_t)
1288                 {
1289                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1290                 }
1291                 end_time = s->delta_time*s->ndu + s->start_time;
1292                 if (end_time > end_t)
1293                 {
1294                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1295                 }
1296             }
1297             else
1298             {
1299                 int j;
1300                 for (j = 0; j < s->ndu; j++)
1301                 {
1302                     if (s->t[j] < begin_t)
1303                     {
1304                         r->start = j;
1305                     }
1306
1307                     if (s->t[j] >= end_t)
1308                     {
1309                         r->end = j;
1310                         break;
1311                     }
1312                 }
1313             }
1314             if (r->start > r->end)
1315             {
1316                 r->use = FALSE;
1317             }
1318         }
1319     }
1320     sample_coll_calc_ntot(sc);
1321 }
1322
1323 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1324 {
1325     double         first_t, last_t;
1326     double         begin_t, end_t;
1327     lambda_data_t *lc;
1328     lambda_data_t *head = sd->lb;
1329     int            j;
1330
1331     if (begin <= 0 && end < 0)
1332     {
1333         return;
1334     }
1335
1336     /* first determine the global start and end times */
1337     first_t = -1;
1338     last_t  = -1;
1339     lc      = head->next;
1340     while (lc != head)
1341     {
1342         sample_coll_t *sc = lc->sc->next;
1343         while (sc != lc->sc)
1344         {
1345             for (j = 0; j < sc->nsamples; j++)
1346             {
1347                 double start_t, end_t;
1348
1349                 start_t = sc->s[j]->start_time;
1350                 end_t   =   sc->s[j]->start_time;
1351                 if (sc->s[j]->hist)
1352                 {
1353                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1354                 }
1355                 else
1356                 {
1357                     if (sc->s[j]->t)
1358                     {
1359                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1360                     }
1361                     else
1362                     {
1363                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1364                     }
1365                 }
1366
1367                 if (start_t < first_t || first_t < 0)
1368                 {
1369                     first_t = start_t;
1370                 }
1371                 if (end_t > last_t)
1372                 {
1373                     last_t = end_t;
1374                 }
1375             }
1376             sc = sc->next;
1377         }
1378         lc = lc->next;
1379     }
1380
1381     /* calculate the actual times */
1382     if (begin > 0)
1383     {
1384         begin_t = begin;
1385     }
1386     else
1387     {
1388         begin_t = first_t;
1389     }
1390
1391     if (end > 0)
1392     {
1393         end_t = end;
1394     }
1395     else
1396     {
1397         end_t = last_t;
1398     }
1399     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1400
1401     if (begin_t > end_t)
1402     {
1403         return;
1404     }
1405     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1406
1407     /* then impose them */
1408     lc = head->next;
1409     while (lc != head)
1410     {
1411         sample_coll_t *sc = lc->sc->next;
1412         while (sc != lc->sc)
1413         {
1414             sample_coll_impose_times(sc, begin_t, end_t);
1415             sc = sc->next;
1416         }
1417         lc = lc->next;
1418     }
1419 }
1420
1421
1422 /* create subsample i out of ni from an existing sample_coll */
1423 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1424                                              sample_coll_t *sc_orig,
1425                                              int i, int ni)
1426 {
1427     int             j;
1428     int             hist_start, hist_end;
1429
1430     gmx_large_int_t ntot_start;
1431     gmx_large_int_t ntot_end;
1432     gmx_large_int_t ntot_so_far;
1433
1434     *sc = *sc_orig; /* just copy all fields */
1435
1436     /* allocate proprietary memory */
1437     snew(sc->s, sc_orig->nsamples);
1438     snew(sc->r, sc_orig->nsamples);
1439
1440     /* copy the samples */
1441     for (j = 0; j < sc_orig->nsamples; j++)
1442     {
1443         sc->s[j] = sc_orig->s[j];
1444         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1445     }
1446
1447     /* now fix start and end fields */
1448     /* the casts avoid possible overflows */
1449     ntot_start  = (gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1450     ntot_end    = (gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1451     ntot_so_far = 0;
1452     for (j = 0; j < sc->nsamples; j++)
1453     {
1454         gmx_large_int_t ntot_add;
1455         gmx_large_int_t new_start, new_end;
1456
1457         if (sc->r[j].use)
1458         {
1459             if (sc->s[j]->hist)
1460             {
1461                 ntot_add = sc->s[j]->hist->sum;
1462             }
1463             else
1464             {
1465                 ntot_add = sc->r[j].end - sc->r[j].start;
1466             }
1467         }
1468         else
1469         {
1470             ntot_add = 0;
1471         }
1472
1473         if (!sc->s[j]->hist)
1474         {
1475             if (ntot_so_far < ntot_start)
1476             {
1477                 /* adjust starting point */
1478                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1479             }
1480             else
1481             {
1482                 new_start = sc->r[j].start;
1483             }
1484             /* adjust end point */
1485             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1486             if (new_end > sc->r[j].end)
1487             {
1488                 new_end = sc->r[j].end;
1489             }
1490
1491             /* check if we're in range at all */
1492             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1493             {
1494                 new_start = 0;
1495                 new_end   = 0;
1496             }
1497             /* and write the new range */
1498             sc->r[j].start = (int)new_start;
1499             sc->r[j].end   = (int)new_end;
1500         }
1501         else
1502         {
1503             if (sc->r[j].use)
1504             {
1505                 double overlap;
1506                 double ntot_start_norm, ntot_end_norm;
1507                 /* calculate the amount of overlap of the
1508                    desired range (ntot_start -- ntot_end) onto
1509                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1510
1511                 /* first calculate normalized bounds
1512                    (where 0 is the start of the hist range, and 1 the end) */
1513                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1514                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1515
1516                 /* now fix the boundaries */
1517                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1518                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1519
1520                 /* and calculate the overlap */
1521                 overlap = ntot_end_norm - ntot_start_norm;
1522
1523                 if (overlap > 0.95) /* we allow for 5% slack */
1524                 {
1525                     sc->r[j].use = TRUE;
1526                 }
1527                 else if (overlap < 0.05)
1528                 {
1529                     sc->r[j].use = FALSE;
1530                 }
1531                 else
1532                 {
1533                     return FALSE;
1534                 }
1535             }
1536         }
1537         ntot_so_far += ntot_add;
1538     }
1539     sample_coll_calc_ntot(sc);
1540
1541     return TRUE;
1542 }
1543
1544 /* calculate minimum and maximum work values in sample collection */
1545 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1546                                 double *Wmin, double *Wmax)
1547 {
1548     int i, j;
1549
1550     *Wmin = FLT_MAX;
1551     *Wmax = -FLT_MAX;
1552
1553     for (i = 0; i < sc->nsamples; i++)
1554     {
1555         samples_t      *s = sc->s[i];
1556         sample_range_t *r = &(sc->r[i]);
1557         if (r->use)
1558         {
1559             if (!s->hist)
1560             {
1561                 for (j = r->start; j < r->end; j++)
1562                 {
1563                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1564                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1565                 }
1566             }
1567             else
1568             {
1569                 int    hd = 0; /* determine the histogram direction: */
1570                 double dx;
1571                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1572                 {
1573                     hd = 1;
1574                 }
1575                 dx = s->hist->dx[hd];
1576
1577                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1578                 {
1579                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1580                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1581                     /* look for the highest value bin with values */
1582                     if (s->hist->bin[hd][j] > 0)
1583                     {
1584                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1585                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1586                         break;
1587                     }
1588                 }
1589             }
1590         }
1591     }
1592 }
1593
1594 /* Initialize a sim_data structure */
1595 static void sim_data_init(sim_data_t *sd)
1596 {
1597     /* make linked list */
1598     sd->lb       = &(sd->lb_head);
1599     sd->lb->next = sd->lb;
1600     sd->lb->prev = sd->lb;
1601
1602     lambda_components_init(&(sd->lc));
1603 }
1604
1605
1606 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1607 {
1608     int    i;
1609     double sum;
1610
1611     sum = 0;
1612
1613     for (i = 0; i < n; i++)
1614     {
1615         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1616     }
1617
1618     return sum;
1619 }
1620
1621 /* calculate the BAR average given a histogram
1622
1623     if type== 0, calculate the best estimate for the average,
1624     if type==-1, calculate the minimum possible value given the histogram
1625     if type== 1, calculate the maximum possible value given the histogram */
1626 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1627                                 int type)
1628 {
1629     double sum = 0.;
1630     int    i;
1631     int    max;
1632     /* normalization factor multiplied with bin width and
1633        number of samples (we normalize through M): */
1634     double normdx = 1.;
1635     int    hd     = 0; /* determine the histogram direction: */
1636     double dx;
1637
1638     if ( (hist->nhist > 1) && (Wfac < 0) )
1639     {
1640         hd = 1;
1641     }
1642     dx  = hist->dx[hd];
1643     max = hist->nbin[hd]-1;
1644     if (type == 1)
1645     {
1646         max = hist->nbin[hd]; /* we also add whatever was out of range */
1647     }
1648
1649     for (i = 0; i < max; i++)
1650     {
1651         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1652         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1653
1654         sum += pxdx/(1. + exp(x + sbMmDG));
1655     }
1656
1657     return sum;
1658 }
1659
1660 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1661                                 double temp, double tol, int type)
1662 {
1663     double kT, beta, M;
1664     double DG;
1665     int    i, j;
1666     double Wfac1, Wfac2, Wmin, Wmax;
1667     double DG0, DG1, DG2, dDG1;
1668     double sum1, sum2;
1669     double n1, n2; /* numbers of samples as doubles */
1670
1671     kT   = BOLTZ*temp;
1672     beta = 1/kT;
1673
1674     /* count the numbers of samples */
1675     n1 = ca->ntot;
1676     n2 = cb->ntot;
1677
1678     M = log(n1/n2);
1679
1680     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1681     if (ca->foreign_lambda->dhdl < 0)
1682     {
1683         /* this is the case when the delta U were calculated directly
1684            (i.e. we're not scaling dhdl) */
1685         Wfac1 = beta;
1686         Wfac2 = beta;
1687     }
1688     else
1689     {
1690         /* we're using dhdl, so delta_lambda needs to be a
1691            multiplication factor.  */
1692         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1693         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1694                                                   ca->native_lambda);
1695         if (cb->native_lambda->lc->N > 1)
1696         {
1697             gmx_fatal(FARGS,
1698                       "Can't (yet) do multi-component dhdl interpolation");
1699         }
1700
1701         Wfac1 =  beta*delta_lambda;
1702         Wfac2 = -beta*delta_lambda;
1703     }
1704
1705     if (beta < 1)
1706     {
1707         /* We print the output both in kT and kJ/mol.
1708          * Here we determine DG in kT, so when beta < 1
1709          * the precision has to be increased.
1710          */
1711         tol *= beta;
1712     }
1713
1714     /* Calculate minimum and maximum work to give an initial estimate of
1715      * delta G  as their average.
1716      */
1717     {
1718         double Wmin1, Wmin2, Wmax1, Wmax2;
1719         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1720         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1721
1722         Wmin = min(Wmin1, Wmin2);
1723         Wmax = max(Wmax1, Wmax2);
1724     }
1725
1726     DG0 = Wmin;
1727     DG2 = Wmax;
1728
1729     if (debug)
1730     {
1731         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1732     }
1733     /* We approximate by bisection: given our initial estimates
1734        we keep checking whether the halfway point is greater or
1735        smaller than what we get out of the BAR averages.
1736
1737        For the comparison we can use twice the tolerance. */
1738     while (DG2 - DG0 > 2*tol)
1739     {
1740         DG1 = 0.5*(DG0 + DG2);
1741
1742         /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1743            DG1);*/
1744
1745         /* calculate the BAR averages */
1746         dDG1 = 0.;
1747
1748         for (i = 0; i < ca->nsamples; i++)
1749         {
1750             samples_t      *s = ca->s[i];
1751             sample_range_t *r = &(ca->r[i]);
1752             if (r->use)
1753             {
1754                 if (s->hist)
1755                 {
1756                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1757                 }
1758                 else
1759                 {
1760                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1761                                          Wfac1, (M-DG1));
1762                 }
1763             }
1764         }
1765         for (i = 0; i < cb->nsamples; i++)
1766         {
1767             samples_t      *s = cb->s[i];
1768             sample_range_t *r = &(cb->r[i]);
1769             if (r->use)
1770             {
1771                 if (s->hist)
1772                 {
1773                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1774                 }
1775                 else
1776                 {
1777                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1778                                          Wfac2, -(M-DG1));
1779                 }
1780             }
1781         }
1782
1783         if (dDG1 < 0)
1784         {
1785             DG0 = DG1;
1786         }
1787         else
1788         {
1789             DG2 = DG1;
1790         }
1791         if (debug)
1792         {
1793             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1794         }
1795     }
1796
1797     return 0.5*(DG0 + DG2);
1798 }
1799
1800 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1801                              double temp, double dg, double *sa, double *sb)
1802 {
1803     int    i, j;
1804     double W_ab = 0.;
1805     double W_ba = 0.;
1806     double kT, beta;
1807     double Wfac1, Wfac2;
1808     double n1, n2;
1809
1810     kT   = BOLTZ*temp;
1811     beta = 1/kT;
1812
1813     /* count the numbers of samples */
1814     n1 = ca->ntot;
1815     n2 = cb->ntot;
1816
1817     /* to ensure the work values are the same as during the delta_G */
1818     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1819     if (ca->foreign_lambda->dhdl < 0)
1820     {
1821         /* this is the case when the delta U were calculated directly
1822            (i.e. we're not scaling dhdl) */
1823         Wfac1 = beta;
1824         Wfac2 = beta;
1825     }
1826     else
1827     {
1828         /* we're using dhdl, so delta_lambda needs to be a
1829            multiplication factor.  */
1830         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1831                                                   ca->native_lambda);
1832         Wfac1 =  beta*delta_lambda;
1833         Wfac2 = -beta*delta_lambda;
1834     }
1835
1836     /* first calculate the average work in both directions */
1837     for (i = 0; i < ca->nsamples; i++)
1838     {
1839         samples_t      *s = ca->s[i];
1840         sample_range_t *r = &(ca->r[i]);
1841         if (r->use)
1842         {
1843             if (!s->hist)
1844             {
1845                 for (j = r->start; j < r->end; j++)
1846                 {
1847                     W_ab += Wfac1*s->du[j];
1848                 }
1849             }
1850             else
1851             {
1852                 /* normalization factor multiplied with bin width and
1853                    number of samples (we normalize through M): */
1854                 double normdx = 1.;
1855                 double dx;
1856                 int    hd = 0; /* histogram direction */
1857                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1858                 {
1859                     hd = 1;
1860                 }
1861                 dx = s->hist->dx[hd];
1862
1863                 for (j = 0; j < s->hist->nbin[0]; j++)
1864                 {
1865                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1866                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1867                     W_ab += pxdx*x;
1868                 }
1869             }
1870         }
1871     }
1872     W_ab /= n1;
1873
1874     for (i = 0; i < cb->nsamples; i++)
1875     {
1876         samples_t      *s = cb->s[i];
1877         sample_range_t *r = &(cb->r[i]);
1878         if (r->use)
1879         {
1880             if (!s->hist)
1881             {
1882                 for (j = r->start; j < r->end; j++)
1883                 {
1884                     W_ba += Wfac1*s->du[j];
1885                 }
1886             }
1887             else
1888             {
1889                 /* normalization factor multiplied with bin width and
1890                    number of samples (we normalize through M): */
1891                 double normdx = 1.;
1892                 double dx;
1893                 int    hd = 0; /* histogram direction */
1894                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1895                 {
1896                     hd = 1;
1897                 }
1898                 dx = s->hist->dx[hd];
1899
1900                 for (j = 0; j < s->hist->nbin[0]; j++)
1901                 {
1902                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1903                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1904                     W_ba += pxdx*x;
1905                 }
1906             }
1907         }
1908     }
1909     W_ba /= n2;
1910
1911     /* then calculate the relative entropies */
1912     *sa = (W_ab - dg);
1913     *sb = (W_ba + dg);
1914 }
1915
1916 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1917                            double temp, double dg, double *stddev)
1918 {
1919     int    i, j;
1920     double M;
1921     double sigmafact = 0.;
1922     double kT, beta;
1923     double Wfac1, Wfac2;
1924     double n1, n2;
1925
1926     kT   = BOLTZ*temp;
1927     beta = 1/kT;
1928
1929     /* count the numbers of samples */
1930     n1 = ca->ntot;
1931     n2 = cb->ntot;
1932
1933     /* to ensure the work values are the same as during the delta_G */
1934     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1935     if (ca->foreign_lambda->dhdl < 0)
1936     {
1937         /* this is the case when the delta U were calculated directly
1938            (i.e. we're not scaling dhdl) */
1939         Wfac1 = beta;
1940         Wfac2 = beta;
1941     }
1942     else
1943     {
1944         /* we're using dhdl, so delta_lambda needs to be a
1945            multiplication factor.  */
1946         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1947                                                   ca->native_lambda);
1948         Wfac1 =  beta*delta_lambda;
1949         Wfac2 = -beta*delta_lambda;
1950     }
1951
1952     M = log(n1/n2);
1953
1954
1955     /* calculate average in both directions */
1956     for (i = 0; i < ca->nsamples; i++)
1957     {
1958         samples_t      *s = ca->s[i];
1959         sample_range_t *r = &(ca->r[i]);
1960         if (r->use)
1961         {
1962             if (!s->hist)
1963             {
1964                 for (j = r->start; j < r->end; j++)
1965                 {
1966                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1967                 }
1968             }
1969             else
1970             {
1971                 /* normalization factor multiplied with bin width and
1972                    number of samples (we normalize through M): */
1973                 double normdx = 1.;
1974                 double dx;
1975                 int    hd = 0; /* histogram direction */
1976                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1977                 {
1978                     hd = 1;
1979                 }
1980                 dx = s->hist->dx[hd];
1981
1982                 for (j = 0; j < s->hist->nbin[0]; j++)
1983                 {
1984                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1985                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1986
1987                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1988                 }
1989             }
1990         }
1991     }
1992     for (i = 0; i < cb->nsamples; i++)
1993     {
1994         samples_t      *s = cb->s[i];
1995         sample_range_t *r = &(cb->r[i]);
1996         if (r->use)
1997         {
1998             if (!s->hist)
1999             {
2000                 for (j = r->start; j < r->end; j++)
2001                 {
2002                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
2003                 }
2004             }
2005             else
2006             {
2007                 /* normalization factor multiplied with bin width and
2008                    number of samples (we normalize through M): */
2009                 double normdx = 1.;
2010                 double dx;
2011                 int    hd = 0; /* histogram direction */
2012                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2013                 {
2014                     hd = 1;
2015                 }
2016                 dx = s->hist->dx[hd];
2017
2018                 for (j = 0; j < s->hist->nbin[0]; j++)
2019                 {
2020                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2021                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2022
2023                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2024                 }
2025             }
2026         }
2027     }
2028
2029     sigmafact /= (n1 + n2);
2030
2031
2032     /* Eq. 10 from
2033        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2034     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2035 }
2036
2037
2038
2039 static void calc_bar(barres_t *br, double tol,
2040                      int npee_min, int npee_max, gmx_bool *bEE,
2041                      double *partsum)
2042 {
2043     int      npee, p;
2044     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2045                                                         for calculated quantities */
2046     int      nsample1, nsample2;
2047     double   temp = br->a->temp;
2048     int      i, j;
2049     double   dg_min, dg_max;
2050     gmx_bool have_hist = FALSE;
2051
2052     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2053
2054     br->dg_disc_err      = 0.;
2055     br->dg_histrange_err = 0.;
2056
2057     /* check if there are histograms */
2058     for (i = 0; i < br->a->nsamples; i++)
2059     {
2060         if (br->a->r[i].use && br->a->s[i]->hist)
2061         {
2062             have_hist = TRUE;
2063             break;
2064         }
2065     }
2066     if (!have_hist)
2067     {
2068         for (i = 0; i < br->b->nsamples; i++)
2069         {
2070             if (br->b->r[i].use && br->b->s[i]->hist)
2071             {
2072                 have_hist = TRUE;
2073                 break;
2074             }
2075         }
2076     }
2077
2078     /* calculate histogram-specific errors */
2079     if (have_hist)
2080     {
2081         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2082         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2083
2084         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2085         {
2086             /* the histogram range  error is the biggest of the differences
2087                between the best estimate and the extremes */
2088             br->dg_histrange_err = fabs(dg_max - dg_min);
2089         }
2090         br->dg_disc_err = 0.;
2091         for (i = 0; i < br->a->nsamples; i++)
2092         {
2093             if (br->a->s[i]->hist)
2094             {
2095                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2096             }
2097         }
2098         for (i = 0; i < br->b->nsamples; i++)
2099         {
2100             if (br->b->s[i]->hist)
2101             {
2102                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2103             }
2104         }
2105     }
2106     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2107
2108     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2109
2110     dg_sig2     = 0;
2111     sa_sig2     = 0;
2112     sb_sig2     = 0;
2113     stddev_sig2 = 0;
2114
2115     *bEE = TRUE;
2116     {
2117         sample_coll_t ca, cb;
2118
2119         /* initialize the samples */
2120         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2121                          br->a->temp);
2122         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2123                          br->b->temp);
2124
2125         for (npee = npee_min; npee <= npee_max; npee++)
2126         {
2127             double dgs      = 0;
2128             double dgs2     = 0;
2129             double dsa      = 0;
2130             double dsb      = 0;
2131             double dsa2     = 0;
2132             double dsb2     = 0;
2133             double dstddev  = 0;
2134             double dstddev2 = 0;
2135
2136
2137             for (p = 0; p < npee; p++)
2138             {
2139                 double   dgp;
2140                 double   stddevc;
2141                 double   sac, sbc;
2142                 gmx_bool cac, cbc;
2143
2144                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2145                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2146
2147                 if (!cac || !cbc)
2148                 {
2149                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2150                     *bEE = FALSE;
2151                     if (cac)
2152                     {
2153                         sample_coll_destroy(&ca);
2154                     }
2155                     if (cbc)
2156                     {
2157                         sample_coll_destroy(&cb);
2158                     }
2159                     return;
2160                 }
2161
2162                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2163                 dgs  += dgp;
2164                 dgs2 += dgp*dgp;
2165
2166                 partsum[npee*(npee_max+1)+p] += dgp;
2167
2168                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2169                 dsa  += sac;
2170                 dsa2 += sac*sac;
2171                 dsb  += sbc;
2172                 dsb2 += sbc*sbc;
2173                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2174
2175                 dstddev  += stddevc;
2176                 dstddev2 += stddevc*stddevc;
2177
2178                 sample_coll_destroy(&ca);
2179                 sample_coll_destroy(&cb);
2180             }
2181             dgs     /= npee;
2182             dgs2    /= npee;
2183             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2184
2185             dsa     /= npee;
2186             dsa2    /= npee;
2187             dsb     /= npee;
2188             dsb2    /= npee;
2189             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2190             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2191
2192             dstddev     /= npee;
2193             dstddev2    /= npee;
2194             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2195         }
2196         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2197         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2198         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2199         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2200     }
2201 }
2202
2203
2204 static double bar_err(int nbmin, int nbmax, const double *partsum)
2205 {
2206     int    nb, b;
2207     double svar, s, s2, dg;
2208
2209     svar = 0;
2210     for (nb = nbmin; nb <= nbmax; nb++)
2211     {
2212         s  = 0;
2213         s2 = 0;
2214         for (b = 0; b < nb; b++)
2215         {
2216             dg  = partsum[nb*(nbmax+1)+b];
2217             s  += dg;
2218             s2 += dg*dg;
2219         }
2220         s    /= nb;
2221         s2   /= nb;
2222         svar += (s2 - s*s)/(nb - 1);
2223     }
2224
2225     return sqrt(svar/(nbmax + 1 - nbmin));
2226 }
2227
2228
2229 /* Seek the end of an identifier (consecutive non-spaces), followed by
2230    an optional number of spaces or '='-signs. Returns a pointer to the
2231    first non-space value found after that. Returns NULL if the string
2232    ends before that.
2233  */
2234 static const char *find_value(const char *str)
2235 {
2236     gmx_bool name_end_found = FALSE;
2237
2238     /* if the string is a NULL pointer, return a NULL pointer. */
2239     if (str == NULL)
2240     {
2241         return NULL;
2242     }
2243     while (*str != '\0')
2244     {
2245         /* first find the end of the name */
2246         if (!name_end_found)
2247         {
2248             if (isspace(*str) || (*str == '=') )
2249             {
2250                 name_end_found = TRUE;
2251             }
2252         }
2253         else
2254         {
2255             if (!( isspace(*str) || (*str == '=') ))
2256             {
2257                 return str;
2258             }
2259         }
2260         str++;
2261     }
2262     return NULL;
2263 }
2264
2265
2266
2267 /* read a vector-notation description of a lambda vector */
2268 static gmx_bool read_lambda_compvec(const char                *str,
2269                                     lambda_vec_t              *lv,
2270                                     const lambda_components_t *lc_in,
2271                                     lambda_components_t       *lc_out,
2272                                     const char               **end,
2273                                     const char                *fn)
2274 {
2275     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2276                                           components, or to check them */
2277     gmx_bool    start_reached = FALSE; /* whether the start of component names
2278                                           has been reached */
2279     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2280     int         n             = 0;     /* current component number */
2281     const char *val_start     = NULL;  /* start of the component name, or NULL
2282                                           if not in a value */
2283     char       *strtod_end;
2284     gmx_bool    OK = TRUE;
2285
2286     if (end)
2287     {
2288         *end = str;
2289     }
2290
2291
2292     if (lc_out && lc_out->N == 0)
2293     {
2294         initialize_lc = TRUE;
2295     }
2296
2297     if (lc_in == NULL)
2298     {
2299         lc_in = lc_out;
2300     }
2301
2302     while (1)
2303     {
2304         if (!start_reached)
2305         {
2306             if (isalnum(*str))
2307             {
2308                 vector        = FALSE;
2309                 start_reached = TRUE;
2310                 val_start     = str;
2311             }
2312             else if (*str == '(')
2313             {
2314                 vector        = TRUE;
2315                 start_reached = TRUE;
2316             }
2317             else if (!isspace(*str))
2318             {
2319                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2320             }
2321         }
2322         else
2323         {
2324             if (val_start)
2325             {
2326                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2327                 {
2328                     /* end of value */
2329                     if (lv == NULL)
2330                     {
2331                         if (initialize_lc)
2332                         {
2333                             lambda_components_add(lc_out, val_start,
2334                                                   (str-val_start));
2335                         }
2336                         else
2337                         {
2338                             if (!lambda_components_check(lc_out, n, val_start,
2339                                                          (str-val_start)))
2340                             {
2341                                 return FALSE;
2342                             }
2343                         }
2344                     }
2345                     else
2346                     {
2347                         /* add a vector component to lv */
2348                         lv->val[n] = strtod(val_start, &strtod_end);
2349                         if (val_start == strtod_end)
2350                         {
2351                             gmx_fatal(FARGS,
2352                                       "Error reading lambda vector in %s", fn);
2353                         }
2354                     }
2355                     /* reset for the next identifier */
2356                     val_start = NULL;
2357                     n++;
2358                     if (!vector)
2359                     {
2360                         return OK;
2361                     }
2362                 }
2363             }
2364             else if (isalnum(*str))
2365             {
2366                 val_start = str;
2367             }
2368             if (*str == ')')
2369             {
2370                 str++;
2371                 if (end)
2372                 {
2373                     *end = str;
2374                 }
2375                 if (!vector)
2376                 {
2377                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2378                 }
2379                 else
2380                 {
2381                     if (n == lc_in->N)
2382                     {
2383                         return OK;
2384                     }
2385                     else if (lv == NULL)
2386                     {
2387                         return FALSE;
2388                     }
2389                     else
2390                     {
2391                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2392                                   fn);
2393                         return FALSE;
2394                     }
2395
2396                 }
2397             }
2398         }
2399         if (*str == '\0')
2400         {
2401             break;
2402         }
2403         str++;
2404         if (end)
2405         {
2406             *end = str;
2407         }
2408     }
2409     if (vector)
2410     {
2411         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2412         return FALSE;
2413     }
2414     return OK;
2415 }
2416
2417 /* read and check the component names from a string */
2418 static gmx_bool read_lambda_components(const char          *str,
2419                                        lambda_components_t *lc,
2420                                        const char         **end,
2421                                        const char          *fn)
2422 {
2423     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2424 }
2425
2426 /* read an initialized lambda vector from a string */
2427 static gmx_bool read_lambda_vector(const char   *str,
2428                                    lambda_vec_t *lv,
2429                                    const char  **end,
2430                                    const char   *fn)
2431 {
2432     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2433 }
2434
2435
2436
2437 /* deduce lambda value from legend.
2438     fn = the file name
2439     legend = the legend string
2440     ba = the xvg data
2441     lam = the initialized lambda vector
2442    returns whether to use the data in this set.
2443  */
2444 static gmx_bool legend2lambda(const char   *fn,
2445                               const char   *legend,
2446                               xvg_t        *ba,
2447                               lambda_vec_t *lam)
2448 {
2449     double        lambda = 0;
2450     const char   *ptr    = NULL, *ptr2 = NULL;
2451     gmx_bool      ok     = FALSE;
2452     gmx_bool      bdhdl  = FALSE;
2453     const char   *tostr  = " to ";
2454
2455     if (legend == NULL)
2456     {
2457         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2458     }
2459
2460     /* look for the last 'to': */
2461     ptr2 = legend;
2462     do
2463     {
2464         ptr2 = strstr(ptr2, tostr);
2465         if (ptr2 != NULL)
2466         {
2467             ptr = ptr2;
2468             ptr2++;
2469         }
2470     }
2471     while (ptr2 != NULL && *ptr2 != '\0');
2472
2473     if (ptr)
2474     {
2475         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2476     }
2477     else
2478     {
2479         /* look for the = sign */
2480         ptr = strrchr(legend, '=');
2481         if (!ptr)
2482         {
2483             /* otherwise look for the last space */
2484             ptr = strrchr(legend, ' ');
2485         }
2486     }
2487
2488     if (strstr(legend, "dH"))
2489     {
2490         ok    = TRUE;
2491         bdhdl = TRUE;
2492     }
2493     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2494     {
2495         ok    = TRUE;
2496         bdhdl = FALSE;
2497     }
2498     else /*if (strstr(legend, "pV"))*/
2499     {
2500         return FALSE;
2501     }
2502     if (!ptr)
2503     {
2504         ok = FALSE;
2505     }
2506
2507     if (!ok)
2508     {
2509         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2510     }
2511     if (!bdhdl)
2512     {
2513         ptr = find_value(ptr);
2514         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2515         {
2516             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2517         }
2518     }
2519     else
2520     {
2521         int         dhdl_index;
2522         const char *end;
2523         char        buf[STRLEN];
2524
2525         ptr = strrchr(legend, '=');
2526         end = ptr;
2527         if (ptr)
2528         {
2529             /* there must be a component name */
2530             ptr--;
2531             if (ptr < legend)
2532             {
2533                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2534             }
2535             /* now backtrack to the start of the identifier */
2536             while (isspace(*ptr))
2537             {
2538                 end = ptr;
2539                 ptr--;
2540                 if (ptr < legend)
2541                 {
2542                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2543                 }
2544             }
2545             while (!isspace(*ptr))
2546             {
2547                 ptr--;
2548                 if (ptr < legend)
2549                 {
2550                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2551                 }
2552             }
2553             ptr++;
2554             strncpy(buf, ptr, (end-ptr));
2555             buf[(end-ptr)] = '\0';
2556             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2557             if (dhdl_index < 0)
2558             {
2559                 char buf[STRLEN];
2560                 strncpy(buf, ptr, (end-ptr));
2561                 buf[(end-ptr)] = '\0';
2562                 gmx_fatal(FARGS,
2563                           "Did not find lambda component for '%s' in %s",
2564                           buf, fn);
2565             }
2566         }
2567         else
2568         {
2569             if (lam->lc->N > 1)
2570             {
2571                 gmx_fatal(FARGS,
2572                           "dhdl without component name with >1 lambda component in %s",
2573                           fn);
2574             }
2575             dhdl_index = 0;
2576         }
2577         lam->dhdl = dhdl_index;
2578     }
2579     return TRUE;
2580 }
2581
2582 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2583                                 lambda_components_t *lc)
2584 {
2585     gmx_bool    bFound;
2586     const char *ptr;
2587     char       *end;
2588     double      native_lambda;
2589
2590     bFound = FALSE;
2591
2592     /* first check for a state string */
2593     ptr = strstr(subtitle, "state");
2594     if (ptr)
2595     {
2596         int         index = -1;
2597         const char *val_end;
2598
2599         /* the new 4.6 style lambda vectors */
2600         ptr = find_value(ptr);
2601         if (ptr)
2602         {
2603             index = strtol(ptr, &end, 10);
2604             if (ptr == end)
2605             {
2606                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2607                 return FALSE;
2608             }
2609             ptr = end;
2610         }
2611         else
2612         {
2613             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2614             return FALSE;
2615         }
2616         /* now find the lambda vector component names */
2617         while (*ptr != '(' && !isalnum(*ptr))
2618         {
2619             ptr++;
2620             if (*ptr == '\0')
2621             {
2622                 gmx_fatal(FARGS,
2623                           "Incomplete lambda vector component data in %s", fn);
2624                 return FALSE;
2625             }
2626         }
2627         val_end = ptr;
2628         if (!read_lambda_components(ptr, lc, &val_end, fn))
2629         {
2630             gmx_fatal(FARGS,
2631                       "lambda vector components in %s don't match those previously read",
2632                       fn);
2633         }
2634         ptr = find_value(val_end);
2635         if (!ptr)
2636         {
2637             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2638             return FALSE;
2639         }
2640         lambda_vec_init(&(ba->native_lambda), lc);
2641         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2642         {
2643             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2644         }
2645         ba->native_lambda.index = index;
2646         bFound                  = TRUE;
2647     }
2648     else
2649     {
2650         /* compatibility mode: check for lambda in other ways. */
2651         /* plain text lambda string */
2652         ptr = strstr(subtitle, "lambda");
2653         if (ptr == NULL)
2654         {
2655             /* xmgrace formatted lambda string */
2656             ptr = strstr(subtitle, "\\xl\\f{}");
2657         }
2658         if (ptr == NULL)
2659         {
2660             /* xmgr formatted lambda string */
2661             ptr = strstr(subtitle, "\\8l\\4");
2662         }
2663         if (ptr != NULL)
2664         {
2665             ptr = strstr(ptr, "=");
2666         }
2667         if (ptr != NULL)
2668         {
2669             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2670             /* add the lambda component name as an empty string */
2671             if (lc->N > 0)
2672             {
2673                 if (!lambda_components_check(lc, 0, "", 0))
2674                 {
2675                     gmx_fatal(FARGS,
2676                               "lambda vector components in %s don't match those previously read",
2677                               fn);
2678                 }
2679             }
2680             else
2681             {
2682                 lambda_components_add(lc, "", 0);
2683             }
2684             lambda_vec_init(&(ba->native_lambda), lc);
2685             ba->native_lambda.val[0] = native_lambda;
2686         }
2687     }
2688
2689     return bFound;
2690 }
2691
2692 static void filename2lambda(const char *fn, xvg_t *ba)
2693 {
2694     double        lambda;
2695     const char   *ptr, *digitptr;
2696     char         *endptr;
2697     int           dirsep;
2698     ptr = fn;
2699     /* go to the end of the path string and search backward to find the last
2700        directory in the path which has to contain the value of lambda
2701      */
2702     while (ptr[1] != '\0')
2703     {
2704         ptr++;
2705     }
2706     /* searching backward to find the second directory separator */
2707     dirsep   = 0;
2708     digitptr = NULL;
2709     while (ptr >= fn)
2710     {
2711         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2712         {
2713             if (dirsep == 1)
2714             {
2715                 break;
2716             }
2717             dirsep++;
2718         }
2719         /* save the last position of a digit between the last two
2720            separators = in the last dirname */
2721         if (dirsep > 0 && isdigit(*ptr))
2722         {
2723             digitptr = ptr;
2724         }
2725         ptr--;
2726     }
2727     if (!digitptr)
2728     {
2729         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2730                   " last directory in the path '%s' does not contain a number", fn);
2731     }
2732     if (digitptr[-1] == '-')
2733     {
2734         digitptr--;
2735     }
2736     lambda = strtod(digitptr, &endptr);
2737     if (endptr == digitptr)
2738     {
2739         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2740     }
2741 }
2742
2743 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2744                                   lambda_components_t *lc)
2745 {
2746     int          i;
2747     char        *subtitle, **legend, *ptr;
2748     int          np;
2749     gmx_bool     native_lambda_read = FALSE;
2750     char         buf[STRLEN];
2751     lambda_vec_t lv;
2752
2753     xvg_init(ba);
2754
2755     ba->filename = fn;
2756
2757     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2758     if (!ba->y)
2759     {
2760         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2761     }
2762     /* Reorder the data */
2763     ba->t  = ba->y[0];
2764     for (i = 1; i < ba->nset; i++)
2765     {
2766         ba->y[i-1] = ba->y[i];
2767     }
2768     ba->nset--;
2769
2770     snew(ba->np, ba->nset);
2771     for (i = 0; i < ba->nset; i++)
2772     {
2773         ba->np[i] = np;
2774     }
2775
2776     ba->temp = -1;
2777     if (subtitle != NULL)
2778     {
2779         /* try to extract temperature */
2780         ptr = strstr(subtitle, "T =");
2781         if (ptr != NULL)
2782         {
2783             ptr += 3;
2784             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2785             {
2786                 if (ba->temp <= 0)
2787                 {
2788                     gmx_fatal(FARGS, "Found temperature of %g in file '%s'",
2789                               ba->temp, fn);
2790                 }
2791             }
2792         }
2793     }
2794     if (ba->temp < 0)
2795     {
2796         if (*temp <= 0)
2797         {
2798             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]", fn);
2799         }
2800         ba->temp = *temp;
2801     }
2802
2803     /* Try to deduce lambda from the subtitle */
2804     if (subtitle)
2805     {
2806         if (subtitle2lambda(subtitle, ba, fn, lc))
2807         {
2808             native_lambda_read = TRUE;
2809         }
2810     }
2811     snew(ba->lambda, ba->nset);
2812     if (legend == NULL)
2813     {
2814         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2815         if (ba->nset == 1)
2816         {
2817             if (!native_lambda_read)
2818             {
2819                 /* Deduce lambda from the file name */
2820                 filename2lambda(fn, ba);
2821                 native_lambda_read = TRUE;
2822             }
2823             ba->lambda[0] = ba->native_lambda;
2824         }
2825         else
2826         {
2827             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2828         }
2829     }
2830     else
2831     {
2832         for (i = 0; i < ba->nset; )
2833         {
2834             gmx_bool use = FALSE;
2835             /* Read lambda from the legend */
2836             lambda_vec_init( &(ba->lambda[i]), lc );
2837             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2838             use = legend2lambda(fn, legend[i], ba, &(ba->lambda[i]));
2839             if (use)
2840             {
2841                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2842                 i++;
2843             }
2844             else
2845             {
2846                 int j;
2847                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2848                 for (j = i+1; j < ba->nset; j++)
2849                 {
2850                     ba->y[j-1]  = ba->y[j];
2851                     legend[j-1] = legend[j];
2852                 }
2853                 ba->nset--;
2854             }
2855         }
2856     }
2857
2858     if (!native_lambda_read)
2859     {
2860         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2861     }
2862
2863     if (legend != NULL)
2864     {
2865         for (i = 0; i < ba->nset-1; i++)
2866         {
2867             sfree(legend[i]);
2868         }
2869         sfree(legend);
2870     }
2871 }
2872
2873 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2874 {
2875     xvg_t     *barsim;
2876     samples_t *s;
2877     int        i;
2878     double    *lambda;
2879
2880     snew(barsim, 1);
2881
2882     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2883
2884     if (barsim->nset < 1)
2885     {
2886         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2887     }
2888
2889     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2890     {
2891         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2892     }
2893     *temp = barsim->temp;
2894
2895     /* now create a series of samples_t */
2896     snew(s, barsim->nset);
2897     for (i = 0; i < barsim->nset; i++)
2898     {
2899         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2900                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2901                                                    &(barsim->lambda[i])),
2902                      fn);
2903         s[i].du  = barsim->y[i];
2904         s[i].ndu = barsim->np[i];
2905         s[i].t   = barsim->t;
2906
2907         lambda_data_list_insert_sample(sd->lb, s+i);
2908     }
2909     {
2910         char buf[STRLEN];
2911
2912         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2913         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2914                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2915         for (i = 0; i < barsim->nset; i++)
2916         {
2917             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2918             printf("        %s (%d pts)\n", buf, s[i].ndu);
2919         }
2920     }
2921     printf("\n\n");
2922 }
2923
2924 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2925                                  double start_time, double delta_time,
2926                                  lambda_vec_t *native_lambda, double temp,
2927                                  double *last_t, const char *filename)
2928 {
2929     int           i, j;
2930     gmx_bool      allocated;
2931     double        old_foreign_lambda;
2932     lambda_vec_t *foreign_lambda;
2933     int           type;
2934     samples_t    *s; /* convenience pointer */
2935     int           startj;
2936
2937     /* check the block types etc. */
2938     if ( (blk->nsub < 3) ||
2939          (blk->sub[0].type != xdr_datatype_int) ||
2940          (blk->sub[1].type != xdr_datatype_double) ||
2941          (
2942              (blk->sub[2].type != xdr_datatype_float) &&
2943              (blk->sub[2].type != xdr_datatype_double)
2944          ) ||
2945          (blk->sub[0].nr < 1) ||
2946          (blk->sub[1].nr < 1) )
2947     {
2948         gmx_fatal(FARGS,
2949                   "Unexpected/corrupted block data in file %s around time %g.",
2950                   filename, start_time);
2951     }
2952
2953     snew(foreign_lambda, 1);
2954     lambda_vec_init(foreign_lambda, native_lambda->lc);
2955     lambda_vec_copy(foreign_lambda, native_lambda);
2956     type = blk->sub[0].ival[0];
2957     if (type == dhbtDH)
2958     {
2959         for (i = 0; i < native_lambda->lc->N; i++)
2960         {
2961             foreign_lambda->val[i] = blk->sub[1].dval[i];
2962         }
2963     }
2964     else
2965     {
2966         if (blk->sub[0].nr > 1)
2967         {
2968             foreign_lambda->dhdl = blk->sub[0].ival[1];
2969         }
2970         else
2971         {
2972             foreign_lambda->dhdl = 0;
2973         }
2974     }
2975
2976     if (!*smp)
2977     {
2978         /* initialize the samples structure if it's empty. */
2979         snew(*smp, 1);
2980         samples_init(*smp, native_lambda, foreign_lambda, temp,
2981                      type == dhbtDHDL, filename);
2982         (*smp)->start_time = start_time;
2983         (*smp)->delta_time = delta_time;
2984     }
2985
2986     /* set convenience pointer */
2987     s = *smp;
2988
2989     /* now double check */
2990     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2991     {
2992         char buf[STRLEN], buf2[STRLEN];
2993         lambda_vec_print(foreign_lambda, buf, FALSE);
2994         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2995         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2996         gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2997                   filename, start_time);
2998     }
2999
3000     /* make room for the data */
3001     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
3002     {
3003         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
3004             blk->sub[2].nr*2 : s->ndu_alloc;
3005         srenew(s->du_alloc, s->ndu_alloc);
3006         s->du = s->du_alloc;
3007     }
3008     startj   = s->ndu;
3009     s->ndu  += blk->sub[2].nr;
3010     s->ntot += blk->sub[2].nr;
3011     *ndu     = blk->sub[2].nr;
3012
3013     /* and copy the data*/
3014     for (j = 0; j < blk->sub[2].nr; j++)
3015     {
3016         if (blk->sub[2].type == xdr_datatype_float)
3017         {
3018             s->du[startj+j] = blk->sub[2].fval[j];
3019         }
3020         else
3021         {
3022             s->du[startj+j] = blk->sub[2].dval[j];
3023         }
3024     }
3025     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3026     {
3027         *last_t = start_time + blk->sub[2].nr*delta_time;
3028     }
3029 }
3030
3031 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3032                                       double start_time, double delta_time,
3033                                       lambda_vec_t *native_lambda, double temp,
3034                                       double *last_t, const char *filename)
3035 {
3036     int           i, j;
3037     samples_t    *s;
3038     int           nhist;
3039     double        old_foreign_lambda;
3040     lambda_vec_t *foreign_lambda;
3041     int           type;
3042     int           nbins[2];
3043
3044     /* check the block types etc. */
3045     if ( (blk->nsub < 2) ||
3046          (blk->sub[0].type != xdr_datatype_double) ||
3047          (blk->sub[1].type != xdr_datatype_large_int) ||
3048          (blk->sub[0].nr < 2)  ||
3049          (blk->sub[1].nr < 2) )
3050     {
3051         gmx_fatal(FARGS,
3052                   "Unexpected/corrupted block data in file %s around time %g",
3053                   filename, start_time);
3054     }
3055
3056     nhist = blk->nsub-2;
3057     if (nhist == 0)
3058     {
3059         return NULL;
3060     }
3061     if (nhist > 2)
3062     {
3063         gmx_fatal(FARGS,
3064                   "Unexpected/corrupted block data in file %s around time %g",
3065                   filename, start_time);
3066     }
3067
3068     snew(s, 1);
3069     *nsamples = 1;
3070
3071     snew(foreign_lambda, 1);
3072     lambda_vec_init(foreign_lambda, native_lambda->lc);
3073     lambda_vec_copy(foreign_lambda, native_lambda);
3074     type = (int)(blk->sub[1].lval[1]);
3075     if (type == dhbtDH)
3076     {
3077         double old_foreign_lambda;
3078
3079         old_foreign_lambda = blk->sub[0].dval[0];
3080         if (old_foreign_lambda >= 0)
3081         {
3082             foreign_lambda->val[0] = old_foreign_lambda;
3083             if (foreign_lambda->lc->N > 1)
3084             {
3085                 gmx_fatal(FARGS,
3086                           "Single-component lambda in multi-component file %s",
3087                           filename);
3088             }
3089         }
3090         else
3091         {
3092             for (i = 0; i < native_lambda->lc->N; i++)
3093             {
3094                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3095             }
3096         }
3097     }
3098     else
3099     {
3100         if (foreign_lambda->lc->N > 1)
3101         {
3102             if (blk->sub[1].nr < 3 + nhist)
3103             {
3104                 gmx_fatal(FARGS,
3105                           "Missing derivative coord in multi-component file %s",
3106                           filename);
3107             }
3108             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3109         }
3110         else
3111         {
3112             foreign_lambda->dhdl = 0;
3113         }
3114     }
3115
3116     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3117                  filename);
3118     snew(s->hist, 1);
3119
3120     for (i = 0; i < nhist; i++)
3121     {
3122         nbins[i] = blk->sub[i+2].nr;
3123     }
3124
3125     hist_init(s->hist, nhist, nbins);
3126
3127     for (i = 0; i < nhist; i++)
3128     {
3129         s->hist->x0[i] = blk->sub[1].lval[2+i];
3130         s->hist->dx[i] = blk->sub[0].dval[1];
3131         if (i == 1)
3132         {
3133             s->hist->dx[i] = -s->hist->dx[i];
3134         }
3135     }
3136
3137     s->hist->start_time = start_time;
3138     s->hist->delta_time = delta_time;
3139     s->start_time       = start_time;
3140     s->delta_time       = delta_time;
3141
3142     for (i = 0; i < nhist; i++)
3143     {
3144         int             nbin;
3145         gmx_large_int_t sum = 0;
3146
3147         for (j = 0; j < s->hist->nbin[i]; j++)
3148         {
3149             int binv = (int)(blk->sub[i+2].ival[j]);
3150
3151             s->hist->bin[i][j] = binv;
3152             sum               += binv;
3153
3154         }
3155         if (i == 0)
3156         {
3157             s->ntot      = sum;
3158             s->hist->sum = sum;
3159         }
3160         else
3161         {
3162             if (s->ntot != sum)
3163             {
3164                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3165                           filename);
3166             }
3167         }
3168     }
3169
3170     if (start_time + s->hist->sum*delta_time > *last_t)
3171     {
3172         *last_t = start_time + s->hist->sum*delta_time;
3173     }
3174     return s;
3175 }
3176
3177
3178 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3179 {
3180     int            i, j;
3181     ener_file_t    fp;
3182     t_enxframe    *fr;
3183     int            nre;
3184     gmx_enxnm_t   *enm           = NULL;
3185     double         first_t       = -1;
3186     double         last_t        = -1;
3187     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3188     int           *nhists        = NULL; /* array to keep count & print at end */
3189     int           *npts          = NULL; /* array to keep count & print at end */
3190     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3191     lambda_vec_t  *native_lambda;
3192     double         end_time;             /* the end time of the last batch of samples */
3193     int            nsamples = 0;
3194     lambda_vec_t   start_lambda;
3195
3196     fp = open_enx(fn, "r");
3197     do_enxnms(fp, &nre, &enm);
3198     snew(fr, 1);
3199
3200     snew(native_lambda, 1);
3201     start_lambda.lc = NULL;
3202
3203     while (do_enx(fp, fr))
3204     {
3205         /* count the data blocks */
3206         int    nblocks_raw  = 0;
3207         int    nblocks_hist = 0;
3208         int    nlam         = 0;
3209         int    k;
3210         /* DHCOLL block information: */
3211         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3212         double rtemp      = 0;
3213
3214         /* count the blocks and handle collection information: */
3215         for (i = 0; i < fr->nblock; i++)
3216         {
3217             if (fr->block[i].id == enxDHHIST)
3218             {
3219                 nblocks_hist++;
3220             }
3221             if (fr->block[i].id == enxDH)
3222             {
3223                 nblocks_raw++;
3224             }
3225             if (fr->block[i].id == enxDHCOLL)
3226             {
3227                 nlam++;
3228                 if ( (fr->block[i].nsub < 1) ||
3229                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3230                      (fr->block[i].sub[0].nr < 5))
3231                 {
3232                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3233                 }
3234
3235                 /* read the data from the DHCOLL block */
3236                 rtemp            =        fr->block[i].sub[0].dval[0];
3237                 start_time       =   fr->block[i].sub[0].dval[1];
3238                 delta_time       =   fr->block[i].sub[0].dval[2];
3239                 old_start_lambda = fr->block[i].sub[0].dval[3];
3240                 delta_lambda     = fr->block[i].sub[0].dval[4];
3241
3242                 if (delta_lambda > 0)
3243                 {
3244                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3245                 }
3246                 if ( ( *temp != rtemp) && (*temp > 0) )
3247                 {
3248                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3249                 }
3250                 *temp = rtemp;
3251
3252                 if (old_start_lambda >= 0)
3253                 {
3254                     if (sd->lc.N > 0)
3255                     {
3256                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3257                         {
3258                             gmx_fatal(FARGS,
3259                                       "lambda vector components in %s don't match those previously read",
3260                                       fn);
3261                         }
3262                     }
3263                     else
3264                     {
3265                         lambda_components_add(&(sd->lc), "", 0);
3266                     }
3267                     if (!start_lambda.lc)
3268                     {
3269                         lambda_vec_init(&start_lambda, &(sd->lc));
3270                     }
3271                     start_lambda.val[0] = old_start_lambda;
3272                 }
3273                 else
3274                 {
3275                     /* read lambda vector */
3276                     int      n_lambda_vec;
3277                     gmx_bool check = (sd->lc.N > 0);
3278                     if (fr->block[i].nsub < 2)
3279                     {
3280                         gmx_fatal(FARGS,
3281                                   "No lambda vector, but start_lambda=%g\n",
3282                                   old_start_lambda);
3283                     }
3284                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3285                     for (j = 0; j < n_lambda_vec; j++)
3286                     {
3287                         const char *name =
3288                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3289                         if (check)
3290                         {
3291                             /* check the components */
3292                             lambda_components_check(&(sd->lc), j, name,
3293                                                     strlen(name));
3294                         }
3295                         else
3296                         {
3297                             lambda_components_add(&(sd->lc), name,
3298                                                   strlen(name));
3299                         }
3300                     }
3301                     lambda_vec_init(&start_lambda, &(sd->lc));
3302                     start_lambda.index = fr->block[i].sub[1].ival[0];
3303                     for (j = 0; j < n_lambda_vec; j++)
3304                     {
3305                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3306                     }
3307                 }
3308                 if (first_t < 0)
3309                 {
3310                     first_t = start_time;
3311                 }
3312             }
3313         }
3314
3315         if (nlam != 1)
3316         {
3317             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3318         }
3319         if (nblocks_raw > 0 && nblocks_hist > 0)
3320         {
3321             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3322         }
3323
3324         if (nsamples > 0)
3325         {
3326             /* check the native lambda */
3327             if (!lambda_vec_same(&start_lambda, native_lambda) )
3328             {
3329                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
3330                           fn, native_lambda, start_lambda, start_time);
3331             }
3332             /* check the number of samples against the previous number */
3333             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3334             {
3335                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3336                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3337             }
3338             /* check whether last iterations's end time matches with
3339                the currrent start time */
3340             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3341             {
3342                 /* it didn't. We need to store our samples and reallocate */
3343                 for (i = 0; i < nsamples; i++)
3344                 {
3345                     if (samples_rawdh[i])
3346                     {
3347                         /* insert it into the existing list */
3348                         lambda_data_list_insert_sample(sd->lb,
3349                                                        samples_rawdh[i]);
3350                         /* and make sure we'll allocate a new one this time
3351                            around */
3352                         samples_rawdh[i] = NULL;
3353                     }
3354                 }
3355             }
3356         }
3357         else
3358         {
3359             /* this is the first round; allocate the associated data
3360                structures */
3361             /*native_lambda=start_lambda;*/
3362             lambda_vec_init(native_lambda, &(sd->lc));
3363             lambda_vec_copy(native_lambda, &start_lambda);
3364             nsamples = nblocks_raw+nblocks_hist;
3365             snew(nhists, nsamples);
3366             snew(npts, nsamples);
3367             snew(lambdas, nsamples);
3368             snew(samples_rawdh, nsamples);
3369             for (i = 0; i < nsamples; i++)
3370             {
3371                 nhists[i]        = 0;
3372                 npts[i]          = 0;
3373                 lambdas[i]       = NULL;
3374                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3375                                             ones contain values */
3376             }
3377         }
3378
3379         /* and read them */
3380         k = 0; /* counter for the lambdas, etc. arrays */
3381         for (i = 0; i < fr->nblock; i++)
3382         {
3383             if (fr->block[i].id == enxDH)
3384             {
3385                 int type = (fr->block[i].sub[0].ival[0]);
3386                 if (type == dhbtDH || type == dhbtDHDL)
3387                 {
3388                     int ndu;
3389                     read_edr_rawdh_block(&(samples_rawdh[k]),
3390                                          &ndu,
3391                                          &(fr->block[i]),
3392                                          start_time, delta_time,
3393                                          native_lambda, rtemp,
3394                                          &last_t, fn);
3395                     npts[k] += ndu;
3396                     if (samples_rawdh[k])
3397                     {
3398                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3399                     }
3400                     k++;
3401                 }
3402             }
3403             else if (fr->block[i].id == enxDHHIST)
3404             {
3405                 int type = (int)(fr->block[i].sub[1].lval[1]);
3406                 if (type == dhbtDH || type == dhbtDHDL)
3407                 {
3408                     int        j;
3409                     int        nb = 0;
3410                     samples_t *s; /* this is where the data will go */
3411                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3412                                             start_time, delta_time,
3413                                             native_lambda, rtemp,
3414                                             &last_t, fn);
3415                     nhists[k] += nb;
3416                     if (nb > 0)
3417                     {
3418                         lambdas[k] = s->foreign_lambda;
3419                     }
3420                     k++;
3421                     /* and insert the new sample immediately */
3422                     for (j = 0; j < nb; j++)
3423                     {
3424                         lambda_data_list_insert_sample(sd->lb, s+j);
3425                     }
3426                 }
3427             }
3428         }
3429     }
3430     /* Now store all our extant sample collections */
3431     for (i = 0; i < nsamples; i++)
3432     {
3433         if (samples_rawdh[i])
3434         {
3435             /* insert it into the existing list */
3436             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3437         }
3438     }
3439
3440
3441     {
3442         char buf[STRLEN];
3443         printf("\n");
3444         lambda_vec_print(native_lambda, buf, FALSE);
3445         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3446                fn, first_t, last_t, buf);
3447         for (i = 0; i < nsamples; i++)
3448         {
3449             if (lambdas[i])
3450             {
3451                 lambda_vec_print(lambdas[i], buf, TRUE);
3452                 if (nhists[i] > 0)
3453                 {
3454                     printf("        %s (%d hists)\n", buf, nhists[i]);
3455                 }
3456                 else
3457                 {
3458                     printf("        %s (%d pts)\n", buf, npts[i]);
3459                 }
3460             }
3461         }
3462     }
3463     printf("\n\n");
3464     sfree(npts);
3465     sfree(nhists);
3466     sfree(lambdas);
3467 }
3468
3469
3470 int gmx_bar(int argc, char *argv[])
3471 {
3472     static const char *desc[] = {
3473         "[TT]g_bar[tt] calculates free energy difference estimates through ",
3474         "Bennett's acceptance ratio method (BAR). It also automatically",
3475         "adds series of individual free energies obtained with BAR into",
3476         "a combined free energy estimate.[PAR]",
3477
3478         "Every individual BAR free energy difference relies on two ",
3479         "simulations at different states: say state A and state B, as",
3480         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3481         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3482         "average of the Hamiltonian difference of state B given state A and",
3483         "vice versa.",
3484         "The energy differences to the other state must be calculated",
3485         "explicitly during the simulation. This can be done with",
3486         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3487
3488         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3489         "Two types of input files are supported:[BR]",
3490         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3491         "The files should have columns ",
3492         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3493         "The [GRK]lambda[grk] values are inferred ",
3494         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3495         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3496         "legends of Delta H",
3497         "[BR]",
3498         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3499         "[TT]-extp[tt] option for these files, it is assumed",
3500         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3501         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3502         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3503         "subtitle (if present), otherwise from a number in the subdirectory ",
3504         "in the file name.[PAR]",
3505
3506         "The [GRK]lambda[grk] of the simulation is parsed from ",
3507         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3508         "foreign [GRK]lambda[grk] values from the legend containing the ",
3509         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3510         "the legend line containing 'T ='.[PAR]",
3511
3512         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3513         "These can contain either lists of energy differences (see the ",
3514         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3515         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3516         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3517         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3518
3519         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3520         "the energy difference can also be extrapolated from the ",
3521         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3522         "option, which assumes that the system's Hamiltonian depends linearly",
3523         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3524
3525         "The free energy estimates are determined using BAR with bisection, ",
3526         "with the precision of the output set with [TT]-prec[tt]. ",
3527         "An error estimate taking into account time correlations ",
3528         "is made by splitting the data into blocks and determining ",
3529         "the free energy differences over those blocks and assuming ",
3530         "the blocks are independent. ",
3531         "The final error estimate is determined from the average variance ",
3532         "over 5 blocks. A range of block numbers for error estimation can ",
3533         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3534
3535         "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
3536         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3537         "samples. [BB]Note[bb] that when aggregating energy ",
3538         "differences/derivatives with different sampling intervals, this is ",
3539         "almost certainly not correct. Usually subsequent energies are ",
3540         "correlated and different time intervals mean different degrees ",
3541         "of correlation between samples.[PAR]",
3542
3543         "The results are split in two parts: the last part contains the final ",
3544         "results in kJ/mol, together with the error estimate for each part ",
3545         "and the total. The first part contains detailed free energy ",
3546         "difference estimates and phase space overlap measures in units of ",
3547         "kT (together with their computed error estimate). The printed ",
3548         "values are:[BR]",
3549         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3550         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3551         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3552         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3553         "[TT]*[tt]    s_A: an estimate of the relative entropy of A in B.[BR]",
3554         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3555
3556         "The relative entropy of both states in each other's ensemble can be ",
3557         "interpreted as a measure of phase space overlap: ",
3558         "the relative entropy s_A of the work samples of lambda_B in the ",
3559         "ensemble of lambda_A (and vice versa for s_B), is a ",
3560         "measure of the 'distance' between Boltzmann distributions of ",
3561         "the two states, that goes to zero for identical distributions. See ",
3562         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3563         "[PAR]",
3564         "The estimate of the expected per-sample standard deviation, as given ",
3565         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3566         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3567         "of the actual statistical error, because it assumes independent samples).[PAR]",
3568
3569         "To get a visual estimate of the phase space overlap, use the ",
3570         "[TT]-oh[tt] option to write series of histograms, together with the ",
3571         "[TT]-nbin[tt] option.[PAR]"
3572     };
3573     static real        begin    = 0, end = -1, temp = -1;
3574     int                nd       = 2, nbmin = 5, nbmax = 5;
3575     int                nbin     = 100;
3576     gmx_bool           use_dhdl = FALSE;
3577     gmx_bool           calc_s, calc_v;
3578     t_pargs            pa[] = {
3579         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3580         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3581         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3582         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3583         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3584         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3585         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3586         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3587     };
3588
3589     t_filenm           fnm[] = {
3590         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3591         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3592         { efXVG, "-o",  "bar",    ffOPTWR },
3593         { efXVG, "-oi", "barint", ffOPTWR },
3594         { efXVG, "-oh", "histogram", ffOPTWR }
3595     };
3596 #define NFILE asize(fnm)
3597
3598     int          f, i, j;
3599     int          nf = 0;    /* file counter */
3600     int          nbs;
3601     int          nfile_tot; /* total number of input files */
3602     int          nxvgfile = 0;
3603     int          nedrfile = 0;
3604     char       **fxvgnms;
3605     char       **fedrnms;
3606     sim_data_t   sim_data; /* the simulation data */
3607     barres_t    *results;  /* the results */
3608     int          nresults; /* number of results in results array */
3609
3610     double      *partsum;
3611     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3612     FILE        *fpb, *fpi;
3613     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3614     char         buf[STRLEN], buf2[STRLEN];
3615     char         ktformat[STRLEN], sktformat[STRLEN];
3616     char         kteformat[STRLEN], skteformat[STRLEN];
3617     output_env_t oenv;
3618     double       kT, beta;
3619     gmx_bool     result_OK = TRUE, bEE = TRUE;
3620
3621     gmx_bool     disc_err          = FALSE;
3622     double       sum_disc_err      = 0.; /* discretization error */
3623     gmx_bool     histrange_err     = FALSE;
3624     double       sum_histrange_err = 0.; /* histogram range error */
3625     double       stat_err          = 0.; /* statistical error */
3626
3627     CopyRight(stderr, argv[0]);
3628     parse_common_args(&argc, argv,
3629                       PCA_CAN_VIEW,
3630                       NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv);
3631
3632     if (opt2bSet("-f", NFILE, fnm))
3633     {
3634         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3635     }
3636     if (opt2bSet("-g", NFILE, fnm))
3637     {
3638         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3639     }
3640
3641     sim_data_init(&sim_data);
3642 #if 0
3643     /* make linked list */
3644     lb = &lambda_head;
3645     lambda_data_init(lb, 0, 0);
3646     lb->next = lb;
3647     lb->prev = lb;
3648 #endif
3649
3650
3651     nfile_tot = nxvgfile + nedrfile;
3652
3653     if (nfile_tot == 0)
3654     {
3655         gmx_fatal(FARGS, "No input files!");
3656     }
3657
3658     if (nd < 0)
3659     {
3660         gmx_fatal(FARGS, "Can not have negative number of digits");
3661     }
3662     prec = pow(10, -nd);
3663
3664     snew(partsum, (nbmax+1)*(nbmax+1));
3665     nf = 0;
3666
3667     /* read in all files. First xvg files */
3668     for (f = 0; f < nxvgfile; f++)
3669     {
3670         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3671         nf++;
3672     }
3673     /* then .edr files */
3674     for (f = 0; f < nedrfile; f++)
3675     {
3676         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3677         nf++;
3678     }
3679
3680     /* fix the times to allow for equilibration */
3681     sim_data_impose_times(&sim_data, begin, end);
3682
3683     if (opt2bSet("-oh", NFILE, fnm))
3684     {
3685         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3686     }
3687
3688     /* assemble the output structures from the lambdas */
3689     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3690
3691     sum_disc_err = barres_list_max_disc_err(results, nresults);
3692
3693     if (nresults == 0)
3694     {
3695         printf("\nNo results to calculate.\n");
3696         return 0;
3697     }
3698
3699     if (sum_disc_err > prec)
3700     {
3701         prec = sum_disc_err;
3702         nd   = ceil(-log10(prec));
3703         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3704     }
3705
3706
3707     /*sprintf(lamformat,"%%6.3f");*/
3708     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3709     /* the format strings of the results in kT */
3710     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3711     sprintf( sktformat, "%%%ds", 6+nd);
3712     /* the format strings of the errors in kT */
3713     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3714     sprintf( skteformat, "%%%ds", 4+nd);
3715     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3716     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3717
3718
3719
3720     fpb = NULL;
3721     if (opt2bSet("-o", NFILE, fnm))
3722     {
3723         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3724         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3725                             "\\lambda", buf, exvggtXYDY, oenv);
3726     }
3727
3728     fpi = NULL;
3729     if (opt2bSet("-oi", NFILE, fnm))
3730     {
3731         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3732         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3733                        "\\lambda", buf, oenv);
3734     }
3735
3736
3737
3738     if (nbmin > nbmax)
3739     {
3740         nbmin = nbmax;
3741     }
3742
3743     /* first calculate results */
3744     bEE      = TRUE;
3745     disc_err = FALSE;
3746     for (f = 0; f < nresults; f++)
3747     {
3748         /* Determine the free energy difference with a factor of 10
3749          * more accuracy than requested for printing.
3750          */
3751         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3752                  &bEE, partsum);
3753
3754         if (results[f].dg_disc_err > prec/10.)
3755         {
3756             disc_err = TRUE;
3757         }
3758         if (results[f].dg_histrange_err > prec/10.)
3759         {
3760             histrange_err = TRUE;
3761         }
3762     }
3763
3764     /* print results in kT */
3765     kT   = BOLTZ*temp;
3766     beta = 1/kT;
3767
3768     printf("\nTemperature: %g K\n", temp);
3769
3770     printf("\nDetailed results in kT (see help for explanation):\n\n");
3771     printf("%6s ", " lam_A");
3772     printf("%6s ", " lam_B");
3773     printf(sktformat,  "DG ");
3774     if (bEE)
3775     {
3776         printf(skteformat, "+/- ");
3777     }
3778     if (disc_err)
3779     {
3780         printf(skteformat, "disc ");
3781     }
3782     if (histrange_err)
3783     {
3784         printf(skteformat, "range ");
3785     }
3786     printf(sktformat,  "s_A ");
3787     if (bEE)
3788     {
3789         printf(skteformat, "+/- " );
3790     }
3791     printf(sktformat,  "s_B ");
3792     if (bEE)
3793     {
3794         printf(skteformat, "+/- " );
3795     }
3796     printf(sktformat,  "stdev ");
3797     if (bEE)
3798     {
3799         printf(skteformat, "+/- ");
3800     }
3801     printf("\n");
3802     for (f = 0; f < nresults; f++)
3803     {
3804         lambda_vec_print_short(results[f].a->native_lambda, buf);
3805         printf("%s ", buf);
3806         lambda_vec_print_short(results[f].b->native_lambda, buf);
3807         printf("%s ", buf);
3808         printf(ktformat,  results[f].dg);
3809         printf(" ");
3810         if (bEE)
3811         {
3812             printf(kteformat, results[f].dg_err);
3813             printf(" ");
3814         }
3815         if (disc_err)
3816         {
3817             printf(kteformat, results[f].dg_disc_err);
3818             printf(" ");
3819         }
3820         if (histrange_err)
3821         {
3822             printf(kteformat, results[f].dg_histrange_err);
3823             printf(" ");
3824         }
3825         printf(ktformat,  results[f].sa);
3826         printf(" ");
3827         if (bEE)
3828         {
3829             printf(kteformat, results[f].sa_err);
3830             printf(" ");
3831         }
3832         printf(ktformat,  results[f].sb);
3833         printf(" ");
3834         if (bEE)
3835         {
3836             printf(kteformat, results[f].sb_err);
3837             printf(" ");
3838         }
3839         printf(ktformat,  results[f].dg_stddev);
3840         printf(" ");
3841         if (bEE)
3842         {
3843             printf(kteformat, results[f].dg_stddev_err);
3844         }
3845         printf("\n");
3846
3847         /* Check for negative relative entropy with a 95% certainty. */
3848         if (results[f].sa < -2*results[f].sa_err ||
3849             results[f].sb < -2*results[f].sb_err)
3850         {
3851             result_OK = FALSE;
3852         }
3853     }
3854
3855     if (!result_OK)
3856     {
3857         printf("\nWARNING: Some of these results violate the Second Law of "
3858                "Thermodynamics: \n"
3859                "         This is can be the result of severe undersampling, or "
3860                "(more likely)\n"
3861                "         there is something wrong with the simulations.\n");
3862     }
3863
3864
3865     /* final results in kJ/mol */
3866     printf("\n\nFinal results in kJ/mol:\n\n");
3867     dg_tot  = 0;
3868     for (f = 0; f < nresults; f++)
3869     {
3870
3871         if (fpi != NULL)
3872         {
3873             lambda_vec_print_short(results[f].a->native_lambda, buf);
3874             fprintf(fpi, xvg2format, buf, dg_tot);
3875         }
3876
3877
3878         if (fpb != NULL)
3879         {
3880             lambda_vec_print_intermediate(results[f].a->native_lambda,
3881                                           results[f].b->native_lambda,
3882                                           buf);
3883
3884             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3885         }
3886
3887         printf("point ");
3888         lambda_vec_print_short(results[f].a->native_lambda, buf);
3889         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3890         printf("%s - %s", buf, buf2);
3891         printf(",   DG ");
3892
3893         printf(dgformat, results[f].dg*kT);
3894         if (bEE)
3895         {
3896             printf(" +/- ");
3897             printf(dgformat, results[f].dg_err*kT);
3898         }
3899         if (histrange_err)
3900         {
3901             printf(" (max. range err. = ");
3902             printf(dgformat, results[f].dg_histrange_err*kT);
3903             printf(")");
3904             sum_histrange_err += results[f].dg_histrange_err*kT;
3905         }
3906
3907         printf("\n");
3908         dg_tot += results[f].dg;
3909     }
3910     printf("\n");
3911     printf("total ");
3912     lambda_vec_print_short(results[0].a->native_lambda, buf);
3913     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3914     printf("%s - %s", buf, buf2);
3915     printf(",   DG ");
3916
3917     printf(dgformat, dg_tot*kT);
3918     if (bEE)
3919     {
3920         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3921         printf(" +/- ");
3922         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3923     }
3924     printf("\n");
3925     if (disc_err)
3926     {
3927         printf("\nmaximum discretization error = ");
3928         printf(dgformat, sum_disc_err);
3929         if (bEE && stat_err < sum_disc_err)
3930         {
3931             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3932         }
3933     }
3934     if (histrange_err)
3935     {
3936         printf("\nmaximum histogram range error = ");
3937         printf(dgformat, sum_histrange_err);
3938         if (bEE && stat_err < sum_histrange_err)
3939         {
3940             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3941         }
3942
3943     }
3944     printf("\n");
3945
3946
3947     if (fpi != NULL)
3948     {
3949         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3950         fprintf(fpi, xvg2format, buf, dg_tot);
3951         ffclose(fpi);
3952     }
3953
3954     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3955     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3956
3957     thanx(stderr);
3958
3959     return 0;
3960 }