src/gromacs/gmxana/gmx_bar.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * Green Red Orange Magenta Azure Cyan Skyblue
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41 #include <string.h>
  42 #include <ctype.h>
  43 #include <math.h>
  44 #include <float.h>
  45
  46 #include "sysstuff.h"
  47 #include "typedefs.h"
  48 #include "smalloc.h"
  49 #include "futil.h"
  50 #include "statutil.h"
  51 #include "copyrite.h"
  52 #include "macros.h"
  53 #include "enxio.h"
  54 #include "physics.h"
  55 #include "gmx_fatal.h"
  56 #include "xvgr.h"
  57 #include "gmx_ana.h"
  58 #include "maths.h"
  59 #include "string2.h"
  60 #include "names.h"
  61 #include "mdebin.h"
  62
  63
  64 /* Structure for the names of lambda vector components */
  65 typedef struct lambda_components_t
  66 {
  67     char **names;  /* Array of strings with names for the lambda vector
  68                       components */
  69     int    N;      /* The number of components */
  70     int    Nalloc; /* The number of allocated components */
  71 } lambda_components_t;
  72
  73 /* Structure for a lambda vector or a dhdl derivative direction */
  74 typedef struct lambda_vec_t
  75 {
  76     double                    *val;   /* The lambda vector component values. Only valid if
  77                                          dhdl == -1 */
  78     int                        dhdl;  /* The coordinate index for the derivative described by this
  79                                          structure, or -1 */
  80     const lambda_components_t *lc;    /* the associated lambda_components
  81                                          structure */
  82     int                        index; /* The state number (init-lambda-state) of this lambda
  83                                          vector, if known. If not, it is set to -1 */
  84 } lambda_vec_t;
  85
  86 /* the dhdl.xvg data from a simulation */
  87 typedef struct xvg_t
  88 {
  89     const char   *filename;
  90     int           ftp;           /* file type */
  91     int           nset;          /* number of lambdas, including dhdl */
  92     int          *np;            /* number of data points (du or hists) per lambda */
  93     int           np_alloc;      /* number of points (du or hists) allocated */
  94     double        temp;          /* temperature */
  95     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  96     double       *t;             /* the times (of second index for y) */
  97     double      **y;             /* the dU values. y[0] holds the derivative, while
  98                                     further ones contain the energy differences between
  99                                     the native lambda and the 'foreign' lambdas. */
 100     lambda_vec_t  native_lambda; /* the native lambda */
 101
 102     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 103 } xvg_t;
 104
 105
 106 typedef struct hist_t
 107 {
 108     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 109     double          dx[2];                  /* the histogram spacing. The reverse
 110                                                dx is the negative of the forward dx.*/
 111     gmx_large_int_t x0[2];                  /* the (forward + reverse) histogram start
 112                                                point(s) as int */
 113
 114     int             nbin[2];                /* the (forward+reverse) number of bins */
 115     gmx_large_int_t sum;                    /* the total number of counts. Must be
 116                                                the same for forward + reverse.  */
 117     int             nhist;                  /* number of hist datas (forward or reverse) */
 118
 119     double          start_time, delta_time; /* start time, end time of histogram */
 120 } hist_t;
 121
 122
 123 /* an aggregate of samples for partial free energy calculation */
 124 typedef struct samples_t
 125 {
 126     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 127     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 128     double        temp;           /* the temperature */
 129     gmx_bool      derivative;     /* whether this sample is a derivative */
 130
 131     /* The samples come either as either delta U lists: */
 132     int     ndu;                    /* the number of delta U samples */
 133     double *du;                     /* the delta u's */
 134     double *t;                      /* the times associated with those samples, or: */
 135     double  start_time, delta_time; /*start time and delta time for linear time*/
 136
 137     /* or as histograms: */
 138     hist_t *hist; /* a histogram */
 139
 140     /* allocation data: (not NULL for data 'owned' by this struct) */
 141     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 142     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 143     hist_t         *hist_alloc;          /* allocated hist */
 144
 145     gmx_large_int_t ntot;                /* total number of samples */
 146     const char     *filename;            /* the file name this sample comes from */
 147 } samples_t;
 148
 149 /* a sample range (start to end for du-style data, or boolean
 150     for both du-style data and histograms */
 151 typedef struct sample_range_t
 152 {
 153     int        start, end; /* start and end index for du style data */
 154     gmx_bool   use;        /* whether to use this sample */
 155
 156     samples_t *s;          /* the samples this range belongs to */
 157 } sample_range_t;
 158
 159
 160 /* a collection of samples for a partial free energy calculation
 161     (i.e. the collection of samples from one native lambda to one
 162     foreign lambda) */
 163 typedef struct sample_coll_t
 164 {
 165     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 166                                           in the histogram */
 167     lambda_vec_t   *foreign_lambda;    /* collection */
 168     double          temp;              /* the temperature */
 169
 170     int             nsamples;          /* the number of samples */
 171     samples_t     **s;                 /* the samples themselves */
 172     sample_range_t *r;                 /* the sample ranges */
 173     int             nsamples_alloc;    /* number of allocated samples */
 174
 175     gmx_large_int_t ntot;              /* total number of samples in the ranges of
 176                                           this collection */
 177
 178     struct sample_coll_t *next, *prev; /* next and previous in the list */
 179 } sample_coll_t;
 180
 181 /* all the samples associated with a lambda point */
 182 typedef struct lambda_data_t
 183 {
 184     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 185     double                temp;        /* temperature */
 186
 187     sample_coll_t        *sc;          /* the samples */
 188
 189     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 190
 191     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 192 } lambda_data_t;
 193
 194 /* Top-level data structure of simulation data */
 195 typedef struct sim_data_t
 196 {
 197     lambda_data_t      *lb;      /* a lambda data linked list */
 198     lambda_data_t       lb_head; /* The head element of the linked list */
 199
 200     lambda_components_t lc;      /* the allowed components of the lambda
 201                                     vectors */
 202 } sim_data_t;
 203
 204 /* Top-level data structure with calculated values. */
 205 typedef struct {
 206     sample_coll_t *a, *b;            /* the simulation data */
 207
 208     double         dg;               /* the free energy difference */
 209     double         dg_err;           /* the free energy difference */
 210
 211     double         dg_disc_err;      /* discretization error */
 212     double         dg_histrange_err; /* histogram range error */
 213
 214     double         sa;               /* relative entropy of b in state a */
 215     double         sa_err;           /* error in sa */
 216     double         sb;               /* relative entropy of a in state b */
 217     double         sb_err;           /* error in sb */
 218
 219     double         dg_stddev;        /* expected dg stddev per sample */
 220     double         dg_stddev_err;    /* error in dg_stddev */
 221 } barres_t;
 222
 223
 224 /* Initialize a lambda_components structure */
 225 static void lambda_components_init(lambda_components_t *lc)
 226 {
 227     lc->N      = 0;
 228     lc->Nalloc = 2;
 229     snew(lc->names, lc->Nalloc);
 230 }
 231
 232 /* Add a component to a lambda_components structure */
 233 static void lambda_components_add(lambda_components_t *lc,
 234                                   const char *name, size_t name_length)
 235 {
 236     while (lc->N + 1 > lc->Nalloc)
 237     {
 238         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 239         srealloc( lc->names, lc->Nalloc );
 240     }
 241     snew(lc->names[lc->N], name_length+1);
 242     strncpy(lc->names[lc->N], name, name_length);
 243     lc->N++;
 244 }
 245
 246 /* check whether a component with index 'index' matches the given name, or
 247    is also NULL. Returns TRUE if this is the case.
 248    the string name does not need to end */
 249 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 250                                         int                        index,
 251                                         const char                *name,
 252                                         size_t                     name_length)
 253 {
 254     size_t len;
 255     if (index >= lc->N)
 256     {
 257         return FALSE;
 258     }
 259     if (name == NULL && lc->names[index] == NULL)
 260     {
 261         return TRUE;
 262     }
 263     if ((name == NULL) != (lc->names[index] == NULL))
 264     {
 265         return FALSE;
 266     }
 267     len = strlen(lc->names[index]);
 268     if (len != name_length)
 269     {
 270         return FALSE;
 271     }
 272     if (strncmp(lc->names[index], name, name_length) == 0)
 273     {
 274         return TRUE;
 275     }
 276     return FALSE;
 277 }
 278
 279 /* Find the index of a given lambda component name, or -1 if not found */
 280 static int lambda_components_find(const lambda_components_t *lc,
 281                                   const char                *name,
 282                                   size_t                     name_length)
 283 {
 284     int i;
 285
 286     for (i = 0; i < lc->N; i++)
 287     {
 288         if (strncmp(lc->names[i], name, name_length) == 0)
 289         {
 290             return i;
 291         }
 292     }
 293     return -1;
 294 }
 295
 296
 297
 298 /* initialize a lambda vector */
 299 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 300 {
 301     snew(lv->val, lc->N);
 302     lv->index = -1;
 303     lv->dhdl  = -1;
 304     lv->lc    = lc;
 305 }
 306
 307 static void lambda_vec_destroy(lambda_vec_t *lv)
 308 {
 309     sfree(lv->val);
 310 }
 311
 312 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 313 {
 314     int i;
 315
 316     lambda_vec_init(lv, orig->lc);
 317     lv->dhdl  = orig->dhdl;
 318     lv->index = orig->index;
 319     for (i = 0; i < lv->lc->N; i++)
 320     {
 321         lv->val[i] = orig->val[i];
 322     }
 323 }
 324
 325 /* write a lambda vec to a preallocated string */
 326 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 327 {
 328     int    i;
 329     size_t np;
 330
 331     str[0] = 0; /* reset the string */
 332     if (lv->dhdl < 0)
 333     {
 334         if (named)
 335         {
 336             str += sprintf(str, "delta H to ");
 337         }
 338         if (lv->lc->N > 1)
 339         {
 340             str += sprintf(str, "(");
 341         }
 342         for (i = 0; i < lv->lc->N; i++)
 343         {
 344             str += sprintf(str, "%g", lv->val[i]);
 345             if (i < lv->lc->N-1)
 346             {
 347                 str += sprintf(str, ", ");
 348             }
 349         }
 350         if (lv->lc->N > 1)
 351         {
 352             str += sprintf(str, ")");
 353         }
 354     }
 355     else
 356     {
 357         /* this lambda vector describes a derivative */
 358         str += sprintf(str, "dH/dl");
 359         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 360         {
 361             str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 362         }
 363     }
 364 }
 365
 366 /* write a shortened version of the lambda vec to a preallocated string */
 367 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 368 {
 369     int    i;
 370     size_t np;
 371
 372     if (lv->index >= 0)
 373     {
 374         sprintf(str, "%6d", lv->index);
 375     }
 376     else
 377     {
 378         if (lv->dhdl < 0)
 379         {
 380             sprintf(str, "%6.3f", lv->val[0]);
 381         }
 382         else
 383         {
 384             sprintf(str, "dH/dl[%d]", lv->dhdl);
 385         }
 386     }
 387 }
 388
 389 /* write an intermediate version of two lambda vecs to a preallocated string */
 390 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 391                                           const lambda_vec_t *b, char *str)
 392 {
 393     int    i;
 394     size_t np;
 395
 396     str[0] = 0;
 397     if ( (a->index >= 0) && (b->index >= 0) )
 398     {
 399         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 400     }
 401     else
 402     {
 403         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 404         {
 405             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 406         }
 407     }
 408 }
 409
 410
 411
 412 /* calculate the difference in lambda vectors: c = a-b.
 413    c must be initialized already, and a and b must describe non-derivative
 414    lambda points */
 415 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 416                             lambda_vec_t *c)
 417 {
 418     int i;
 419
 420     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 421     {
 422         gmx_fatal(FARGS,
 423                   "Trying to calculate the difference between derivatives instead of lambda points");
 424     }
 425     if ((a->lc != b->lc) || (a->lc != c->lc) )
 426     {
 427         gmx_fatal(FARGS,
 428                   "Trying to calculate the difference lambdas with differing basis set");
 429     }
 430     for (i = 0; i < a->lc->N; i++)
 431     {
 432         c->val[i] = a->val[i] - b->val[i];
 433     }
 434 }
 435
 436 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 437    a and b must describe non-derivative lambda points */
 438 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 439 {
 440     int    i;
 441     double ret = 0.;
 442
 443     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 444     {
 445         gmx_fatal(FARGS,
 446                   "Trying to calculate the difference between derivatives instead of lambda points");
 447     }
 448     if (a->lc != b->lc)
 449     {
 450         gmx_fatal(FARGS,
 451                   "Trying to calculate the difference lambdas with differing basis set");
 452     }
 453     for (i = 0; i < a->lc->N; i++)
 454     {
 455         double df = a->val[i] - b->val[i];
 456         ret += df*df;
 457     }
 458     return sqrt(ret);
 459 }
 460
 461
 462 /* check whether two lambda vectors are the same */
 463 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 464 {
 465     int i;
 466
 467     if (a->lc != b->lc)
 468     {
 469         return FALSE;
 470     }
 471     if (a->dhdl < 0)
 472     {
 473         for (i = 0; i < a->lc->N; i++)
 474         {
 475             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 476             {
 477                 return FALSE;
 478             }
 479         }
 480         return TRUE;
 481     }
 482     else
 483     {
 484         /* they're derivatives, so we check whether the indices match */
 485         return (a->dhdl == b->dhdl);
 486     }
 487 }
 488
 489 /* Compare the sort order of two foreign lambda vectors
 490
 491     returns 1 if a is 'bigger' than b,
 492     returns 0 if they're the same,
 493     returns -1 if a is 'smaller' than b.*/
 494 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 495                                        const lambda_vec_t *b)
 496 {
 497     int      i;
 498     double   norm_a    = 0, norm_b = 0;
 499     gmx_bool different = FALSE;
 500
 501     if (a->lc != b->lc)
 502     {
 503         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 504     }
 505     /* if either one has an index we sort based on that */
 506     if ((a->index >= 0) || (b->index >= 0))
 507     {
 508         if (a->index == b->index)
 509         {
 510             return 0;
 511         }
 512         return (a->index > b->index) ? 1 : -1;
 513     }
 514     if (a->dhdl >= 0 || b->dhdl >= 0)
 515     {
 516         /* lambda vectors that are derivatives always sort higher than those
 517            without derivatives */
 518         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 519         {
 520             return (a->dhdl >= 0) ? 1 : -1;
 521         }
 522         return a->dhdl > b->dhdl;
 523     }
 524
 525     /* neither has an index, so we can only sort on the lambda components,
 526        which is only valid if there is one component */
 527     for (i = 0; i < a->lc->N; i++)
 528     {
 529         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 530         {
 531             different = TRUE;
 532         }
 533         norm_a += a->val[i]*a->val[i];
 534         norm_b += b->val[i]*b->val[i];
 535     }
 536     if (!different)
 537     {
 538         return 0;
 539     }
 540     return norm_a > norm_b;
 541 }
 542
 543 /* Compare the sort order of two native lambda vectors
 544
 545     returns 1 if a is 'bigger' than b,
 546     returns 0 if they're the same,
 547     returns -1 if a is 'smaller' than b.*/
 548 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 549                                       const lambda_vec_t *b)
 550 {
 551     int i;
 552
 553     if (a->lc != b->lc)
 554     {
 555         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 556     }
 557     /* if either one has an index we sort based on that */
 558     if ((a->index >= 0) || (b->index >= 0))
 559     {
 560         if (a->index == b->index)
 561         {
 562             return 0;
 563         }
 564         return (a->index > b->index) ? 1 : -1;
 565     }
 566     /* neither has an index, so we can only sort on the lambda components,
 567        which is only valid if there is one component */
 568     if (a->lc->N > 1)
 569     {
 570         gmx_fatal(FARGS,
 571                   "Can't compare lambdas with no index and > 1 component");
 572     }
 573     if (a->dhdl >= 0 || b->dhdl >= 0)
 574     {
 575         gmx_fatal(FARGS,
 576                   "Can't compare native lambdas that are derivatives");
 577     }
 578     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 579     {
 580         return 0;
 581     }
 582     return a->val[0] > b->val[0] ? 1 : -1;
 583 }
 584
 585
 586
 587
 588 static void hist_init(hist_t *h, int nhist, int *nbin)
 589 {
 590     int i;
 591     if (nhist > 2)
 592     {
 593         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 594     }
 595     for (i = 0; i < nhist; i++)
 596     {
 597         snew(h->bin[i], nbin[i]);
 598         h->x0[i]      = 0;
 599         h->nbin[i]    = nbin[i];
 600         h->start_time = h->delta_time = 0;
 601         h->dx[i]      = 0;
 602     }
 603     h->sum   = 0;
 604     h->nhist = nhist;
 605 }
 606
 607 static void hist_destroy(hist_t *h)
 608 {
 609     sfree(h->bin);
 610 }
 611
 612
 613 static void xvg_init(xvg_t *ba)
 614 {
 615     ba->filename = NULL;
 616     ba->nset     = 0;
 617     ba->np_alloc = 0;
 618     ba->np       = NULL;
 619     ba->y        = NULL;
 620 }
 621
 622 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 623                          lambda_vec_t *foreign_lambda, double temp,
 624                          gmx_bool derivative, const char *filename)
 625 {
 626     s->native_lambda  = native_lambda;
 627     s->foreign_lambda = foreign_lambda;
 628     s->temp           = temp;
 629     s->derivative     = derivative;
 630
 631     s->ndu        = 0;
 632     s->du         = NULL;
 633     s->t          = NULL;
 634     s->start_time = s->delta_time = 0;
 635     s->hist       = NULL;
 636     s->du_alloc   = NULL;
 637     s->t_alloc    = NULL;
 638     s->hist_alloc = NULL;
 639     s->ndu_alloc  = 0;
 640     s->nt_alloc   = 0;
 641
 642     s->ntot     = 0;
 643     s->filename = filename;
 644 }
 645
 646 static void sample_range_init(sample_range_t *r, samples_t *s)
 647 {
 648     r->start = 0;
 649     r->end   = s->ndu;
 650     r->use   = TRUE;
 651     r->s     = NULL;
 652 }
 653
 654 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 655                              lambda_vec_t *foreign_lambda, double temp)
 656 {
 657     sc->native_lambda  = native_lambda;
 658     sc->foreign_lambda = foreign_lambda;
 659     sc->temp           = temp;
 660
 661     sc->nsamples       = 0;
 662     sc->s              = NULL;
 663     sc->r              = NULL;
 664     sc->nsamples_alloc = 0;
 665
 666     sc->ntot = 0;
 667     sc->next = sc->prev = NULL;
 668 }
 669
 670 static void sample_coll_destroy(sample_coll_t *sc)
 671 {
 672     /* don't free the samples themselves */
 673     sfree(sc->r);
 674     sfree(sc->s);
 675 }
 676
 677
 678 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 679                              double temp)
 680 {
 681     l->lambda = native_lambda;
 682     l->temp   = temp;
 683
 684     l->next = NULL;
 685     l->prev = NULL;
 686
 687     l->sc = &(l->sc_head);
 688
 689     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 690     l->sc->next = l->sc;
 691     l->sc->prev = l->sc;
 692 }
 693
 694 static void barres_init(barres_t *br)
 695 {
 696     br->dg            = 0;
 697     br->dg_err        = 0;
 698     br->sa            = 0;
 699     br->sa_err        = 0;
 700     br->sb            = 0;
 701     br->sb_err        = 0;
 702     br->dg_stddev     = 0;
 703     br->dg_stddev_err = 0;
 704
 705     br->a = NULL;
 706     br->b = NULL;
 707 }
 708
 709
 710 /* calculate the total number of samples in a sample collection */
 711 static void sample_coll_calc_ntot(sample_coll_t *sc)
 712 {
 713     int i;
 714
 715     sc->ntot = 0;
 716     for (i = 0; i < sc->nsamples; i++)
 717     {
 718         if (sc->r[i].use)
 719         {
 720             if (sc->s[i]->hist)
 721             {
 722                 sc->ntot += sc->s[i]->ntot;
 723             }
 724             else
 725             {
 726                 sc->ntot += sc->r[i].end - sc->r[i].start;
 727             }
 728         }
 729     }
 730 }
 731
 732
 733 /* find the barsamples_t associated with a lambda that corresponds to
 734    a specific foreign lambda */
 735 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 736                                                    lambda_vec_t  *foreign_lambda)
 737 {
 738     sample_coll_t *sc = l->sc->next;
 739
 740     while (sc != l->sc)
 741     {
 742         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 743         {
 744             return sc;
 745         }
 746         sc = sc->next;
 747     }
 748
 749     return NULL;
 750 }
 751
 752 /* insert li into an ordered list of lambda_colls */
 753 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 754 {
 755     sample_coll_t *scn = l->sc->next;
 756     while ( (scn != l->sc) )
 757     {
 758         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 759         {
 760             break;
 761         }
 762         scn = scn->next;
 763     }
 764     /* now insert it before the found scn */
 765     sc->next        = scn;
 766     sc->prev        = scn->prev;
 767     scn->prev->next = sc;
 768     scn->prev       = sc;
 769 }
 770
 771 /* insert li into an ordered list of lambdas */
 772 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 773 {
 774     lambda_data_t *lc = head->next;
 775     while (lc != head)
 776     {
 777         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 778         {
 779             break;
 780         }
 781         lc = lc->next;
 782     }
 783     /* now insert ourselves before the found lc */
 784     li->next       = lc;
 785     li->prev       = lc->prev;
 786     lc->prev->next = li;
 787     lc->prev       = li;
 788 }
 789
 790 /* insert a sample and a sample_range into a sample_coll. The
 791     samples are stored as a pointer, the range is copied. */
 792 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 793                                       sample_range_t *r)
 794 {
 795     /* first check if it belongs here */
 796     if (sc->temp != s->temp)
 797     {
 798         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 799                   s->filename, sc->next->s[0]->filename);
 800     }
 801     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 802     {
 803         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 804                   s->filename, sc->next->s[0]->filename);
 805     }
 806     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 807     {
 808         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 809                   s->filename, sc->next->s[0]->filename);
 810     }
 811
 812     /* check if there's room */
 813     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 814     {
 815         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 816         srenew(sc->s, sc->nsamples_alloc);
 817         srenew(sc->r, sc->nsamples_alloc);
 818     }
 819     sc->s[sc->nsamples] = s;
 820     sc->r[sc->nsamples] = *r;
 821     sc->nsamples++;
 822
 823     sample_coll_calc_ntot(sc);
 824 }
 825
 826 /* insert a sample into a lambda_list, creating the right sample_coll if
 827    neccesary */
 828 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 829 {
 830     gmx_bool       found = FALSE;
 831     sample_coll_t *sc;
 832     sample_range_t r;
 833
 834     lambda_data_t *l = head->next;
 835
 836     /* first search for the right lambda_data_t */
 837     while (l != head)
 838     {
 839         if (lambda_vec_same(l->lambda, s->native_lambda) )
 840         {
 841             found = TRUE;
 842             break;
 843         }
 844         l = l->next;
 845     }
 846
 847     if (!found)
 848     {
 849         snew(l, 1);                                     /* allocate a new one */
 850         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 851         lambda_data_insert_lambda(head, l);             /* add it to the list */
 852     }
 853
 854     /* now look for a sample collection */
 855     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 856     if (!sc)
 857     {
 858         snew(sc, 1); /* allocate a new one */
 859         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 860         lambda_data_insert_sample_coll(l, sc);
 861     }
 862
 863     /* now insert the samples into the sample coll */
 864     sample_range_init(&r, s);
 865     sample_coll_insert_sample(sc, s, &r);
 866 }
 867
 868
 869 /* make a histogram out of a sample collection */
 870 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 871                                   int *nbin_alloc, int *nbin,
 872                                   double *dx, double *xmin, int nbin_default)
 873 {
 874     int      i, j, k;
 875     gmx_bool dx_set   = FALSE;
 876     gmx_bool xmin_set = FALSE;
 877
 878     gmx_bool xmax_set      = FALSE;
 879     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 880                                        limits of a histogram */
 881     double   xmax          = -1;
 882
 883     /* first determine dx and xmin; try the histograms */
 884     for (i = 0; i < sc->nsamples; i++)
 885     {
 886         if (sc->s[i]->hist)
 887         {
 888             hist_t *hist = sc->s[i]->hist;
 889             for (k = 0; k < hist->nhist; k++)
 890             {
 891                 double hdx      = hist->dx[k];
 892                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 893
 894                 /* we use the biggest dx*/
 895                 if ( (!dx_set) || hist->dx[0] > *dx)
 896                 {
 897                     dx_set = TRUE;
 898                     *dx    = hist->dx[0];
 899                 }
 900                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 901                 {
 902                     xmin_set = TRUE;
 903                     *xmin    = (hist->x0[k]*hdx);
 904                 }
 905
 906                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 907                 {
 908                     xmax_set = TRUE;
 909                     xmax     = xmax_now;
 910                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 911                     {
 912                         xmax_set_hard = TRUE;
 913                     }
 914                 }
 915                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 916                 {
 917                     xmax_set_hard = TRUE;
 918                     xmax          = xmax_now;
 919                 }
 920             }
 921         }
 922     }
 923     /* and the delta us */
 924     for (i = 0; i < sc->nsamples; i++)
 925     {
 926         if (sc->s[i]->ndu > 0)
 927         {
 928             /* determine min and max */
 929             int    starti  = sc->r[i].start;
 930             int    endi    = sc->r[i].end;
 931             double du_xmin = sc->s[i]->du[starti];
 932             double du_xmax = sc->s[i]->du[starti];
 933             for (j = starti+1; j < endi; j++)
 934             {
 935                 if (sc->s[i]->du[j] < du_xmin)
 936                 {
 937                     du_xmin = sc->s[i]->du[j];
 938                 }
 939                 if (sc->s[i]->du[j] > du_xmax)
 940                 {
 941                     du_xmax = sc->s[i]->du[j];
 942                 }
 943             }
 944
 945             /* and now change the limits */
 946             if ( (!xmin_set) || (du_xmin < *xmin) )
 947             {
 948                 xmin_set = TRUE;
 949                 *xmin    = du_xmin;
 950             }
 951             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 952             {
 953                 xmax_set = TRUE;
 954                 xmax     = du_xmax;
 955             }
 956         }
 957     }
 958
 959     if (!xmax_set || !xmin_set)
 960     {
 961         *nbin = 0;
 962         return;
 963     }
 964
 965
 966     if (!dx_set)
 967     {
 968         *nbin = nbin_default;
 969         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 970                                                be 0, and we count from 0 */
 971     }
 972     else
 973     {
 974         *nbin = (xmax-(*xmin))/(*dx);
 975     }
 976
 977     if (*nbin > *nbin_alloc)
 978     {
 979         *nbin_alloc = *nbin;
 980         srenew(*bin, *nbin_alloc);
 981     }
 982
 983     /* reset the histogram */
 984     for (i = 0; i < (*nbin); i++)
 985     {
 986         (*bin)[i] = 0;
 987     }
 988
 989     /* now add the actual data */
 990     for (i = 0; i < sc->nsamples; i++)
 991     {
 992         if (sc->s[i]->hist)
 993         {
 994             hist_t *hist = sc->s[i]->hist;
 995             for (k = 0; k < hist->nhist; k++)
 996             {
 997                 double hdx       = hist->dx[k];
 998                 double xmin_hist = hist->x0[k]*hdx;
 999                 for (j = 0; j < hist->nbin[k]; j++)
1000                 {
1001                     /* calculate the bin corresponding to the middle of the
1002                        original bin */
1003                     double x     = hdx*(j+0.5) + xmin_hist;
1004                     int    binnr = (int)((x-(*xmin))/(*dx));
1005
1006                     if (binnr >= *nbin || binnr < 0)
1007                     {
1008                         binnr = (*nbin)-1;
1009                     }
1010
1011                     (*bin)[binnr] += hist->bin[k][j];
1012                 }
1013             }
1014         }
1015         else
1016         {
1017             int starti = sc->r[i].start;
1018             int endi   = sc->r[i].end;
1019             for (j = starti; j < endi; j++)
1020             {
1021                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1022                 if (binnr >= *nbin || binnr < 0)
1023                 {
1024                     binnr = (*nbin)-1;
1025                 }
1026
1027                 (*bin)[binnr]++;
1028             }
1029         }
1030     }
1031 }
1032
1033 /* write a collection of histograms to a file */
1034 void sim_data_histogram(sim_data_t *sd, const char *filename,
1035                         int nbin_default, const output_env_t oenv)
1036 {
1037     char           label_x[STRLEN];
1038     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1039     const char    *title   = "N(\\DeltaH)";
1040     const char    *label_y = "Samples";
1041     FILE          *fp;
1042     lambda_data_t *bl;
1043     int            nsets     = 0;
1044     char         **setnames  = NULL;
1045     gmx_bool       first_set = FALSE;
1046     /* histogram data: */
1047     int           *hist       = NULL;
1048     int            nbin       = 0;
1049     int            nbin_alloc = 0;
1050     double         dx         = 0;
1051     double         min        = 0;
1052     int            i;
1053     lambda_data_t *bl_head = sd->lb;
1054
1055     printf("\nWriting histogram to %s\n", filename);
1056     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1057
1058     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1059
1060     /* first get all the set names */
1061     bl = bl_head->next;
1062     /* iterate over all lambdas */
1063     while (bl != bl_head)
1064     {
1065         sample_coll_t *sc = bl->sc->next;
1066
1067         /* iterate over all samples */
1068         while (sc != bl->sc)
1069         {
1070             char buf[STRLEN], buf2[STRLEN];
1071
1072             nsets++;
1073             srenew(setnames, nsets);
1074             snew(setnames[nsets-1], STRLEN);
1075             if (sc->foreign_lambda->dhdl < 0)
1076             {
1077                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1078                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1079                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1080                         deltag, lambda, buf2, lambda, buf);
1081             }
1082             else
1083             {
1084                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1085                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1086                         dhdl, lambda, buf);
1087             }
1088             sc = sc->next;
1089         }
1090
1091         bl = bl->next;
1092     }
1093     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1094
1095
1096     /* now make the histograms */
1097     bl = bl_head->next;
1098     /* iterate over all lambdas */
1099     while (bl != bl_head)
1100     {
1101         sample_coll_t *sc = bl->sc->next;
1102
1103         /* iterate over all samples */
1104         while (sc != bl->sc)
1105         {
1106             if (!first_set)
1107             {
1108                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1109             }
1110
1111             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1112                                   nbin_default);
1113
1114             for (i = 0; i < nbin; i++)
1115             {
1116                 double xmin = i*dx + min;
1117                 double xmax = (i+1)*dx + min;
1118
1119                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1120             }
1121
1122             first_set = FALSE;
1123             sc        = sc->next;
1124         }
1125
1126         bl = bl->next;
1127     }
1128
1129     if (hist)
1130     {
1131         sfree(hist);
1132     }
1133
1134     xvgrclose(fp);
1135 }
1136
1137 /* create a collection (array) of barres_t object given a ordered linked list
1138    of barlamda_t sample collections */
1139 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1140                                     gmx_bool use_dhdl)
1141 {
1142     lambda_data_t *bl;
1143     int            nlambda = 0;
1144     barres_t      *res;
1145     int            i;
1146     gmx_bool       dhdl    = FALSE;
1147     gmx_bool       first   = TRUE;
1148     lambda_data_t *bl_head = sd->lb;
1149
1150     /* first count the lambdas */
1151     bl = bl_head->next;
1152     while (bl != bl_head)
1153     {
1154         nlambda++;
1155         bl = bl->next;
1156     }
1157     snew(res, nlambda-1);
1158
1159     /* next put the right samples in the res */
1160     *nres = 0;
1161     bl    = bl_head->next->next; /* we start with the second one. */
1162     while (bl != bl_head)
1163     {
1164         sample_coll_t *sc, *scprev;
1165         barres_t      *br = &(res[*nres]);
1166         /* there is always a previous one. we search for that as a foreign
1167            lambda: */
1168         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1169         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1170
1171         barres_init(br);
1172
1173         if (use_dhdl)
1174         {
1175             /* we use dhdl */
1176
1177             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1178             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1179
1180             if (first)
1181             {
1182                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1183                 dhdl = TRUE;
1184             }
1185             if (!dhdl)
1186             {
1187                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1188             }
1189         }
1190         else if (!scprev && !sc)
1191         {
1192             gmx_fatal(FARGS, "There is no path from lambda=%g -> %g that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1193         }
1194
1195         /* normal delta H */
1196         if (!scprev)
1197         {
1198             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %g\nin the files for lambda = %g", bl->lambda, bl->prev->lambda);
1199         }
1200         if (!sc)
1201         {
1202             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %g\nin the files for lambda = %g", bl->prev->lambda, bl->lambda);
1203         }
1204         br->a = scprev;
1205         br->b = sc;
1206
1207         first = FALSE;
1208         (*nres)++;
1209         bl = bl->next;
1210     }
1211     return res;
1212 }
1213
1214 /* estimate the maximum discretization error */
1215 static double barres_list_max_disc_err(barres_t *res, int nres)
1216 {
1217     int    i, j;
1218     double disc_err = 0.;
1219     double delta_lambda;
1220
1221     for (i = 0; i < nres; i++)
1222     {
1223         barres_t *br = &(res[i]);
1224
1225         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1226                                            br->a->native_lambda);
1227
1228         for (j = 0; j < br->a->nsamples; j++)
1229         {
1230             if (br->a->s[j]->hist)
1231             {
1232                 double Wfac = 1.;
1233                 if (br->a->s[j]->derivative)
1234                 {
1235                     Wfac =  delta_lambda;
1236                 }
1237
1238                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1239             }
1240         }
1241         for (j = 0; j < br->b->nsamples; j++)
1242         {
1243             if (br->b->s[j]->hist)
1244             {
1245                 double Wfac = 1.;
1246                 if (br->b->s[j]->derivative)
1247                 {
1248                     Wfac =  delta_lambda;
1249                 }
1250                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1251             }
1252         }
1253     }
1254     return disc_err;
1255 }
1256
1257
1258 /* impose start and end times on a sample collection, updating sample_ranges */
1259 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1260                                      double end_t)
1261 {
1262     int i;
1263     for (i = 0; i < sc->nsamples; i++)
1264     {
1265         samples_t      *s = sc->s[i];
1266         sample_range_t *r = &(sc->r[i]);
1267         if (s->hist)
1268         {
1269             double end_time = s->hist->delta_time*s->hist->sum +
1270                 s->hist->start_time;
1271             if (s->hist->start_time < begin_t || end_time > end_t)
1272             {
1273                 r->use = FALSE;
1274             }
1275         }
1276         else
1277         {
1278             if (!s->t)
1279             {
1280                 double end_time;
1281                 if (s->start_time < begin_t)
1282                 {
1283                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1284                 }
1285                 end_time = s->delta_time*s->ndu + s->start_time;
1286                 if (end_time > end_t)
1287                 {
1288                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1289                 }
1290             }
1291             else
1292             {
1293                 int j;
1294                 for (j = 0; j < s->ndu; j++)
1295                 {
1296                     if (s->t[j] < begin_t)
1297                     {
1298                         r->start = j;
1299                     }
1300
1301                     if (s->t[j] >= end_t)
1302                     {
1303                         r->end = j;
1304                         break;
1305                     }
1306                 }
1307             }
1308             if (r->start > r->end)
1309             {
1310                 r->use = FALSE;
1311             }
1312         }
1313     }
1314     sample_coll_calc_ntot(sc);
1315 }
1316
1317 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1318 {
1319     double         first_t, last_t;
1320     double         begin_t, end_t;
1321     lambda_data_t *lc;
1322     lambda_data_t *head = sd->lb;
1323     int            j;
1324
1325     if (begin <= 0 && end < 0)
1326     {
1327         return;
1328     }
1329
1330     /* first determine the global start and end times */
1331     first_t = -1;
1332     last_t  = -1;
1333     lc      = head->next;
1334     while (lc != head)
1335     {
1336         sample_coll_t *sc = lc->sc->next;
1337         while (sc != lc->sc)
1338         {
1339             for (j = 0; j < sc->nsamples; j++)
1340             {
1341                 double start_t, end_t;
1342
1343                 start_t = sc->s[j]->start_time;
1344                 end_t   =   sc->s[j]->start_time;
1345                 if (sc->s[j]->hist)
1346                 {
1347                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1348                 }
1349                 else
1350                 {
1351                     if (sc->s[j]->t)
1352                     {
1353                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1354                     }
1355                     else
1356                     {
1357                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1358                     }
1359                 }
1360
1361                 if (start_t < first_t || first_t < 0)
1362                 {
1363                     first_t = start_t;
1364                 }
1365                 if (end_t > last_t)
1366                 {
1367                     last_t = end_t;
1368                 }
1369             }
1370             sc = sc->next;
1371         }
1372         lc = lc->next;
1373     }
1374
1375     /* calculate the actual times */
1376     if (begin > 0)
1377     {
1378         begin_t = begin;
1379     }
1380     else
1381     {
1382         begin_t = first_t;
1383     }
1384
1385     if (end > 0)
1386     {
1387         end_t = end;
1388     }
1389     else
1390     {
1391         end_t = last_t;
1392     }
1393     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1394
1395     if (begin_t > end_t)
1396     {
1397         return;
1398     }
1399     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1400
1401     /* then impose them */
1402     lc = head->next;
1403     while (lc != head)
1404     {
1405         sample_coll_t *sc = lc->sc->next;
1406         while (sc != lc->sc)
1407         {
1408             sample_coll_impose_times(sc, begin_t, end_t);
1409             sc = sc->next;
1410         }
1411         lc = lc->next;
1412     }
1413 }
1414
1415
1416 /* create subsample i out of ni from an existing sample_coll */
1417 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1418                                              sample_coll_t *sc_orig,
1419                                              int i, int ni)
1420 {
1421     int             j;
1422     int             hist_start, hist_end;
1423
1424     gmx_large_int_t ntot_start;
1425     gmx_large_int_t ntot_end;
1426     gmx_large_int_t ntot_so_far;
1427
1428     *sc = *sc_orig; /* just copy all fields */
1429
1430     /* allocate proprietary memory */
1431     snew(sc->s, sc_orig->nsamples);
1432     snew(sc->r, sc_orig->nsamples);
1433
1434     /* copy the samples */
1435     for (j = 0; j < sc_orig->nsamples; j++)
1436     {
1437         sc->s[j] = sc_orig->s[j];
1438         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1439     }
1440
1441     /* now fix start and end fields */
1442     /* the casts avoid possible overflows */
1443     ntot_start  = (gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1444     ntot_end    = (gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1445     ntot_so_far = 0;
1446     for (j = 0; j < sc->nsamples; j++)
1447     {
1448         gmx_large_int_t ntot_add;
1449         gmx_large_int_t new_start, new_end;
1450
1451         if (sc->r[j].use)
1452         {
1453             if (sc->s[j]->hist)
1454             {
1455                 ntot_add = sc->s[j]->hist->sum;
1456             }
1457             else
1458             {
1459                 ntot_add = sc->r[j].end - sc->r[j].start;
1460             }
1461         }
1462         else
1463         {
1464             ntot_add = 0;
1465         }
1466
1467         if (!sc->s[j]->hist)
1468         {
1469             if (ntot_so_far < ntot_start)
1470             {
1471                 /* adjust starting point */
1472                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1473             }
1474             else
1475             {
1476                 new_start = sc->r[j].start;
1477             }
1478             /* adjust end point */
1479             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1480             if (new_end > sc->r[j].end)
1481             {
1482                 new_end = sc->r[j].end;
1483             }
1484
1485             /* check if we're in range at all */
1486             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1487             {
1488                 new_start = 0;
1489                 new_end   = 0;
1490             }
1491             /* and write the new range */
1492             sc->r[j].start = (int)new_start;
1493             sc->r[j].end   = (int)new_end;
1494         }
1495         else
1496         {
1497             if (sc->r[j].use)
1498             {
1499                 double overlap;
1500                 double ntot_start_norm, ntot_end_norm;
1501                 /* calculate the amount of overlap of the
1502                    desired range (ntot_start -- ntot_end) onto
1503                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1504
1505                 /* first calculate normalized bounds
1506                    (where 0 is the start of the hist range, and 1 the end) */
1507                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1508                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1509
1510                 /* now fix the boundaries */
1511                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1512                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1513
1514                 /* and calculate the overlap */
1515                 overlap = ntot_end_norm - ntot_start_norm;
1516
1517                 if (overlap > 0.95) /* we allow for 5% slack */
1518                 {
1519                     sc->r[j].use = TRUE;
1520                 }
1521                 else if (overlap < 0.05)
1522                 {
1523                     sc->r[j].use = FALSE;
1524                 }
1525                 else
1526                 {
1527                     return FALSE;
1528                 }
1529             }
1530         }
1531         ntot_so_far += ntot_add;
1532     }
1533     sample_coll_calc_ntot(sc);
1534
1535     return TRUE;
1536 }
1537
1538 /* calculate minimum and maximum work values in sample collection */
1539 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1540                                 double *Wmin, double *Wmax)
1541 {
1542     int i, j;
1543
1544     *Wmin = FLT_MAX;
1545     *Wmax = -FLT_MAX;
1546
1547     for (i = 0; i < sc->nsamples; i++)
1548     {
1549         samples_t      *s = sc->s[i];
1550         sample_range_t *r = &(sc->r[i]);
1551         if (r->use)
1552         {
1553             if (!s->hist)
1554             {
1555                 for (j = r->start; j < r->end; j++)
1556                 {
1557                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1558                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1559                 }
1560             }
1561             else
1562             {
1563                 int    hd = 0; /* determine the histogram direction: */
1564                 double dx;
1565                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1566                 {
1567                     hd = 1;
1568                 }
1569                 dx = s->hist->dx[hd];
1570
1571                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1572                 {
1573                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1574                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1575                     /* look for the highest value bin with values */
1576                     if (s->hist->bin[hd][j] > 0)
1577                     {
1578                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1579                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1580                         break;
1581                     }
1582                 }
1583             }
1584         }
1585     }
1586 }
1587
1588 /* Initialize a sim_data structure */
1589 static void sim_data_init(sim_data_t *sd)
1590 {
1591     /* make linked list */
1592     sd->lb       = &(sd->lb_head);
1593     sd->lb->next = sd->lb;
1594     sd->lb->prev = sd->lb;
1595
1596     lambda_components_init(&(sd->lc));
1597 }
1598
1599
1600 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1601 {
1602     int    i;
1603     double sum;
1604
1605     sum = 0;
1606
1607     for (i = 0; i < n; i++)
1608     {
1609         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1610     }
1611
1612     return sum;
1613 }
1614
1615 /* calculate the BAR average given a histogram
1616
1617     if type== 0, calculate the best estimate for the average,
1618     if type==-1, calculate the minimum possible value given the histogram
1619     if type== 1, calculate the maximum possible value given the histogram */
1620 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1621                                 int type)
1622 {
1623     double sum = 0.;
1624     int    i;
1625     int    max;
1626     /* normalization factor multiplied with bin width and
1627        number of samples (we normalize through M): */
1628     double normdx = 1.;
1629     int    hd     = 0; /* determine the histogram direction: */
1630     double dx;
1631
1632     if ( (hist->nhist > 1) && (Wfac < 0) )
1633     {
1634         hd = 1;
1635     }
1636     dx  = hist->dx[hd];
1637     max = hist->nbin[hd]-1;
1638     if (type == 1)
1639     {
1640         max = hist->nbin[hd]; /* we also add whatever was out of range */
1641     }
1642
1643     for (i = 0; i < max; i++)
1644     {
1645         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1646         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1647
1648         sum += pxdx/(1. + exp(x + sbMmDG));
1649     }
1650
1651     return sum;
1652 }
1653
1654 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1655                                 double temp, double tol, int type)
1656 {
1657     double kT, beta, M;
1658     double DG;
1659     int    i, j;
1660     double Wfac1, Wfac2, Wmin, Wmax;
1661     double DG0, DG1, DG2, dDG1;
1662     double sum1, sum2;
1663     double n1, n2; /* numbers of samples as doubles */
1664
1665     kT   = BOLTZ*temp;
1666     beta = 1/kT;
1667
1668     /* count the numbers of samples */
1669     n1 = ca->ntot;
1670     n2 = cb->ntot;
1671
1672     M = log(n1/n2);
1673
1674     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1675     if (ca->foreign_lambda->dhdl < 0)
1676     {
1677         /* this is the case when the delta U were calculated directly
1678            (i.e. we're not scaling dhdl) */
1679         Wfac1 = beta;
1680         Wfac2 = beta;
1681     }
1682     else
1683     {
1684         /* we're using dhdl, so delta_lambda needs to be a
1685            multiplication factor.  */
1686         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1687         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1688                                                   ca->native_lambda);
1689         if (cb->native_lambda->lc->N > 1)
1690         {
1691             gmx_fatal(FARGS,
1692                       "Can't (yet) do multi-component dhdl interpolation");
1693         }
1694
1695         Wfac1 =  beta*delta_lambda;
1696         Wfac2 = -beta*delta_lambda;
1697     }
1698
1699     if (beta < 1)
1700     {
1701         /* We print the output both in kT and kJ/mol.
1702          * Here we determine DG in kT, so when beta < 1
1703          * the precision has to be increased.
1704          */
1705         tol *= beta;
1706     }
1707
1708     /* Calculate minimum and maximum work to give an initial estimate of
1709      * delta G  as their average.
1710      */
1711     {
1712         double Wmin1, Wmin2, Wmax1, Wmax2;
1713         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1714         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1715
1716         Wmin = min(Wmin1, Wmin2);
1717         Wmax = max(Wmax1, Wmax2);
1718     }
1719
1720     DG0 = Wmin;
1721     DG2 = Wmax;
1722
1723     if (debug)
1724     {
1725         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1726     }
1727     /* We approximate by bisection: given our initial estimates
1728        we keep checking whether the halfway point is greater or
1729        smaller than what we get out of the BAR averages.
1730
1731        For the comparison we can use twice the tolerance. */
1732     while (DG2 - DG0 > 2*tol)
1733     {
1734         DG1 = 0.5*(DG0 + DG2);
1735
1736         /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1737            DG1);*/
1738
1739         /* calculate the BAR averages */
1740         dDG1 = 0.;
1741
1742         for (i = 0; i < ca->nsamples; i++)
1743         {
1744             samples_t      *s = ca->s[i];
1745             sample_range_t *r = &(ca->r[i]);
1746             if (r->use)
1747             {
1748                 if (s->hist)
1749                 {
1750                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1751                 }
1752                 else
1753                 {
1754                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1755                                          Wfac1, (M-DG1));
1756                 }
1757             }
1758         }
1759         for (i = 0; i < cb->nsamples; i++)
1760         {
1761             samples_t      *s = cb->s[i];
1762             sample_range_t *r = &(cb->r[i]);
1763             if (r->use)
1764             {
1765                 if (s->hist)
1766                 {
1767                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1768                 }
1769                 else
1770                 {
1771                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1772                                          Wfac2, -(M-DG1));
1773                 }
1774             }
1775         }
1776
1777         if (dDG1 < 0)
1778         {
1779             DG0 = DG1;
1780         }
1781         else
1782         {
1783             DG2 = DG1;
1784         }
1785         if (debug)
1786         {
1787             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1788         }
1789     }
1790
1791     return 0.5*(DG0 + DG2);
1792 }
1793
1794 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1795                              double temp, double dg, double *sa, double *sb)
1796 {
1797     int    i, j;
1798     double W_ab = 0.;
1799     double W_ba = 0.;
1800     double kT, beta;
1801     double Wfac1, Wfac2;
1802     double n1, n2;
1803
1804     kT   = BOLTZ*temp;
1805     beta = 1/kT;
1806
1807     /* count the numbers of samples */
1808     n1 = ca->ntot;
1809     n2 = cb->ntot;
1810
1811     /* to ensure the work values are the same as during the delta_G */
1812     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1813     if (ca->foreign_lambda->dhdl < 0)
1814     {
1815         /* this is the case when the delta U were calculated directly
1816            (i.e. we're not scaling dhdl) */
1817         Wfac1 = beta;
1818         Wfac2 = beta;
1819     }
1820     else
1821     {
1822         /* we're using dhdl, so delta_lambda needs to be a
1823            multiplication factor.  */
1824         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1825                                                   ca->native_lambda);
1826         Wfac1 =  beta*delta_lambda;
1827         Wfac2 = -beta*delta_lambda;
1828     }
1829
1830     /* first calculate the average work in both directions */
1831     for (i = 0; i < ca->nsamples; i++)
1832     {
1833         samples_t      *s = ca->s[i];
1834         sample_range_t *r = &(ca->r[i]);
1835         if (r->use)
1836         {
1837             if (!s->hist)
1838             {
1839                 for (j = r->start; j < r->end; j++)
1840                 {
1841                     W_ab += Wfac1*s->du[j];
1842                 }
1843             }
1844             else
1845             {
1846                 /* normalization factor multiplied with bin width and
1847                    number of samples (we normalize through M): */
1848                 double normdx = 1.;
1849                 double dx;
1850                 int    hd = 0; /* histogram direction */
1851                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1852                 {
1853                     hd = 1;
1854                 }
1855                 dx = s->hist->dx[hd];
1856
1857                 for (j = 0; j < s->hist->nbin[0]; j++)
1858                 {
1859                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1860                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1861                     W_ab += pxdx*x;
1862                 }
1863             }
1864         }
1865     }
1866     W_ab /= n1;
1867
1868     for (i = 0; i < cb->nsamples; i++)
1869     {
1870         samples_t      *s = cb->s[i];
1871         sample_range_t *r = &(cb->r[i]);
1872         if (r->use)
1873         {
1874             if (!s->hist)
1875             {
1876                 for (j = r->start; j < r->end; j++)
1877                 {
1878                     W_ba += Wfac1*s->du[j];
1879                 }
1880             }
1881             else
1882             {
1883                 /* normalization factor multiplied with bin width and
1884                    number of samples (we normalize through M): */
1885                 double normdx = 1.;
1886                 double dx;
1887                 int    hd = 0; /* histogram direction */
1888                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1889                 {
1890                     hd = 1;
1891                 }
1892                 dx = s->hist->dx[hd];
1893
1894                 for (j = 0; j < s->hist->nbin[0]; j++)
1895                 {
1896                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1897                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1898                     W_ba += pxdx*x;
1899                 }
1900             }
1901         }
1902     }
1903     W_ba /= n2;
1904
1905     /* then calculate the relative entropies */
1906     *sa = (W_ab - dg);
1907     *sb = (W_ba + dg);
1908 }
1909
1910 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1911                            double temp, double dg, double *stddev)
1912 {
1913     int    i, j;
1914     double M;
1915     double sigmafact = 0.;
1916     double kT, beta;
1917     double Wfac1, Wfac2;
1918     double n1, n2;
1919
1920     kT   = BOLTZ*temp;
1921     beta = 1/kT;
1922
1923     /* count the numbers of samples */
1924     n1 = ca->ntot;
1925     n2 = cb->ntot;
1926
1927     /* to ensure the work values are the same as during the delta_G */
1928     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1929     if (ca->foreign_lambda->dhdl < 0)
1930     {
1931         /* this is the case when the delta U were calculated directly
1932            (i.e. we're not scaling dhdl) */
1933         Wfac1 = beta;
1934         Wfac2 = beta;
1935     }
1936     else
1937     {
1938         /* we're using dhdl, so delta_lambda needs to be a
1939            multiplication factor.  */
1940         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1941                                                   ca->native_lambda);
1942         Wfac1 =  beta*delta_lambda;
1943         Wfac2 = -beta*delta_lambda;
1944     }
1945
1946     M = log(n1/n2);
1947
1948
1949     /* calculate average in both directions */
1950     for (i = 0; i < ca->nsamples; i++)
1951     {
1952         samples_t      *s = ca->s[i];
1953         sample_range_t *r = &(ca->r[i]);
1954         if (r->use)
1955         {
1956             if (!s->hist)
1957             {
1958                 for (j = r->start; j < r->end; j++)
1959                 {
1960                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1961                 }
1962             }
1963             else
1964             {
1965                 /* normalization factor multiplied with bin width and
1966                    number of samples (we normalize through M): */
1967                 double normdx = 1.;
1968                 double dx;
1969                 int    hd = 0; /* histogram direction */
1970                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1971                 {
1972                     hd = 1;
1973                 }
1974                 dx = s->hist->dx[hd];
1975
1976                 for (j = 0; j < s->hist->nbin[0]; j++)
1977                 {
1978                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1979                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1980
1981                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1982                 }
1983             }
1984         }
1985     }
1986     for (i = 0; i < cb->nsamples; i++)
1987     {
1988         samples_t      *s = cb->s[i];
1989         sample_range_t *r = &(cb->r[i]);
1990         if (r->use)
1991         {
1992             if (!s->hist)
1993             {
1994                 for (j = r->start; j < r->end; j++)
1995                 {
1996                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1997                 }
1998             }
1999             else
2000             {
2001                 /* normalization factor multiplied with bin width and
2002                    number of samples (we normalize through M): */
2003                 double normdx = 1.;
2004                 double dx;
2005                 int    hd = 0; /* histogram direction */
2006                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2007                 {
2008                     hd = 1;
2009                 }
2010                 dx = s->hist->dx[hd];
2011
2012                 for (j = 0; j < s->hist->nbin[0]; j++)
2013                 {
2014                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2015                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2016
2017                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2018                 }
2019             }
2020         }
2021     }
2022
2023     sigmafact /= (n1 + n2);
2024
2025
2026     /* Eq. 10 from
2027        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2028     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2029 }
2030
2031
2032
2033 static void calc_bar(barres_t *br, double tol,
2034                      int npee_min, int npee_max, gmx_bool *bEE,
2035                      double *partsum)
2036 {
2037     int      npee, p;
2038     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2039                                                         for calculated quantities */
2040     int      nsample1, nsample2;
2041     double   temp = br->a->temp;
2042     int      i, j;
2043     double   dg_min, dg_max;
2044     gmx_bool have_hist = FALSE;
2045
2046     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2047
2048     br->dg_disc_err      = 0.;
2049     br->dg_histrange_err = 0.;
2050
2051     /* check if there are histograms */
2052     for (i = 0; i < br->a->nsamples; i++)
2053     {
2054         if (br->a->r[i].use && br->a->s[i]->hist)
2055         {
2056             have_hist = TRUE;
2057             break;
2058         }
2059     }
2060     if (!have_hist)
2061     {
2062         for (i = 0; i < br->b->nsamples; i++)
2063         {
2064             if (br->b->r[i].use && br->b->s[i]->hist)
2065             {
2066                 have_hist = TRUE;
2067                 break;
2068             }
2069         }
2070     }
2071
2072     /* calculate histogram-specific errors */
2073     if (have_hist)
2074     {
2075         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2076         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2077
2078         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2079         {
2080             /* the histogram range  error is the biggest of the differences
2081                between the best estimate and the extremes */
2082             br->dg_histrange_err = fabs(dg_max - dg_min);
2083         }
2084         br->dg_disc_err = 0.;
2085         for (i = 0; i < br->a->nsamples; i++)
2086         {
2087             if (br->a->s[i]->hist)
2088             {
2089                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2090             }
2091         }
2092         for (i = 0; i < br->b->nsamples; i++)
2093         {
2094             if (br->b->s[i]->hist)
2095             {
2096                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2097             }
2098         }
2099     }
2100     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2101
2102     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2103
2104     dg_sig2     = 0;
2105     sa_sig2     = 0;
2106     sb_sig2     = 0;
2107     stddev_sig2 = 0;
2108
2109     *bEE = TRUE;
2110     {
2111         sample_coll_t ca, cb;
2112
2113         /* initialize the samples */
2114         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2115                          br->a->temp);
2116         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2117                          br->b->temp);
2118
2119         for (npee = npee_min; npee <= npee_max; npee++)
2120         {
2121             double dgs      = 0;
2122             double dgs2     = 0;
2123             double dsa      = 0;
2124             double dsb      = 0;
2125             double dsa2     = 0;
2126             double dsb2     = 0;
2127             double dstddev  = 0;
2128             double dstddev2 = 0;
2129
2130
2131             for (p = 0; p < npee; p++)
2132             {
2133                 double   dgp;
2134                 double   stddevc;
2135                 double   sac, sbc;
2136                 gmx_bool cac, cbc;
2137
2138                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2139                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2140
2141                 if (!cac || !cbc)
2142                 {
2143                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2144                     *bEE = FALSE;
2145                     if (cac)
2146                     {
2147                         sample_coll_destroy(&ca);
2148                     }
2149                     if (cbc)
2150                     {
2151                         sample_coll_destroy(&cb);
2152                     }
2153                     return;
2154                 }
2155
2156                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2157                 dgs  += dgp;
2158                 dgs2 += dgp*dgp;
2159
2160                 partsum[npee*(npee_max+1)+p] += dgp;
2161
2162                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2163                 dsa  += sac;
2164                 dsa2 += sac*sac;
2165                 dsb  += sbc;
2166                 dsb2 += sbc*sbc;
2167                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2168
2169                 dstddev  += stddevc;
2170                 dstddev2 += stddevc*stddevc;
2171
2172                 sample_coll_destroy(&ca);
2173                 sample_coll_destroy(&cb);
2174             }
2175             dgs     /= npee;
2176             dgs2    /= npee;
2177             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2178
2179             dsa     /= npee;
2180             dsa2    /= npee;
2181             dsb     /= npee;
2182             dsb2    /= npee;
2183             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2184             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2185
2186             dstddev     /= npee;
2187             dstddev2    /= npee;
2188             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2189         }
2190         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2191         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2192         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2193         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2194     }
2195 }
2196
2197
2198 static double bar_err(int nbmin, int nbmax, const double *partsum)
2199 {
2200     int    nb, b;
2201     double svar, s, s2, dg;
2202
2203     svar = 0;
2204     for (nb = nbmin; nb <= nbmax; nb++)
2205     {
2206         s  = 0;
2207         s2 = 0;
2208         for (b = 0; b < nb; b++)
2209         {
2210             dg  = partsum[nb*(nbmax+1)+b];
2211             s  += dg;
2212             s2 += dg*dg;
2213         }
2214         s    /= nb;
2215         s2   /= nb;
2216         svar += (s2 - s*s)/(nb - 1);
2217     }
2218
2219     return sqrt(svar/(nbmax + 1 - nbmin));
2220 }
2221
2222
2223 /* Seek the end of an identifier (consecutive non-spaces), followed by
2224    an optional number of spaces or '='-signs. Returns a pointer to the
2225    first non-space value found after that. Returns NULL if the string
2226    ends before that.
2227  */
2228 static const char *find_value(const char *str)
2229 {
2230     gmx_bool name_end_found = FALSE;
2231
2232     /* if the string is a NULL pointer, return a NULL pointer. */
2233     if (str == NULL)
2234     {
2235         return NULL;
2236     }
2237     while (*str != '\0')
2238     {
2239         /* first find the end of the name */
2240         if (!name_end_found)
2241         {
2242             if (isspace(*str) || (*str == '=') )
2243             {
2244                 name_end_found = TRUE;
2245             }
2246         }
2247         else
2248         {
2249             if (!( isspace(*str) || (*str == '=') ))
2250             {
2251                 return str;
2252             }
2253         }
2254         str++;
2255     }
2256     return NULL;
2257 }
2258
2259
2260
2261 /* read a vector-notation description of a lambda vector */
2262 static gmx_bool read_lambda_compvec(const char                *str,
2263                                     lambda_vec_t              *lv,
2264                                     const lambda_components_t *lc_in,
2265                                     lambda_components_t       *lc_out,
2266                                     const char               **end,
2267                                     const char                *fn)
2268 {
2269     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2270                                           components, or to check them */
2271     gmx_bool    start_reached = FALSE; /* whether the start of component names
2272                                           has been reached */
2273     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2274     int         n             = 0;     /* current component number */
2275     const char *val_start     = NULL;  /* start of the component name, or NULL
2276                                           if not in a value */
2277     char       *strtod_end;
2278     gmx_bool    OK = TRUE;
2279
2280     if (end)
2281     {
2282         *end = str;
2283     }
2284
2285
2286     if (lc_out && lc_out->N == 0)
2287     {
2288         initialize_lc = TRUE;
2289     }
2290
2291     if (lc_in == NULL)
2292     {
2293         lc_in = lc_out;
2294     }
2295
2296     while (1)
2297     {
2298         if (!start_reached)
2299         {
2300             if (isalnum(*str))
2301             {
2302                 vector        = FALSE;
2303                 start_reached = TRUE;
2304                 val_start     = str;
2305             }
2306             else if (*str == '(')
2307             {
2308                 vector        = TRUE;
2309                 start_reached = TRUE;
2310             }
2311             else if (!isspace(*str))
2312             {
2313                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2314             }
2315         }
2316         else
2317         {
2318             if (val_start)
2319             {
2320                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2321                 {
2322                     /* end of value */
2323                     if (lv == NULL)
2324                     {
2325                         if (initialize_lc)
2326                         {
2327                             lambda_components_add(lc_out, val_start,
2328                                                   (str-val_start));
2329                         }
2330                         else
2331                         {
2332                             if (!lambda_components_check(lc_out, n, val_start,
2333                                                          (str-val_start)))
2334                             {
2335                                 return FALSE;
2336                             }
2337                         }
2338                     }
2339                     else
2340                     {
2341                         /* add a vector component to lv */
2342                         lv->val[n] = strtod(val_start, &strtod_end);
2343                         if (val_start == strtod_end)
2344                         {
2345                             gmx_fatal(FARGS,
2346                                       "Error reading lambda vector in %s", fn);
2347                         }
2348                     }
2349                     /* reset for the next identifier */
2350                     val_start = NULL;
2351                     n++;
2352                     if (!vector)
2353                     {
2354                         return OK;
2355                     }
2356                 }
2357             }
2358             else if (isalnum(*str))
2359             {
2360                 val_start = str;
2361             }
2362             if (*str == ')')
2363             {
2364                 str++;
2365                 if (end)
2366                 {
2367                     *end = str;
2368                 }
2369                 if (!vector)
2370                 {
2371                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2372                 }
2373                 else
2374                 {
2375                     if (n == lc_in->N)
2376                     {
2377                         return OK;
2378                     }
2379                     else if (lv == NULL)
2380                     {
2381                         return FALSE;
2382                     }
2383                     else
2384                     {
2385                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2386                                   fn);
2387                         return FALSE;
2388                     }
2389
2390                 }
2391             }
2392         }
2393         if (*str == '\0')
2394         {
2395             break;
2396         }
2397         str++;
2398         if (end)
2399         {
2400             *end = str;
2401         }
2402     }
2403     if (vector)
2404     {
2405         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2406         return FALSE;
2407     }
2408     return OK;
2409 }
2410
2411 /* read and check the component names from a string */
2412 static gmx_bool read_lambda_components(const char          *str,
2413                                        lambda_components_t *lc,
2414                                        const char         **end,
2415                                        const char          *fn)
2416 {
2417     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2418 }
2419
2420 /* read an initialized lambda vector from a string */
2421 static gmx_bool read_lambda_vector(const char   *str,
2422                                    lambda_vec_t *lv,
2423                                    const char  **end,
2424                                    const char   *fn)
2425 {
2426     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2427 }
2428
2429
2430
2431 /* deduce lambda value from legend.
2432     fn = the file name
2433     legend = the legend string
2434     ba = the xvg data
2435     lam = the initialized lambda vector
2436    returns whether to use the data in this set.
2437  */
2438 static gmx_bool legend2lambda(const char   *fn,
2439                               const char   *legend,
2440                               xvg_t        *ba,
2441                               lambda_vec_t *lam)
2442 {
2443     double        lambda = 0;
2444     const char   *ptr    = NULL, *ptr2 = NULL;
2445     gmx_bool      ok     = FALSE;
2446     gmx_bool      bdhdl  = FALSE;
2447     const char   *tostr  = " to ";
2448
2449     if (legend == NULL)
2450     {
2451         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2452     }
2453
2454     /* look for the last 'to': */
2455     ptr2 = legend;
2456     do
2457     {
2458         ptr2 = strstr(ptr2, tostr);
2459         if (ptr2 != NULL)
2460         {
2461             ptr = ptr2;
2462             ptr2++;
2463         }
2464     }
2465     while (ptr2 != NULL && *ptr2 != '\0');
2466
2467     if (ptr)
2468     {
2469         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2470     }
2471     else
2472     {
2473         /* look for the = sign */
2474         ptr = strrchr(legend, '=');
2475         if (!ptr)
2476         {
2477             /* otherwise look for the last space */
2478             ptr = strrchr(legend, ' ');
2479         }
2480     }
2481
2482     if (strstr(legend, "dH"))
2483     {
2484         ok    = TRUE;
2485         bdhdl = TRUE;
2486     }
2487     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2488     {
2489         ok    = TRUE;
2490         bdhdl = FALSE;
2491     }
2492     else /*if (strstr(legend, "pV"))*/
2493     {
2494         return FALSE;
2495     }
2496     if (!ptr)
2497     {
2498         ok = FALSE;
2499     }
2500
2501     if (!ok)
2502     {
2503         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2504     }
2505     if (!bdhdl)
2506     {
2507         ptr = find_value(ptr);
2508         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2509         {
2510             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2511         }
2512     }
2513     else
2514     {
2515         int         dhdl_index;
2516         const char *end;
2517         char        buf[STRLEN];
2518
2519         ptr = strrchr(legend, '=');
2520         end = ptr;
2521         if (ptr)
2522         {
2523             /* there must be a component name */
2524             ptr--;
2525             if (ptr < legend)
2526             {
2527                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2528             }
2529             /* now backtrack to the start of the identifier */
2530             while (isspace(*ptr))
2531             {
2532                 end = ptr;
2533                 ptr--;
2534                 if (ptr < legend)
2535                 {
2536                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2537                 }
2538             }
2539             while (!isspace(*ptr))
2540             {
2541                 ptr--;
2542                 if (ptr < legend)
2543                 {
2544                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2545                 }
2546             }
2547             ptr++;
2548             strncpy(buf, ptr, (end-ptr));
2549             buf[(end-ptr)] = '\0';
2550             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2551             if (dhdl_index < 0)
2552             {
2553                 char buf[STRLEN];
2554                 strncpy(buf, ptr, (end-ptr));
2555                 buf[(end-ptr)] = '\0';
2556                 gmx_fatal(FARGS,
2557                           "Did not find lambda component for '%s' in %s",
2558                           buf, fn);
2559             }
2560         }
2561         else
2562         {
2563             if (lam->lc->N > 1)
2564             {
2565                 gmx_fatal(FARGS,
2566                           "dhdl without component name with >1 lambda component in %s",
2567                           fn);
2568             }
2569             dhdl_index = 0;
2570         }
2571         lam->dhdl = dhdl_index;
2572     }
2573     return TRUE;
2574 }
2575
2576 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2577                                 lambda_components_t *lc)
2578 {
2579     gmx_bool    bFound;
2580     const char *ptr;
2581     char       *end;
2582     double      native_lambda;
2583
2584     bFound = FALSE;
2585
2586     /* first check for a state string */
2587     ptr = strstr(subtitle, "state");
2588     if (ptr)
2589     {
2590         int         index = -1;
2591         const char *val_end;
2592
2593         /* the new 4.6 style lambda vectors */
2594         ptr = find_value(ptr);
2595         if (ptr)
2596         {
2597             index = strtol(ptr, &end, 10);
2598             if (ptr == end)
2599             {
2600                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2601                 return FALSE;
2602             }
2603             ptr = end;
2604         }
2605         else
2606         {
2607             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2608             return FALSE;
2609         }
2610         /* now find the lambda vector component names */
2611         while (*ptr != '(' && !isalnum(*ptr))
2612         {
2613             ptr++;
2614             if (*ptr == '\0')
2615             {
2616                 gmx_fatal(FARGS,
2617                           "Incomplete lambda vector component data in %s", fn);
2618                 return FALSE;
2619             }
2620         }
2621         val_end = ptr;
2622         if (!read_lambda_components(ptr, lc, &val_end, fn))
2623         {
2624             gmx_fatal(FARGS,
2625                       "lambda vector components in %s don't match those previously read",
2626                       fn);
2627         }
2628         ptr = find_value(val_end);
2629         if (!ptr)
2630         {
2631             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2632             return FALSE;
2633         }
2634         lambda_vec_init(&(ba->native_lambda), lc);
2635         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2636         {
2637             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2638         }
2639         ba->native_lambda.index = index;
2640         bFound                  = TRUE;
2641     }
2642     else
2643     {
2644         /* compatibility mode: check for lambda in other ways. */
2645         /* plain text lambda string */
2646         ptr = strstr(subtitle, "lambda");
2647         if (ptr == NULL)
2648         {
2649             /* xmgrace formatted lambda string */
2650             ptr = strstr(subtitle, "\\xl\\f{}");
2651         }
2652         if (ptr == NULL)
2653         {
2654             /* xmgr formatted lambda string */
2655             ptr = strstr(subtitle, "\\8l\\4");
2656         }
2657         if (ptr != NULL)
2658         {
2659             ptr = strstr(ptr, "=");
2660         }
2661         if (ptr != NULL)
2662         {
2663             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2664             /* add the lambda component name as an empty string */
2665             if (lc->N > 0)
2666             {
2667                 if (!lambda_components_check(lc, 0, "", 0))
2668                 {
2669                     gmx_fatal(FARGS,
2670                               "lambda vector components in %s don't match those previously read",
2671                               fn);
2672                 }
2673             }
2674             else
2675             {
2676                 lambda_components_add(lc, "", 0);
2677             }
2678             lambda_vec_init(&(ba->native_lambda), lc);
2679             ba->native_lambda.val[0] = native_lambda;
2680         }
2681     }
2682
2683     return bFound;
2684 }
2685
2686 static void filename2lambda(const char *fn, xvg_t *ba)
2687 {
2688     double        lambda;
2689     const char   *ptr, *digitptr;
2690     char         *endptr;
2691     int           dirsep;
2692     ptr = fn;
2693     /* go to the end of the path string and search backward to find the last
2694        directory in the path which has to contain the value of lambda
2695      */
2696     while (ptr[1] != '\0')
2697     {
2698         ptr++;
2699     }
2700     /* searching backward to find the second directory separator */
2701     dirsep   = 0;
2702     digitptr = NULL;
2703     while (ptr >= fn)
2704     {
2705         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2706         {
2707             if (dirsep == 1)
2708             {
2709                 break;
2710             }
2711             dirsep++;
2712         }
2713         /* save the last position of a digit between the last two
2714            separators = in the last dirname */
2715         if (dirsep > 0 && isdigit(*ptr))
2716         {
2717             digitptr = ptr;
2718         }
2719         ptr--;
2720     }
2721     if (!digitptr)
2722     {
2723         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2724                   " last directory in the path '%s' does not contain a number", fn);
2725     }
2726     if (digitptr[-1] == '-')
2727     {
2728         digitptr--;
2729     }
2730     lambda = strtod(digitptr, &endptr);
2731     if (endptr == digitptr)
2732     {
2733         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2734     }
2735 }
2736
2737 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2738                                   lambda_components_t *lc)
2739 {
2740     int          i;
2741     char        *subtitle, **legend, *ptr;
2742     int          np;
2743     gmx_bool     native_lambda_read = FALSE;
2744     char         buf[STRLEN];
2745     lambda_vec_t lv;
2746
2747     xvg_init(ba);
2748
2749     ba->filename = fn;
2750
2751     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2752     if (!ba->y)
2753     {
2754         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2755     }
2756     /* Reorder the data */
2757     ba->t  = ba->y[0];
2758     for (i = 1; i < ba->nset; i++)
2759     {
2760         ba->y[i-1] = ba->y[i];
2761     }
2762     ba->nset--;
2763
2764     snew(ba->np, ba->nset);
2765     for (i = 0; i < ba->nset; i++)
2766     {
2767         ba->np[i] = np;
2768     }
2769
2770     ba->temp = -1;
2771     if (subtitle != NULL)
2772     {
2773         /* try to extract temperature */
2774         ptr = strstr(subtitle, "T =");
2775         if (ptr != NULL)
2776         {
2777             ptr += 3;
2778             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2779             {
2780                 if (ba->temp <= 0)
2781                 {
2782                     gmx_fatal(FARGS, "Found temperature of %g in file '%s'",
2783                               ba->temp, fn);
2784                 }
2785             }
2786         }
2787     }
2788     if (ba->temp < 0)
2789     {
2790         if (*temp <= 0)
2791         {
2792             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]", fn);
2793         }
2794         ba->temp = *temp;
2795     }
2796
2797     /* Try to deduce lambda from the subtitle */
2798     if (subtitle)
2799     {
2800         if (subtitle2lambda(subtitle, ba, fn, lc))
2801         {
2802             native_lambda_read = TRUE;
2803         }
2804     }
2805     snew(ba->lambda, ba->nset);
2806     if (legend == NULL)
2807     {
2808         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2809         if (ba->nset == 1)
2810         {
2811             if (!native_lambda_read)
2812             {
2813                 /* Deduce lambda from the file name */
2814                 filename2lambda(fn, ba);
2815                 native_lambda_read = TRUE;
2816             }
2817             ba->lambda[0] = ba->native_lambda;
2818         }
2819         else
2820         {
2821             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2822         }
2823     }
2824     else
2825     {
2826         for (i = 0; i < ba->nset; )
2827         {
2828             gmx_bool use = FALSE;
2829             /* Read lambda from the legend */
2830             lambda_vec_init( &(ba->lambda[i]), lc );
2831             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2832             use = legend2lambda(fn, legend[i], ba, &(ba->lambda[i]));
2833             if (use)
2834             {
2835                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2836                 i++;
2837             }
2838             else
2839             {
2840                 int j;
2841                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2842                 for (j = i+1; j < ba->nset; j++)
2843                 {
2844                     ba->y[j-1]  = ba->y[j];
2845                     legend[j-1] = legend[j];
2846                 }
2847                 ba->nset--;
2848             }
2849         }
2850     }
2851
2852     if (!native_lambda_read)
2853     {
2854         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2855     }
2856
2857     if (legend != NULL)
2858     {
2859         for (i = 0; i < ba->nset-1; i++)
2860         {
2861             sfree(legend[i]);
2862         }
2863         sfree(legend);
2864     }
2865 }
2866
2867 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2868 {
2869     xvg_t     *barsim;
2870     samples_t *s;
2871     int        i;
2872     double    *lambda;
2873
2874     snew(barsim, 1);
2875
2876     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2877
2878     if (barsim->nset < 1)
2879     {
2880         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2881     }
2882
2883     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2884     {
2885         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2886     }
2887     *temp = barsim->temp;
2888
2889     /* now create a series of samples_t */
2890     snew(s, barsim->nset);
2891     for (i = 0; i < barsim->nset; i++)
2892     {
2893         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2894                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2895                                                    &(barsim->lambda[i])),
2896                      fn);
2897         s[i].du  = barsim->y[i];
2898         s[i].ndu = barsim->np[i];
2899         s[i].t   = barsim->t;
2900
2901         lambda_data_list_insert_sample(sd->lb, s+i);
2902     }
2903     {
2904         char buf[STRLEN];
2905
2906         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2907         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2908                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2909         for (i = 0; i < barsim->nset; i++)
2910         {
2911             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2912             printf("        %s (%d pts)\n", buf, s[i].ndu);
2913         }
2914     }
2915     printf("\n\n");
2916 }
2917
2918 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2919                                  double start_time, double delta_time,
2920                                  lambda_vec_t *native_lambda, double temp,
2921                                  double *last_t, const char *filename)
2922 {
2923     int           i, j;
2924     gmx_bool      allocated;
2925     double        old_foreign_lambda;
2926     lambda_vec_t *foreign_lambda;
2927     int           type;
2928     samples_t    *s; /* convenience pointer */
2929     int           startj;
2930
2931     /* check the block types etc. */
2932     if ( (blk->nsub < 3) ||
2933          (blk->sub[0].type != xdr_datatype_int) ||
2934          (blk->sub[1].type != xdr_datatype_double) ||
2935          (
2936              (blk->sub[2].type != xdr_datatype_float) &&
2937              (blk->sub[2].type != xdr_datatype_double)
2938          ) ||
2939          (blk->sub[0].nr < 1) ||
2940          (blk->sub[1].nr < 1) )
2941     {
2942         gmx_fatal(FARGS,
2943                   "Unexpected/corrupted block data in file %s around time %g.",
2944                   filename, start_time);
2945     }
2946
2947     snew(foreign_lambda, 1);
2948     lambda_vec_init(foreign_lambda, native_lambda->lc);
2949     lambda_vec_copy(foreign_lambda, native_lambda);
2950     type = blk->sub[0].ival[0];
2951     if (type == dhbtDH)
2952     {
2953         for (i = 0; i < native_lambda->lc->N; i++)
2954         {
2955             foreign_lambda->val[i] = blk->sub[1].dval[i];
2956         }
2957     }
2958     else
2959     {
2960         if (blk->sub[0].nr > 1)
2961         {
2962             foreign_lambda->dhdl = blk->sub[0].ival[1];
2963         }
2964         else
2965         {
2966             foreign_lambda->dhdl = 0;
2967         }
2968     }
2969
2970     if (!*smp)
2971     {
2972         /* initialize the samples structure if it's empty. */
2973         snew(*smp, 1);
2974         samples_init(*smp, native_lambda, foreign_lambda, temp,
2975                      type == dhbtDHDL, filename);
2976         (*smp)->start_time = start_time;
2977         (*smp)->delta_time = delta_time;
2978     }
2979
2980     /* set convenience pointer */
2981     s = *smp;
2982
2983     /* now double check */
2984     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2985     {
2986         char buf[STRLEN], buf2[STRLEN];
2987         lambda_vec_print(foreign_lambda, buf, FALSE);
2988         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2989         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2990         gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2991                   filename, start_time);
2992     }
2993
2994     /* make room for the data */
2995     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2996     {
2997         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2998             blk->sub[2].nr*2 : s->ndu_alloc;
2999         srenew(s->du_alloc, s->ndu_alloc);
3000         s->du = s->du_alloc;
3001     }
3002     startj   = s->ndu;
3003     s->ndu  += blk->sub[2].nr;
3004     s->ntot += blk->sub[2].nr;
3005     *ndu     = blk->sub[2].nr;
3006
3007     /* and copy the data*/
3008     for (j = 0; j < blk->sub[2].nr; j++)
3009     {
3010         if (blk->sub[2].type == xdr_datatype_float)
3011         {
3012             s->du[startj+j] = blk->sub[2].fval[j];
3013         }
3014         else
3015         {
3016             s->du[startj+j] = blk->sub[2].dval[j];
3017         }
3018     }
3019     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3020     {
3021         *last_t = start_time + blk->sub[2].nr*delta_time;
3022     }
3023 }
3024
3025 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3026                                       double start_time, double delta_time,
3027                                       lambda_vec_t *native_lambda, double temp,
3028                                       double *last_t, const char *filename)
3029 {
3030     int           i, j;
3031     samples_t    *s;
3032     int           nhist;
3033     double        old_foreign_lambda;
3034     lambda_vec_t *foreign_lambda;
3035     int           type;
3036     int           nbins[2];
3037
3038     /* check the block types etc. */
3039     if ( (blk->nsub < 2) ||
3040          (blk->sub[0].type != xdr_datatype_double) ||
3041          (blk->sub[1].type != xdr_datatype_large_int) ||
3042          (blk->sub[0].nr < 2)  ||
3043          (blk->sub[1].nr < 2) )
3044     {
3045         gmx_fatal(FARGS,
3046                   "Unexpected/corrupted block data in file %s around time %g",
3047                   filename, start_time);
3048     }
3049
3050     nhist = blk->nsub-2;
3051     if (nhist == 0)
3052     {
3053         return NULL;
3054     }
3055     if (nhist > 2)
3056     {
3057         gmx_fatal(FARGS,
3058                   "Unexpected/corrupted block data in file %s around time %g",
3059                   filename, start_time);
3060     }
3061
3062     snew(s, 1);
3063     *nsamples = 1;
3064
3065     snew(foreign_lambda, 1);
3066     lambda_vec_init(foreign_lambda, native_lambda->lc);
3067     lambda_vec_copy(foreign_lambda, native_lambda);
3068     type = (int)(blk->sub[1].lval[1]);
3069     if (type == dhbtDH)
3070     {
3071         double old_foreign_lambda;
3072
3073         old_foreign_lambda = blk->sub[0].dval[0];
3074         if (old_foreign_lambda >= 0)
3075         {
3076             foreign_lambda->val[0] = old_foreign_lambda;
3077             if (foreign_lambda->lc->N > 1)
3078             {
3079                 gmx_fatal(FARGS,
3080                           "Single-component lambda in multi-component file %s",
3081                           filename);
3082             }
3083         }
3084         else
3085         {
3086             for (i = 0; i < native_lambda->lc->N; i++)
3087             {
3088                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3089             }
3090         }
3091     }
3092     else
3093     {
3094         if (foreign_lambda->lc->N > 1)
3095         {
3096             if (blk->sub[1].nr < 3 + nhist)
3097             {
3098                 gmx_fatal(FARGS,
3099                           "Missing derivative coord in multi-component file %s",
3100                           filename);
3101             }
3102             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3103         }
3104         else
3105         {
3106             foreign_lambda->dhdl = 0;
3107         }
3108     }
3109
3110     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3111                  filename);
3112     snew(s->hist, 1);
3113
3114     for (i = 0; i < nhist; i++)
3115     {
3116         nbins[i] = blk->sub[i+2].nr;
3117     }
3118
3119     hist_init(s->hist, nhist, nbins);
3120
3121     for (i = 0; i < nhist; i++)
3122     {
3123         s->hist->x0[i] = blk->sub[1].lval[2+i];
3124         s->hist->dx[i] = blk->sub[0].dval[1];
3125         if (i == 1)
3126         {
3127             s->hist->dx[i] = -s->hist->dx[i];
3128         }
3129     }
3130
3131     s->hist->start_time = start_time;
3132     s->hist->delta_time = delta_time;
3133     s->start_time       = start_time;
3134     s->delta_time       = delta_time;
3135
3136     for (i = 0; i < nhist; i++)
3137     {
3138         int             nbin;
3139         gmx_large_int_t sum = 0;
3140
3141         for (j = 0; j < s->hist->nbin[i]; j++)
3142         {
3143             int binv = (int)(blk->sub[i+2].ival[j]);
3144
3145             s->hist->bin[i][j] = binv;
3146             sum               += binv;
3147
3148         }
3149         if (i == 0)
3150         {
3151             s->ntot      = sum;
3152             s->hist->sum = sum;
3153         }
3154         else
3155         {
3156             if (s->ntot != sum)
3157             {
3158                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3159                           filename);
3160             }
3161         }
3162     }
3163
3164     if (start_time + s->hist->sum*delta_time > *last_t)
3165     {
3166         *last_t = start_time + s->hist->sum*delta_time;
3167     }
3168     return s;
3169 }
3170
3171
3172 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3173 {
3174     int            i, j;
3175     ener_file_t    fp;
3176     t_enxframe    *fr;
3177     int            nre;
3178     gmx_enxnm_t   *enm           = NULL;
3179     double         first_t       = -1;
3180     double         last_t        = -1;
3181     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3182     int           *nhists        = NULL; /* array to keep count & print at end */
3183     int           *npts          = NULL; /* array to keep count & print at end */
3184     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3185     lambda_vec_t  *native_lambda;
3186     double         end_time;             /* the end time of the last batch of samples */
3187     int            nsamples = 0;
3188     lambda_vec_t   start_lambda;
3189
3190     fp = open_enx(fn, "r");
3191     do_enxnms(fp, &nre, &enm);
3192     snew(fr, 1);
3193
3194     snew(native_lambda, 1);
3195     start_lambda.lc = NULL;
3196
3197     while (do_enx(fp, fr))
3198     {
3199         /* count the data blocks */
3200         int    nblocks_raw  = 0;
3201         int    nblocks_hist = 0;
3202         int    nlam         = 0;
3203         int    k;
3204         /* DHCOLL block information: */
3205         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3206         double rtemp      = 0;
3207
3208         /* count the blocks and handle collection information: */
3209         for (i = 0; i < fr->nblock; i++)
3210         {
3211             if (fr->block[i].id == enxDHHIST)
3212             {
3213                 nblocks_hist++;
3214             }
3215             if (fr->block[i].id == enxDH)
3216             {
3217                 nblocks_raw++;
3218             }
3219             if (fr->block[i].id == enxDHCOLL)
3220             {
3221                 nlam++;
3222                 if ( (fr->block[i].nsub < 1) ||
3223                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3224                      (fr->block[i].sub[0].nr < 5))
3225                 {
3226                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3227                 }
3228
3229                 /* read the data from the DHCOLL block */
3230                 rtemp            =        fr->block[i].sub[0].dval[0];
3231                 start_time       =   fr->block[i].sub[0].dval[1];
3232                 delta_time       =   fr->block[i].sub[0].dval[2];
3233                 old_start_lambda = fr->block[i].sub[0].dval[3];
3234                 delta_lambda     = fr->block[i].sub[0].dval[4];
3235
3236                 if (delta_lambda > 0)
3237                 {
3238                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3239                 }
3240                 if ( ( *temp != rtemp) && (*temp > 0) )
3241                 {
3242                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3243                 }
3244                 *temp = rtemp;
3245
3246                 if (old_start_lambda >= 0)
3247                 {
3248                     if (sd->lc.N > 0)
3249                     {
3250                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3251                         {
3252                             gmx_fatal(FARGS,
3253                                       "lambda vector components in %s don't match those previously read",
3254                                       fn);
3255                         }
3256                     }
3257                     else
3258                     {
3259                         lambda_components_add(&(sd->lc), "", 0);
3260                     }
3261                     if (!start_lambda.lc)
3262                     {
3263                         lambda_vec_init(&start_lambda, &(sd->lc));
3264                     }
3265                     start_lambda.val[0] = old_start_lambda;
3266                 }
3267                 else
3268                 {
3269                     /* read lambda vector */
3270                     int      n_lambda_vec;
3271                     gmx_bool check = (sd->lc.N > 0);
3272                     if (fr->block[i].nsub < 2)
3273                     {
3274                         gmx_fatal(FARGS,
3275                                   "No lambda vector, but start_lambda=%g\n",
3276                                   old_start_lambda);
3277                     }
3278                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3279                     for (j = 0; j < n_lambda_vec; j++)
3280                     {
3281                         const char *name =
3282                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3283                         if (check)
3284                         {
3285                             /* check the components */
3286                             lambda_components_check(&(sd->lc), j, name,
3287                                                     strlen(name));
3288                         }
3289                         else
3290                         {
3291                             lambda_components_add(&(sd->lc), name,
3292                                                   strlen(name));
3293                         }
3294                     }
3295                     lambda_vec_init(&start_lambda, &(sd->lc));
3296                     start_lambda.index = fr->block[i].sub[1].ival[0];
3297                     for (j = 0; j < n_lambda_vec; j++)
3298                     {
3299                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3300                     }
3301                 }
3302                 if (first_t < 0)
3303                 {
3304                     first_t = start_time;
3305                 }
3306             }
3307         }
3308
3309         if (nlam != 1)
3310         {
3311             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3312         }
3313         if (nblocks_raw > 0 && nblocks_hist > 0)
3314         {
3315             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3316         }
3317
3318         if (nsamples > 0)
3319         {
3320             /* check the native lambda */
3321             if (!lambda_vec_same(&start_lambda, native_lambda) )
3322             {
3323                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
3324                           fn, native_lambda, start_lambda, start_time);
3325             }
3326             /* check the number of samples against the previous number */
3327             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3328             {
3329                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3330                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3331             }
3332             /* check whether last iterations's end time matches with
3333                the currrent start time */
3334             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3335             {
3336                 /* it didn't. We need to store our samples and reallocate */
3337                 for (i = 0; i < nsamples; i++)
3338                 {
3339                     if (samples_rawdh[i])
3340                     {
3341                         /* insert it into the existing list */
3342                         lambda_data_list_insert_sample(sd->lb,
3343                                                        samples_rawdh[i]);
3344                         /* and make sure we'll allocate a new one this time
3345                            around */
3346                         samples_rawdh[i] = NULL;
3347                     }
3348                 }
3349             }
3350         }
3351         else
3352         {
3353             /* this is the first round; allocate the associated data
3354                structures */
3355             /*native_lambda=start_lambda;*/
3356             lambda_vec_init(native_lambda, &(sd->lc));
3357             lambda_vec_copy(native_lambda, &start_lambda);
3358             nsamples = nblocks_raw+nblocks_hist;
3359             snew(nhists, nsamples);
3360             snew(npts, nsamples);
3361             snew(lambdas, nsamples);
3362             snew(samples_rawdh, nsamples);
3363             for (i = 0; i < nsamples; i++)
3364             {
3365                 nhists[i]        = 0;
3366                 npts[i]          = 0;
3367                 lambdas[i]       = NULL;
3368                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3369                                             ones contain values */
3370             }
3371         }
3372
3373         /* and read them */
3374         k = 0; /* counter for the lambdas, etc. arrays */
3375         for (i = 0; i < fr->nblock; i++)
3376         {
3377             if (fr->block[i].id == enxDH)
3378             {
3379                 int type = (fr->block[i].sub[0].ival[0]);
3380                 if (type == dhbtDH || type == dhbtDHDL)
3381                 {
3382                     int ndu;
3383                     read_edr_rawdh_block(&(samples_rawdh[k]),
3384                                          &ndu,
3385                                          &(fr->block[i]),
3386                                          start_time, delta_time,
3387                                          native_lambda, rtemp,
3388                                          &last_t, fn);
3389                     npts[k] += ndu;
3390                     if (samples_rawdh[k])
3391                     {
3392                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3393                     }
3394                     k++;
3395                 }
3396             }
3397             else if (fr->block[i].id == enxDHHIST)
3398             {
3399                 int type = (int)(fr->block[i].sub[1].lval[1]);
3400                 if (type == dhbtDH || type == dhbtDHDL)
3401                 {
3402                     int        j;
3403                     int        nb = 0;
3404                     samples_t *s; /* this is where the data will go */
3405                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3406                                             start_time, delta_time,
3407                                             native_lambda, rtemp,
3408                                             &last_t, fn);
3409                     nhists[k] += nb;
3410                     if (nb > 0)
3411                     {
3412                         lambdas[k] = s->foreign_lambda;
3413                     }
3414                     k++;
3415                     /* and insert the new sample immediately */
3416                     for (j = 0; j < nb; j++)
3417                     {
3418                         lambda_data_list_insert_sample(sd->lb, s+j);
3419                     }
3420                 }
3421             }
3422         }
3423     }
3424     /* Now store all our extant sample collections */
3425     for (i = 0; i < nsamples; i++)
3426     {
3427         if (samples_rawdh[i])
3428         {
3429             /* insert it into the existing list */
3430             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3431         }
3432     }
3433
3434
3435     {
3436         char buf[STRLEN];
3437         printf("\n");
3438         lambda_vec_print(native_lambda, buf, FALSE);
3439         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3440                fn, first_t, last_t, buf);
3441         for (i = 0; i < nsamples; i++)
3442         {
3443             if (lambdas[i])
3444             {
3445                 lambda_vec_print(lambdas[i], buf, TRUE);
3446                 if (nhists[i] > 0)
3447                 {
3448                     printf("        %s (%d hists)\n", buf, nhists[i]);
3449                 }
3450                 else
3451                 {
3452                     printf("        %s (%d pts)\n", buf, npts[i]);
3453                 }
3454             }
3455         }
3456     }
3457     printf("\n\n");
3458     sfree(npts);
3459     sfree(nhists);
3460     sfree(lambdas);
3461 }
3462
3463
3464 int gmx_bar(int argc, char *argv[])
3465 {
3466     static const char *desc[] = {
3467         "[TT]g_bar[tt] calculates free energy difference estimates through ",
3468         "Bennett's acceptance ratio method (BAR). It also automatically",
3469         "adds series of individual free energies obtained with BAR into",
3470         "a combined free energy estimate.[PAR]",
3471
3472         "Every individual BAR free energy difference relies on two ",
3473         "simulations at different states: say state A and state B, as",
3474         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3475         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3476         "average of the Hamiltonian difference of state B given state A and",
3477         "vice versa.",
3478         "The energy differences to the other state must be calculated",
3479         "explicitly during the simulation. This can be done with",
3480         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3481
3482         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3483         "Two types of input files are supported:[BR]",
3484         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3485         "The files should have columns ",
3486         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3487         "The [GRK]lambda[grk] values are inferred ",
3488         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3489         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3490         "legends of Delta H",
3491         "[BR]",
3492         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3493         "[TT]-extp[tt] option for these files, it is assumed",
3494         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3495         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3496         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3497         "subtitle (if present), otherwise from a number in the subdirectory ",
3498         "in the file name.[PAR]",
3499
3500         "The [GRK]lambda[grk] of the simulation is parsed from ",
3501         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3502         "foreign [GRK]lambda[grk] values from the legend containing the ",
3503         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3504         "the legend line containing 'T ='.[PAR]",
3505
3506         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3507         "These can contain either lists of energy differences (see the ",
3508         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3509         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3510         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3511         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3512
3513         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3514         "the energy difference can also be extrapolated from the ",
3515         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3516         "option, which assumes that the system's Hamiltonian depends linearly",
3517         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3518
3519         "The free energy estimates are determined using BAR with bisection, ",
3520         "with the precision of the output set with [TT]-prec[tt]. ",
3521         "An error estimate taking into account time correlations ",
3522         "is made by splitting the data into blocks and determining ",
3523         "the free energy differences over those blocks and assuming ",
3524         "the blocks are independent. ",
3525         "The final error estimate is determined from the average variance ",
3526         "over 5 blocks. A range of block numbers for error estimation can ",
3527         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3528
3529         "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
3530         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3531         "samples. [BB]Note[bb] that when aggregating energy ",
3532         "differences/derivatives with different sampling intervals, this is ",
3533         "almost certainly not correct. Usually subsequent energies are ",
3534         "correlated and different time intervals mean different degrees ",
3535         "of correlation between samples.[PAR]",
3536
3537         "The results are split in two parts: the last part contains the final ",
3538         "results in kJ/mol, together with the error estimate for each part ",
3539         "and the total. The first part contains detailed free energy ",
3540         "difference estimates and phase space overlap measures in units of ",
3541         "kT (together with their computed error estimate). The printed ",
3542         "values are:[BR]",
3543         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3544         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3545         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3546         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3547         "[TT]*[tt]    s_A: an estimate of the relative entropy of A in B.[BR]",
3548         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3549
3550         "The relative entropy of both states in each other's ensemble can be ",
3551         "interpreted as a measure of phase space overlap: ",
3552         "the relative entropy s_A of the work samples of lambda_B in the ",
3553         "ensemble of lambda_A (and vice versa for s_B), is a ",
3554         "measure of the 'distance' between Boltzmann distributions of ",
3555         "the two states, that goes to zero for identical distributions. See ",
3556         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3557         "[PAR]",
3558         "The estimate of the expected per-sample standard deviation, as given ",
3559         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3560         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3561         "of the actual statistical error, because it assumes independent samples).[PAR]",
3562
3563         "To get a visual estimate of the phase space overlap, use the ",
3564         "[TT]-oh[tt] option to write series of histograms, together with the ",
3565         "[TT]-nbin[tt] option.[PAR]"
3566     };
3567     static real        begin    = 0, end = -1, temp = -1;
3568     int                nd       = 2, nbmin = 5, nbmax = 5;
3569     int                nbin     = 100;
3570     gmx_bool           use_dhdl = FALSE;
3571     gmx_bool           calc_s, calc_v;
3572     t_pargs            pa[] = {
3573         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3574         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3575         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3576         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3577         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3578         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3579         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3580         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3581     };
3582
3583     t_filenm           fnm[] = {
3584         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3585         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3586         { efXVG, "-o",  "bar",    ffOPTWR },
3587         { efXVG, "-oi", "barint", ffOPTWR },
3588         { efXVG, "-oh", "histogram", ffOPTWR }
3589     };
3590 #define NFILE asize(fnm)
3591
3592     int          f, i, j;
3593     int          nf = 0;    /* file counter */
3594     int          nbs;
3595     int          nfile_tot; /* total number of input files */
3596     int          nxvgfile = 0;
3597     int          nedrfile = 0;
3598     char       **fxvgnms;
3599     char       **fedrnms;
3600     sim_data_t   sim_data; /* the simulation data */
3601     barres_t    *results;  /* the results */
3602     int          nresults; /* number of results in results array */
3603
3604     double      *partsum;
3605     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3606     FILE        *fpb, *fpi;
3607     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3608     char         buf[STRLEN], buf2[STRLEN];
3609     char         ktformat[STRLEN], sktformat[STRLEN];
3610     char         kteformat[STRLEN], skteformat[STRLEN];
3611     output_env_t oenv;
3612     double       kT, beta;
3613     gmx_bool     result_OK = TRUE, bEE = TRUE;
3614
3615     gmx_bool     disc_err          = FALSE;
3616     double       sum_disc_err      = 0.; /* discretization error */
3617     gmx_bool     histrange_err     = FALSE;
3618     double       sum_histrange_err = 0.; /* histogram range error */
3619     double       stat_err          = 0.; /* statistical error */
3620
3621     parse_common_args(&argc, argv,
3622                       PCA_CAN_VIEW,
3623                       NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv);
3624
3625     if (opt2bSet("-f", NFILE, fnm))
3626     {
3627         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3628     }
3629     if (opt2bSet("-g", NFILE, fnm))
3630     {
3631         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3632     }
3633
3634     sim_data_init(&sim_data);
3635 #if 0
3636     /* make linked list */
3637     lb = &lambda_head;
3638     lambda_data_init(lb, 0, 0);
3639     lb->next = lb;
3640     lb->prev = lb;
3641 #endif
3642
3643
3644     nfile_tot = nxvgfile + nedrfile;
3645
3646     if (nfile_tot == 0)
3647     {
3648         gmx_fatal(FARGS, "No input files!");
3649     }
3650
3651     if (nd < 0)
3652     {
3653         gmx_fatal(FARGS, "Can not have negative number of digits");
3654     }
3655     prec = pow(10, -nd);
3656
3657     snew(partsum, (nbmax+1)*(nbmax+1));
3658     nf = 0;
3659
3660     /* read in all files. First xvg files */
3661     for (f = 0; f < nxvgfile; f++)
3662     {
3663         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3664         nf++;
3665     }
3666     /* then .edr files */
3667     for (f = 0; f < nedrfile; f++)
3668     {
3669         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3670         nf++;
3671     }
3672
3673     /* fix the times to allow for equilibration */
3674     sim_data_impose_times(&sim_data, begin, end);
3675
3676     if (opt2bSet("-oh", NFILE, fnm))
3677     {
3678         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3679     }
3680
3681     /* assemble the output structures from the lambdas */
3682     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3683
3684     sum_disc_err = barres_list_max_disc_err(results, nresults);
3685
3686     if (nresults == 0)
3687     {
3688         printf("\nNo results to calculate.\n");
3689         return 0;
3690     }
3691
3692     if (sum_disc_err > prec)
3693     {
3694         prec = sum_disc_err;
3695         nd   = ceil(-log10(prec));
3696         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3697     }
3698
3699
3700     /*sprintf(lamformat,"%%6.3f");*/
3701     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3702     /* the format strings of the results in kT */
3703     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3704     sprintf( sktformat, "%%%ds", 6+nd);
3705     /* the format strings of the errors in kT */
3706     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3707     sprintf( skteformat, "%%%ds", 4+nd);
3708     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3709     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3710
3711
3712
3713     fpb = NULL;
3714     if (opt2bSet("-o", NFILE, fnm))
3715     {
3716         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3717         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3718                             "\\lambda", buf, exvggtXYDY, oenv);
3719     }
3720
3721     fpi = NULL;
3722     if (opt2bSet("-oi", NFILE, fnm))
3723     {
3724         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3725         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3726                        "\\lambda", buf, oenv);
3727     }
3728
3729
3730
3731     if (nbmin > nbmax)
3732     {
3733         nbmin = nbmax;
3734     }
3735
3736     /* first calculate results */
3737     bEE      = TRUE;
3738     disc_err = FALSE;
3739     for (f = 0; f < nresults; f++)
3740     {
3741         /* Determine the free energy difference with a factor of 10
3742          * more accuracy than requested for printing.
3743          */
3744         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3745                  &bEE, partsum);
3746
3747         if (results[f].dg_disc_err > prec/10.)
3748         {
3749             disc_err = TRUE;
3750         }
3751         if (results[f].dg_histrange_err > prec/10.)
3752         {
3753             histrange_err = TRUE;
3754         }
3755     }
3756
3757     /* print results in kT */
3758     kT   = BOLTZ*temp;
3759     beta = 1/kT;
3760
3761     printf("\nTemperature: %g K\n", temp);
3762
3763     printf("\nDetailed results in kT (see help for explanation):\n\n");
3764     printf("%6s ", " lam_A");
3765     printf("%6s ", " lam_B");
3766     printf(sktformat,  "DG ");
3767     if (bEE)
3768     {
3769         printf(skteformat, "+/- ");
3770     }
3771     if (disc_err)
3772     {
3773         printf(skteformat, "disc ");
3774     }
3775     if (histrange_err)
3776     {
3777         printf(skteformat, "range ");
3778     }
3779     printf(sktformat,  "s_A ");
3780     if (bEE)
3781     {
3782         printf(skteformat, "+/- " );
3783     }
3784     printf(sktformat,  "s_B ");
3785     if (bEE)
3786     {
3787         printf(skteformat, "+/- " );
3788     }
3789     printf(sktformat,  "stdev ");
3790     if (bEE)
3791     {
3792         printf(skteformat, "+/- ");
3793     }
3794     printf("\n");
3795     for (f = 0; f < nresults; f++)
3796     {
3797         lambda_vec_print_short(results[f].a->native_lambda, buf);
3798         printf("%s ", buf);
3799         lambda_vec_print_short(results[f].b->native_lambda, buf);
3800         printf("%s ", buf);
3801         printf(ktformat,  results[f].dg);
3802         printf(" ");
3803         if (bEE)
3804         {
3805             printf(kteformat, results[f].dg_err);
3806             printf(" ");
3807         }
3808         if (disc_err)
3809         {
3810             printf(kteformat, results[f].dg_disc_err);
3811             printf(" ");
3812         }
3813         if (histrange_err)
3814         {
3815             printf(kteformat, results[f].dg_histrange_err);
3816             printf(" ");
3817         }
3818         printf(ktformat,  results[f].sa);
3819         printf(" ");
3820         if (bEE)
3821         {
3822             printf(kteformat, results[f].sa_err);
3823             printf(" ");
3824         }
3825         printf(ktformat,  results[f].sb);
3826         printf(" ");
3827         if (bEE)
3828         {
3829             printf(kteformat, results[f].sb_err);
3830             printf(" ");
3831         }
3832         printf(ktformat,  results[f].dg_stddev);
3833         printf(" ");
3834         if (bEE)
3835         {
3836             printf(kteformat, results[f].dg_stddev_err);
3837         }
3838         printf("\n");
3839
3840         /* Check for negative relative entropy with a 95% certainty. */
3841         if (results[f].sa < -2*results[f].sa_err ||
3842             results[f].sb < -2*results[f].sb_err)
3843         {
3844             result_OK = FALSE;
3845         }
3846     }
3847
3848     if (!result_OK)
3849     {
3850         printf("\nWARNING: Some of these results violate the Second Law of "
3851                "Thermodynamics: \n"
3852                "         This is can be the result of severe undersampling, or "
3853                "(more likely)\n"
3854                "         there is something wrong with the simulations.\n");
3855     }
3856
3857
3858     /* final results in kJ/mol */
3859     printf("\n\nFinal results in kJ/mol:\n\n");
3860     dg_tot  = 0;
3861     for (f = 0; f < nresults; f++)
3862     {
3863
3864         if (fpi != NULL)
3865         {
3866             lambda_vec_print_short(results[f].a->native_lambda, buf);
3867             fprintf(fpi, xvg2format, buf, dg_tot);
3868         }
3869
3870
3871         if (fpb != NULL)
3872         {
3873             lambda_vec_print_intermediate(results[f].a->native_lambda,
3874                                           results[f].b->native_lambda,
3875                                           buf);
3876
3877             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3878         }
3879
3880         printf("point ");
3881         lambda_vec_print_short(results[f].a->native_lambda, buf);
3882         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3883         printf("%s - %s", buf, buf2);
3884         printf(",   DG ");
3885
3886         printf(dgformat, results[f].dg*kT);
3887         if (bEE)
3888         {
3889             printf(" +/- ");
3890             printf(dgformat, results[f].dg_err*kT);
3891         }
3892         if (histrange_err)
3893         {
3894             printf(" (max. range err. = ");
3895             printf(dgformat, results[f].dg_histrange_err*kT);
3896             printf(")");
3897             sum_histrange_err += results[f].dg_histrange_err*kT;
3898         }
3899
3900         printf("\n");
3901         dg_tot += results[f].dg;
3902     }
3903     printf("\n");
3904     printf("total ");
3905     lambda_vec_print_short(results[0].a->native_lambda, buf);
3906     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3907     printf("%s - %s", buf, buf2);
3908     printf(",   DG ");
3909
3910     printf(dgformat, dg_tot*kT);
3911     if (bEE)
3912     {
3913         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3914         printf(" +/- ");
3915         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3916     }
3917     printf("\n");
3918     if (disc_err)
3919     {
3920         printf("\nmaximum discretization error = ");
3921         printf(dgformat, sum_disc_err);
3922         if (bEE && stat_err < sum_disc_err)
3923         {
3924             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3925         }
3926     }
3927     if (histrange_err)
3928     {
3929         printf("\nmaximum histogram range error = ");
3930         printf(dgformat, sum_histrange_err);
3931         if (bEE && stat_err < sum_histrange_err)
3932         {
3933             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3934         }
3935
3936     }
3937     printf("\n");
3938
3939
3940     if (fpi != NULL)
3941     {
3942         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3943         fprintf(fpi, xvg2format, buf, dg_tot);
3944         ffclose(fpi);
3945     }
3946
3947     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3948     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3949
3950     thanx(stderr);
3951
3952     return 0;
3953 }