src/gromacs/gmxana/gmx_bar.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <ctype.h>
  40 #include <float.h>
  41 #include <math.h>
  42 #include <stdlib.h>
  43 #include <string.h>
  44
  45 #include "typedefs.h"
  46 #include "gromacs/utility/smalloc.h"
  47 #include "gromacs/utility/futil.h"
  48 #include "gromacs/commandline/pargs.h"
  49 #include "macros.h"
  50 #include "gromacs/fileio/enxio.h"
  51 #include "physics.h"
  52 #include "gromacs/utility/fatalerror.h"
  53 #include "xvgr.h"
  54 #include "gmx_ana.h"
  55 #include "gromacs/math/utilities.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "names.h"
  58 #include "mdebin.h"
  59
  60
  61 /* Structure for the names of lambda vector components */
  62 typedef struct lambda_components_t
  63 {
  64     char **names;  /* Array of strings with names for the lambda vector
  65                       components */
  66     int    N;      /* The number of components */
  67     int    Nalloc; /* The number of allocated components */
  68 } lambda_components_t;
  69
  70 /* Structure for a lambda vector or a dhdl derivative direction */
  71 typedef struct lambda_vec_t
  72 {
  73     double                    *val;   /* The lambda vector component values. Only valid if
  74                                          dhdl == -1 */
  75     int                        dhdl;  /* The coordinate index for the derivative described by this
  76                                          structure, or -1 */
  77     const lambda_components_t *lc;    /* the associated lambda_components
  78                                          structure */
  79     int                        index; /* The state number (init-lambda-state) of this lambda
  80                                          vector, if known. If not, it is set to -1 */
  81 } lambda_vec_t;
  82
  83 /* the dhdl.xvg data from a simulation */
  84 typedef struct xvg_t
  85 {
  86     const char   *filename;
  87     int           ftp;           /* file type */
  88     int           nset;          /* number of lambdas, including dhdl */
  89     int          *np;            /* number of data points (du or hists) per lambda */
  90     int           np_alloc;      /* number of points (du or hists) allocated */
  91     double        temp;          /* temperature */
  92     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  93     double       *t;             /* the times (of second index for y) */
  94     double      **y;             /* the dU values. y[0] holds the derivative, while
  95                                     further ones contain the energy differences between
  96                                     the native lambda and the 'foreign' lambdas. */
  97     lambda_vec_t  native_lambda; /* the native lambda */
  98
  99     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 100 } xvg_t;
 101
 102
 103 typedef struct hist_t
 104 {
 105     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 106     double          dx[2];                  /* the histogram spacing. The reverse
 107                                                dx is the negative of the forward dx.*/
 108     gmx_int64_t     x0[2];                  /* the (forward + reverse) histogram start
 109                                                    point(s) as int */
 110
 111     int             nbin[2];                /* the (forward+reverse) number of bins */
 112     gmx_int64_t     sum;                    /* the total number of counts. Must be
 113                                                    the same for forward + reverse.  */
 114     int             nhist;                  /* number of hist datas (forward or reverse) */
 115
 116     double          start_time, delta_time; /* start time, end time of histogram */
 117 } hist_t;
 118
 119
 120 /* an aggregate of samples for partial free energy calculation */
 121 typedef struct samples_t
 122 {
 123     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 124     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 125     double        temp;           /* the temperature */
 126     gmx_bool      derivative;     /* whether this sample is a derivative */
 127
 128     /* The samples come either as either delta U lists: */
 129     int     ndu;                    /* the number of delta U samples */
 130     double *du;                     /* the delta u's */
 131     double *t;                      /* the times associated with those samples, or: */
 132     double  start_time, delta_time; /*start time and delta time for linear time*/
 133
 134     /* or as histograms: */
 135     hist_t *hist; /* a histogram */
 136
 137     /* allocation data: (not NULL for data 'owned' by this struct) */
 138     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 139     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 140     hist_t         *hist_alloc;          /* allocated hist */
 141
 142     gmx_int64_t     ntot;                /* total number of samples */
 143     const char     *filename;            /* the file name this sample comes from */
 144 } samples_t;
 145
 146 /* a sample range (start to end for du-style data, or boolean
 147     for both du-style data and histograms */
 148 typedef struct sample_range_t
 149 {
 150     int        start, end; /* start and end index for du style data */
 151     gmx_bool   use;        /* whether to use this sample */
 152
 153     samples_t *s;          /* the samples this range belongs to */
 154 } sample_range_t;
 155
 156
 157 /* a collection of samples for a partial free energy calculation
 158     (i.e. the collection of samples from one native lambda to one
 159     foreign lambda) */
 160 typedef struct sample_coll_t
 161 {
 162     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 163                                           in the histogram */
 164     lambda_vec_t   *foreign_lambda;    /* collection */
 165     double          temp;              /* the temperature */
 166
 167     int             nsamples;          /* the number of samples */
 168     samples_t     **s;                 /* the samples themselves */
 169     sample_range_t *r;                 /* the sample ranges */
 170     int             nsamples_alloc;    /* number of allocated samples */
 171
 172     gmx_int64_t     ntot;              /* total number of samples in the ranges of
 173                                               this collection */
 174
 175     struct sample_coll_t *next, *prev; /* next and previous in the list */
 176 } sample_coll_t;
 177
 178 /* all the samples associated with a lambda point */
 179 typedef struct lambda_data_t
 180 {
 181     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 182     double                temp;        /* temperature */
 183
 184     sample_coll_t        *sc;          /* the samples */
 185
 186     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 187
 188     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 189 } lambda_data_t;
 190
 191 /* Top-level data structure of simulation data */
 192 typedef struct sim_data_t
 193 {
 194     lambda_data_t      *lb;      /* a lambda data linked list */
 195     lambda_data_t       lb_head; /* The head element of the linked list */
 196
 197     lambda_components_t lc;      /* the allowed components of the lambda
 198                                     vectors */
 199 } sim_data_t;
 200
 201 /* Top-level data structure with calculated values. */
 202 typedef struct {
 203     sample_coll_t *a, *b;            /* the simulation data */
 204
 205     double         dg;               /* the free energy difference */
 206     double         dg_err;           /* the free energy difference */
 207
 208     double         dg_disc_err;      /* discretization error */
 209     double         dg_histrange_err; /* histogram range error */
 210
 211     double         sa;               /* relative entropy of b in state a */
 212     double         sa_err;           /* error in sa */
 213     double         sb;               /* relative entropy of a in state b */
 214     double         sb_err;           /* error in sb */
 215
 216     double         dg_stddev;        /* expected dg stddev per sample */
 217     double         dg_stddev_err;    /* error in dg_stddev */
 218 } barres_t;
 219
 220
 221 /* Initialize a lambda_components structure */
 222 static void lambda_components_init(lambda_components_t *lc)
 223 {
 224     lc->N      = 0;
 225     lc->Nalloc = 2;
 226     snew(lc->names, lc->Nalloc);
 227 }
 228
 229 /* Add a component to a lambda_components structure */
 230 static void lambda_components_add(lambda_components_t *lc,
 231                                   const char *name, size_t name_length)
 232 {
 233     while (lc->N + 1 > lc->Nalloc)
 234     {
 235         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 236         srenew(lc->names, lc->Nalloc);
 237     }
 238     snew(lc->names[lc->N], name_length+1);
 239     strncpy(lc->names[lc->N], name, name_length);
 240     lc->N++;
 241 }
 242
 243 /* check whether a component with index 'index' matches the given name, or
 244    is also NULL. Returns TRUE if this is the case.
 245    the string name does not need to end */
 246 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 247                                         int                        index,
 248                                         const char                *name,
 249                                         size_t                     name_length)
 250 {
 251     size_t len;
 252     if (index >= lc->N)
 253     {
 254         return FALSE;
 255     }
 256     if (name == NULL && lc->names[index] == NULL)
 257     {
 258         return TRUE;
 259     }
 260     if ((name == NULL) != (lc->names[index] == NULL))
 261     {
 262         return FALSE;
 263     }
 264     len = strlen(lc->names[index]);
 265     if (len != name_length)
 266     {
 267         return FALSE;
 268     }
 269     if (strncmp(lc->names[index], name, name_length) == 0)
 270     {
 271         return TRUE;
 272     }
 273     return FALSE;
 274 }
 275
 276 /* Find the index of a given lambda component name, or -1 if not found */
 277 static int lambda_components_find(const lambda_components_t *lc,
 278                                   const char                *name,
 279                                   size_t                     name_length)
 280 {
 281     int i;
 282
 283     for (i = 0; i < lc->N; i++)
 284     {
 285         if (strncmp(lc->names[i], name, name_length) == 0)
 286         {
 287             return i;
 288         }
 289     }
 290     return -1;
 291 }
 292
 293
 294
 295 /* initialize a lambda vector */
 296 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 297 {
 298     snew(lv->val, lc->N);
 299     lv->index = -1;
 300     lv->dhdl  = -1;
 301     lv->lc    = lc;
 302 }
 303
 304 static void lambda_vec_destroy(lambda_vec_t *lv)
 305 {
 306     sfree(lv->val);
 307 }
 308
 309 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 310 {
 311     int i;
 312
 313     lambda_vec_init(lv, orig->lc);
 314     lv->dhdl  = orig->dhdl;
 315     lv->index = orig->index;
 316     for (i = 0; i < lv->lc->N; i++)
 317     {
 318         lv->val[i] = orig->val[i];
 319     }
 320 }
 321
 322 /* write a lambda vec to a preallocated string */
 323 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 324 {
 325     int    i;
 326     size_t np;
 327
 328     str[0] = 0; /* reset the string */
 329     if (lv->dhdl < 0)
 330     {
 331         if (named)
 332         {
 333             str += sprintf(str, "delta H to ");
 334         }
 335         if (lv->lc->N > 1)
 336         {
 337             str += sprintf(str, "(");
 338         }
 339         for (i = 0; i < lv->lc->N; i++)
 340         {
 341             str += sprintf(str, "%g", lv->val[i]);
 342             if (i < lv->lc->N-1)
 343             {
 344                 str += sprintf(str, ", ");
 345             }
 346         }
 347         if (lv->lc->N > 1)
 348         {
 349             str += sprintf(str, ")");
 350         }
 351     }
 352     else
 353     {
 354         /* this lambda vector describes a derivative */
 355         str += sprintf(str, "dH/dl");
 356         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 357         {
 358             str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 359         }
 360     }
 361 }
 362
 363 /* write a shortened version of the lambda vec to a preallocated string */
 364 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 365 {
 366     int    i;
 367     size_t np;
 368
 369     if (lv->index >= 0)
 370     {
 371         sprintf(str, "%6d", lv->index);
 372     }
 373     else
 374     {
 375         if (lv->dhdl < 0)
 376         {
 377             sprintf(str, "%6.3f", lv->val[0]);
 378         }
 379         else
 380         {
 381             sprintf(str, "dH/dl[%d]", lv->dhdl);
 382         }
 383     }
 384 }
 385
 386 /* write an intermediate version of two lambda vecs to a preallocated string */
 387 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 388                                           const lambda_vec_t *b, char *str)
 389 {
 390     int    i;
 391     size_t np;
 392
 393     str[0] = 0;
 394     if ( (a->index >= 0) && (b->index >= 0) )
 395     {
 396         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 397     }
 398     else
 399     {
 400         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 401         {
 402             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 403         }
 404     }
 405 }
 406
 407
 408
 409 /* calculate the difference in lambda vectors: c = a-b.
 410    c must be initialized already, and a and b must describe non-derivative
 411    lambda points */
 412 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 413                             lambda_vec_t *c)
 414 {
 415     int i;
 416
 417     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 418     {
 419         gmx_fatal(FARGS,
 420                   "Trying to calculate the difference between derivatives instead of lambda points");
 421     }
 422     if ((a->lc != b->lc) || (a->lc != c->lc) )
 423     {
 424         gmx_fatal(FARGS,
 425                   "Trying to calculate the difference lambdas with differing basis set");
 426     }
 427     for (i = 0; i < a->lc->N; i++)
 428     {
 429         c->val[i] = a->val[i] - b->val[i];
 430     }
 431 }
 432
 433 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 434    a and b must describe non-derivative lambda points */
 435 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 436 {
 437     int    i;
 438     double ret = 0.;
 439
 440     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 441     {
 442         gmx_fatal(FARGS,
 443                   "Trying to calculate the difference between derivatives instead of lambda points");
 444     }
 445     if (a->lc != b->lc)
 446     {
 447         gmx_fatal(FARGS,
 448                   "Trying to calculate the difference lambdas with differing basis set");
 449     }
 450     for (i = 0; i < a->lc->N; i++)
 451     {
 452         double df = a->val[i] - b->val[i];
 453         ret += df*df;
 454     }
 455     return sqrt(ret);
 456 }
 457
 458
 459 /* check whether two lambda vectors are the same */
 460 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 461 {
 462     int i;
 463
 464     if (a->lc != b->lc)
 465     {
 466         return FALSE;
 467     }
 468     if (a->dhdl < 0)
 469     {
 470         for (i = 0; i < a->lc->N; i++)
 471         {
 472             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 473             {
 474                 return FALSE;
 475             }
 476         }
 477         return TRUE;
 478     }
 479     else
 480     {
 481         /* they're derivatives, so we check whether the indices match */
 482         return (a->dhdl == b->dhdl);
 483     }
 484 }
 485
 486 /* Compare the sort order of two foreign lambda vectors
 487
 488     returns 1 if a is 'bigger' than b,
 489     returns 0 if they're the same,
 490     returns -1 if a is 'smaller' than b.*/
 491 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 492                                        const lambda_vec_t *b)
 493 {
 494     int      i;
 495     double   norm_a    = 0, norm_b = 0;
 496     gmx_bool different = FALSE;
 497
 498     if (a->lc != b->lc)
 499     {
 500         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 501     }
 502     /* if either one has an index we sort based on that */
 503     if ((a->index >= 0) || (b->index >= 0))
 504     {
 505         if (a->index == b->index)
 506         {
 507             return 0;
 508         }
 509         return (a->index > b->index) ? 1 : -1;
 510     }
 511     if (a->dhdl >= 0 || b->dhdl >= 0)
 512     {
 513         /* lambda vectors that are derivatives always sort higher than those
 514            without derivatives */
 515         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 516         {
 517             return (a->dhdl >= 0) ? 1 : -1;
 518         }
 519         return a->dhdl > b->dhdl;
 520     }
 521
 522     /* neither has an index, so we can only sort on the lambda components,
 523        which is only valid if there is one component */
 524     for (i = 0; i < a->lc->N; i++)
 525     {
 526         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 527         {
 528             different = TRUE;
 529         }
 530         norm_a += a->val[i]*a->val[i];
 531         norm_b += b->val[i]*b->val[i];
 532     }
 533     if (!different)
 534     {
 535         return 0;
 536     }
 537     return norm_a > norm_b;
 538 }
 539
 540 /* Compare the sort order of two native lambda vectors
 541
 542     returns 1 if a is 'bigger' than b,
 543     returns 0 if they're the same,
 544     returns -1 if a is 'smaller' than b.*/
 545 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 546                                       const lambda_vec_t *b)
 547 {
 548     int i;
 549
 550     if (a->lc != b->lc)
 551     {
 552         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 553     }
 554     /* if either one has an index we sort based on that */
 555     if ((a->index >= 0) || (b->index >= 0))
 556     {
 557         if (a->index == b->index)
 558         {
 559             return 0;
 560         }
 561         return (a->index > b->index) ? 1 : -1;
 562     }
 563     /* neither has an index, so we can only sort on the lambda components,
 564        which is only valid if there is one component */
 565     if (a->lc->N > 1)
 566     {
 567         gmx_fatal(FARGS,
 568                   "Can't compare lambdas with no index and > 1 component");
 569     }
 570     if (a->dhdl >= 0 || b->dhdl >= 0)
 571     {
 572         gmx_fatal(FARGS,
 573                   "Can't compare native lambdas that are derivatives");
 574     }
 575     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 576     {
 577         return 0;
 578     }
 579     return a->val[0] > b->val[0] ? 1 : -1;
 580 }
 581
 582
 583
 584
 585 static void hist_init(hist_t *h, int nhist, int *nbin)
 586 {
 587     int i;
 588     if (nhist > 2)
 589     {
 590         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 591     }
 592     for (i = 0; i < nhist; i++)
 593     {
 594         snew(h->bin[i], nbin[i]);
 595         h->x0[i]      = 0;
 596         h->nbin[i]    = nbin[i];
 597         h->start_time = h->delta_time = 0;
 598         h->dx[i]      = 0;
 599     }
 600     h->sum   = 0;
 601     h->nhist = nhist;
 602 }
 603
 604 static void hist_destroy(hist_t *h)
 605 {
 606     sfree(h->bin);
 607 }
 608
 609
 610 static void xvg_init(xvg_t *ba)
 611 {
 612     ba->filename = NULL;
 613     ba->nset     = 0;
 614     ba->np_alloc = 0;
 615     ba->np       = NULL;
 616     ba->y        = NULL;
 617 }
 618
 619 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 620                          lambda_vec_t *foreign_lambda, double temp,
 621                          gmx_bool derivative, const char *filename)
 622 {
 623     s->native_lambda  = native_lambda;
 624     s->foreign_lambda = foreign_lambda;
 625     s->temp           = temp;
 626     s->derivative     = derivative;
 627
 628     s->ndu        = 0;
 629     s->du         = NULL;
 630     s->t          = NULL;
 631     s->start_time = s->delta_time = 0;
 632     s->hist       = NULL;
 633     s->du_alloc   = NULL;
 634     s->t_alloc    = NULL;
 635     s->hist_alloc = NULL;
 636     s->ndu_alloc  = 0;
 637     s->nt_alloc   = 0;
 638
 639     s->ntot     = 0;
 640     s->filename = filename;
 641 }
 642
 643 static void sample_range_init(sample_range_t *r, samples_t *s)
 644 {
 645     r->start = 0;
 646     r->end   = s->ndu;
 647     r->use   = TRUE;
 648     r->s     = NULL;
 649 }
 650
 651 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 652                              lambda_vec_t *foreign_lambda, double temp)
 653 {
 654     sc->native_lambda  = native_lambda;
 655     sc->foreign_lambda = foreign_lambda;
 656     sc->temp           = temp;
 657
 658     sc->nsamples       = 0;
 659     sc->s              = NULL;
 660     sc->r              = NULL;
 661     sc->nsamples_alloc = 0;
 662
 663     sc->ntot = 0;
 664     sc->next = sc->prev = NULL;
 665 }
 666
 667 static void sample_coll_destroy(sample_coll_t *sc)
 668 {
 669     /* don't free the samples themselves */
 670     sfree(sc->r);
 671     sfree(sc->s);
 672 }
 673
 674
 675 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 676                              double temp)
 677 {
 678     l->lambda = native_lambda;
 679     l->temp   = temp;
 680
 681     l->next = NULL;
 682     l->prev = NULL;
 683
 684     l->sc = &(l->sc_head);
 685
 686     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 687     l->sc->next = l->sc;
 688     l->sc->prev = l->sc;
 689 }
 690
 691 static void barres_init(barres_t *br)
 692 {
 693     br->dg            = 0;
 694     br->dg_err        = 0;
 695     br->sa            = 0;
 696     br->sa_err        = 0;
 697     br->sb            = 0;
 698     br->sb_err        = 0;
 699     br->dg_stddev     = 0;
 700     br->dg_stddev_err = 0;
 701
 702     br->a = NULL;
 703     br->b = NULL;
 704 }
 705
 706
 707 /* calculate the total number of samples in a sample collection */
 708 static void sample_coll_calc_ntot(sample_coll_t *sc)
 709 {
 710     int i;
 711
 712     sc->ntot = 0;
 713     for (i = 0; i < sc->nsamples; i++)
 714     {
 715         if (sc->r[i].use)
 716         {
 717             if (sc->s[i]->hist)
 718             {
 719                 sc->ntot += sc->s[i]->ntot;
 720             }
 721             else
 722             {
 723                 sc->ntot += sc->r[i].end - sc->r[i].start;
 724             }
 725         }
 726     }
 727 }
 728
 729
 730 /* find the barsamples_t associated with a lambda that corresponds to
 731    a specific foreign lambda */
 732 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 733                                                    lambda_vec_t  *foreign_lambda)
 734 {
 735     sample_coll_t *sc = l->sc->next;
 736
 737     while (sc != l->sc)
 738     {
 739         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 740         {
 741             return sc;
 742         }
 743         sc = sc->next;
 744     }
 745
 746     return NULL;
 747 }
 748
 749 /* insert li into an ordered list of lambda_colls */
 750 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 751 {
 752     sample_coll_t *scn = l->sc->next;
 753     while ( (scn != l->sc) )
 754     {
 755         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 756         {
 757             break;
 758         }
 759         scn = scn->next;
 760     }
 761     /* now insert it before the found scn */
 762     sc->next        = scn;
 763     sc->prev        = scn->prev;
 764     scn->prev->next = sc;
 765     scn->prev       = sc;
 766 }
 767
 768 /* insert li into an ordered list of lambdas */
 769 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 770 {
 771     lambda_data_t *lc = head->next;
 772     while (lc != head)
 773     {
 774         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 775         {
 776             break;
 777         }
 778         lc = lc->next;
 779     }
 780     /* now insert ourselves before the found lc */
 781     li->next       = lc;
 782     li->prev       = lc->prev;
 783     lc->prev->next = li;
 784     lc->prev       = li;
 785 }
 786
 787 /* insert a sample and a sample_range into a sample_coll. The
 788     samples are stored as a pointer, the range is copied. */
 789 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 790                                       sample_range_t *r)
 791 {
 792     /* first check if it belongs here */
 793     if (sc->temp != s->temp)
 794     {
 795         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 796                   s->filename, sc->next->s[0]->filename);
 797     }
 798     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 799     {
 800         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 801                   s->filename, sc->next->s[0]->filename);
 802     }
 803     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 804     {
 805         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 806                   s->filename, sc->next->s[0]->filename);
 807     }
 808
 809     /* check if there's room */
 810     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 811     {
 812         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 813         srenew(sc->s, sc->nsamples_alloc);
 814         srenew(sc->r, sc->nsamples_alloc);
 815     }
 816     sc->s[sc->nsamples] = s;
 817     sc->r[sc->nsamples] = *r;
 818     sc->nsamples++;
 819
 820     sample_coll_calc_ntot(sc);
 821 }
 822
 823 /* insert a sample into a lambda_list, creating the right sample_coll if
 824    neccesary */
 825 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 826 {
 827     gmx_bool       found = FALSE;
 828     sample_coll_t *sc;
 829     sample_range_t r;
 830
 831     lambda_data_t *l = head->next;
 832
 833     /* first search for the right lambda_data_t */
 834     while (l != head)
 835     {
 836         if (lambda_vec_same(l->lambda, s->native_lambda) )
 837         {
 838             found = TRUE;
 839             break;
 840         }
 841         l = l->next;
 842     }
 843
 844     if (!found)
 845     {
 846         snew(l, 1);                                     /* allocate a new one */
 847         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 848         lambda_data_insert_lambda(head, l);             /* add it to the list */
 849     }
 850
 851     /* now look for a sample collection */
 852     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 853     if (!sc)
 854     {
 855         snew(sc, 1); /* allocate a new one */
 856         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 857         lambda_data_insert_sample_coll(l, sc);
 858     }
 859
 860     /* now insert the samples into the sample coll */
 861     sample_range_init(&r, s);
 862     sample_coll_insert_sample(sc, s, &r);
 863 }
 864
 865
 866 /* make a histogram out of a sample collection */
 867 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 868                                   int *nbin_alloc, int *nbin,
 869                                   double *dx, double *xmin, int nbin_default)
 870 {
 871     int      i, j, k;
 872     gmx_bool dx_set   = FALSE;
 873     gmx_bool xmin_set = FALSE;
 874
 875     gmx_bool xmax_set      = FALSE;
 876     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 877                                        limits of a histogram */
 878     double   xmax          = -1;
 879
 880     /* first determine dx and xmin; try the histograms */
 881     for (i = 0; i < sc->nsamples; i++)
 882     {
 883         if (sc->s[i]->hist)
 884         {
 885             hist_t *hist = sc->s[i]->hist;
 886             for (k = 0; k < hist->nhist; k++)
 887             {
 888                 double hdx      = hist->dx[k];
 889                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 890
 891                 /* we use the biggest dx*/
 892                 if ( (!dx_set) || hist->dx[0] > *dx)
 893                 {
 894                     dx_set = TRUE;
 895                     *dx    = hist->dx[0];
 896                 }
 897                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 898                 {
 899                     xmin_set = TRUE;
 900                     *xmin    = (hist->x0[k]*hdx);
 901                 }
 902
 903                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 904                 {
 905                     xmax_set = TRUE;
 906                     xmax     = xmax_now;
 907                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 908                     {
 909                         xmax_set_hard = TRUE;
 910                     }
 911                 }
 912                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 913                 {
 914                     xmax_set_hard = TRUE;
 915                     xmax          = xmax_now;
 916                 }
 917             }
 918         }
 919     }
 920     /* and the delta us */
 921     for (i = 0; i < sc->nsamples; i++)
 922     {
 923         if (sc->s[i]->ndu > 0)
 924         {
 925             /* determine min and max */
 926             int    starti  = sc->r[i].start;
 927             int    endi    = sc->r[i].end;
 928             double du_xmin = sc->s[i]->du[starti];
 929             double du_xmax = sc->s[i]->du[starti];
 930             for (j = starti+1; j < endi; j++)
 931             {
 932                 if (sc->s[i]->du[j] < du_xmin)
 933                 {
 934                     du_xmin = sc->s[i]->du[j];
 935                 }
 936                 if (sc->s[i]->du[j] > du_xmax)
 937                 {
 938                     du_xmax = sc->s[i]->du[j];
 939                 }
 940             }
 941
 942             /* and now change the limits */
 943             if ( (!xmin_set) || (du_xmin < *xmin) )
 944             {
 945                 xmin_set = TRUE;
 946                 *xmin    = du_xmin;
 947             }
 948             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 949             {
 950                 xmax_set = TRUE;
 951                 xmax     = du_xmax;
 952             }
 953         }
 954     }
 955
 956     if (!xmax_set || !xmin_set)
 957     {
 958         *nbin = 0;
 959         return;
 960     }
 961
 962
 963     if (!dx_set)
 964     {
 965         *nbin = nbin_default;
 966         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 967                                                be 0, and we count from 0 */
 968     }
 969     else
 970     {
 971         *nbin = (xmax-(*xmin))/(*dx);
 972     }
 973
 974     if (*nbin > *nbin_alloc)
 975     {
 976         *nbin_alloc = *nbin;
 977         srenew(*bin, *nbin_alloc);
 978     }
 979
 980     /* reset the histogram */
 981     for (i = 0; i < (*nbin); i++)
 982     {
 983         (*bin)[i] = 0;
 984     }
 985
 986     /* now add the actual data */
 987     for (i = 0; i < sc->nsamples; i++)
 988     {
 989         if (sc->s[i]->hist)
 990         {
 991             hist_t *hist = sc->s[i]->hist;
 992             for (k = 0; k < hist->nhist; k++)
 993             {
 994                 double hdx       = hist->dx[k];
 995                 double xmin_hist = hist->x0[k]*hdx;
 996                 for (j = 0; j < hist->nbin[k]; j++)
 997                 {
 998                     /* calculate the bin corresponding to the middle of the
 999                        original bin */
1000                     double x     = hdx*(j+0.5) + xmin_hist;
1001                     int    binnr = (int)((x-(*xmin))/(*dx));
1002
1003                     if (binnr >= *nbin || binnr < 0)
1004                     {
1005                         binnr = (*nbin)-1;
1006                     }
1007
1008                     (*bin)[binnr] += hist->bin[k][j];
1009                 }
1010             }
1011         }
1012         else
1013         {
1014             int starti = sc->r[i].start;
1015             int endi   = sc->r[i].end;
1016             for (j = starti; j < endi; j++)
1017             {
1018                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1019                 if (binnr >= *nbin || binnr < 0)
1020                 {
1021                     binnr = (*nbin)-1;
1022                 }
1023
1024                 (*bin)[binnr]++;
1025             }
1026         }
1027     }
1028 }
1029
1030 /* write a collection of histograms to a file */
1031 void sim_data_histogram(sim_data_t *sd, const char *filename,
1032                         int nbin_default, const output_env_t oenv)
1033 {
1034     char           label_x[STRLEN];
1035     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1036     const char    *title   = "N(\\DeltaH)";
1037     const char    *label_y = "Samples";
1038     FILE          *fp;
1039     lambda_data_t *bl;
1040     int            nsets     = 0;
1041     char         **setnames  = NULL;
1042     gmx_bool       first_set = FALSE;
1043     /* histogram data: */
1044     int           *hist       = NULL;
1045     int            nbin       = 0;
1046     int            nbin_alloc = 0;
1047     double         dx         = 0;
1048     double         min        = 0;
1049     int            i;
1050     lambda_data_t *bl_head = sd->lb;
1051
1052     printf("\nWriting histogram to %s\n", filename);
1053     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1054
1055     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1056
1057     /* first get all the set names */
1058     bl = bl_head->next;
1059     /* iterate over all lambdas */
1060     while (bl != bl_head)
1061     {
1062         sample_coll_t *sc = bl->sc->next;
1063
1064         /* iterate over all samples */
1065         while (sc != bl->sc)
1066         {
1067             char buf[STRLEN], buf2[STRLEN];
1068
1069             nsets++;
1070             srenew(setnames, nsets);
1071             snew(setnames[nsets-1], STRLEN);
1072             if (sc->foreign_lambda->dhdl < 0)
1073             {
1074                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1075                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1076                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1077                         deltag, lambda, buf2, lambda, buf);
1078             }
1079             else
1080             {
1081                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1082                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1083                         dhdl, lambda, buf);
1084             }
1085             sc = sc->next;
1086         }
1087
1088         bl = bl->next;
1089     }
1090     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1091
1092
1093     /* now make the histograms */
1094     bl = bl_head->next;
1095     /* iterate over all lambdas */
1096     while (bl != bl_head)
1097     {
1098         sample_coll_t *sc = bl->sc->next;
1099
1100         /* iterate over all samples */
1101         while (sc != bl->sc)
1102         {
1103             if (!first_set)
1104             {
1105                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1106             }
1107
1108             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1109                                   nbin_default);
1110
1111             for (i = 0; i < nbin; i++)
1112             {
1113                 double xmin = i*dx + min;
1114                 double xmax = (i+1)*dx + min;
1115
1116                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1117             }
1118
1119             first_set = FALSE;
1120             sc        = sc->next;
1121         }
1122
1123         bl = bl->next;
1124     }
1125
1126     if (hist)
1127     {
1128         sfree(hist);
1129     }
1130
1131     xvgrclose(fp);
1132 }
1133
1134 /* create a collection (array) of barres_t object given a ordered linked list
1135    of barlamda_t sample collections */
1136 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1137                                     gmx_bool use_dhdl)
1138 {
1139     lambda_data_t *bl;
1140     int            nlambda = 0;
1141     barres_t      *res;
1142     int            i;
1143     gmx_bool       dhdl    = FALSE;
1144     gmx_bool       first   = TRUE;
1145     lambda_data_t *bl_head = sd->lb;
1146
1147     /* first count the lambdas */
1148     bl = bl_head->next;
1149     while (bl != bl_head)
1150     {
1151         nlambda++;
1152         bl = bl->next;
1153     }
1154     snew(res, nlambda-1);
1155
1156     /* next put the right samples in the res */
1157     *nres = 0;
1158     bl    = bl_head->next->next; /* we start with the second one. */
1159     while (bl != bl_head)
1160     {
1161         sample_coll_t *sc, *scprev;
1162         barres_t      *br = &(res[*nres]);
1163         /* there is always a previous one. we search for that as a foreign
1164            lambda: */
1165         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1166         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1167
1168         barres_init(br);
1169
1170         if (use_dhdl)
1171         {
1172             /* we use dhdl */
1173
1174             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1175             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1176
1177             if (first)
1178             {
1179                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1180                 dhdl = TRUE;
1181             }
1182             if (!dhdl)
1183             {
1184                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1185             }
1186         }
1187         else if (!scprev && !sc)
1188         {
1189             gmx_fatal(FARGS, "There is no path from lambda=%f -> %f that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1190         }
1191
1192         /* normal delta H */
1193         if (!scprev)
1194         {
1195             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->lambda, bl->prev->lambda);
1196         }
1197         if (!sc)
1198         {
1199             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->prev->lambda, bl->lambda);
1200         }
1201         br->a = scprev;
1202         br->b = sc;
1203
1204         first = FALSE;
1205         (*nres)++;
1206         bl = bl->next;
1207     }
1208     return res;
1209 }
1210
1211 /* estimate the maximum discretization error */
1212 static double barres_list_max_disc_err(barres_t *res, int nres)
1213 {
1214     int    i, j;
1215     double disc_err = 0.;
1216     double delta_lambda;
1217
1218     for (i = 0; i < nres; i++)
1219     {
1220         barres_t *br = &(res[i]);
1221
1222         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1223                                            br->a->native_lambda);
1224
1225         for (j = 0; j < br->a->nsamples; j++)
1226         {
1227             if (br->a->s[j]->hist)
1228             {
1229                 double Wfac = 1.;
1230                 if (br->a->s[j]->derivative)
1231                 {
1232                     Wfac =  delta_lambda;
1233                 }
1234
1235                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1236             }
1237         }
1238         for (j = 0; j < br->b->nsamples; j++)
1239         {
1240             if (br->b->s[j]->hist)
1241             {
1242                 double Wfac = 1.;
1243                 if (br->b->s[j]->derivative)
1244                 {
1245                     Wfac =  delta_lambda;
1246                 }
1247                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1248             }
1249         }
1250     }
1251     return disc_err;
1252 }
1253
1254
1255 /* impose start and end times on a sample collection, updating sample_ranges */
1256 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1257                                      double end_t)
1258 {
1259     int i;
1260     for (i = 0; i < sc->nsamples; i++)
1261     {
1262         samples_t      *s = sc->s[i];
1263         sample_range_t *r = &(sc->r[i]);
1264         if (s->hist)
1265         {
1266             double end_time = s->hist->delta_time*s->hist->sum +
1267                 s->hist->start_time;
1268             if (s->hist->start_time < begin_t || end_time > end_t)
1269             {
1270                 r->use = FALSE;
1271             }
1272         }
1273         else
1274         {
1275             if (!s->t)
1276             {
1277                 double end_time;
1278                 if (s->start_time < begin_t)
1279                 {
1280                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1281                 }
1282                 end_time = s->delta_time*s->ndu + s->start_time;
1283                 if (end_time > end_t)
1284                 {
1285                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1286                 }
1287             }
1288             else
1289             {
1290                 int j;
1291                 for (j = 0; j < s->ndu; j++)
1292                 {
1293                     if (s->t[j] < begin_t)
1294                     {
1295                         r->start = j;
1296                     }
1297
1298                     if (s->t[j] >= end_t)
1299                     {
1300                         r->end = j;
1301                         break;
1302                     }
1303                 }
1304             }
1305             if (r->start > r->end)
1306             {
1307                 r->use = FALSE;
1308             }
1309         }
1310     }
1311     sample_coll_calc_ntot(sc);
1312 }
1313
1314 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1315 {
1316     double         first_t, last_t;
1317     double         begin_t, end_t;
1318     lambda_data_t *lc;
1319     lambda_data_t *head = sd->lb;
1320     int            j;
1321
1322     if (begin <= 0 && end < 0)
1323     {
1324         return;
1325     }
1326
1327     /* first determine the global start and end times */
1328     first_t = -1;
1329     last_t  = -1;
1330     lc      = head->next;
1331     while (lc != head)
1332     {
1333         sample_coll_t *sc = lc->sc->next;
1334         while (sc != lc->sc)
1335         {
1336             for (j = 0; j < sc->nsamples; j++)
1337             {
1338                 double start_t, end_t;
1339
1340                 start_t = sc->s[j]->start_time;
1341                 end_t   =   sc->s[j]->start_time;
1342                 if (sc->s[j]->hist)
1343                 {
1344                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1345                 }
1346                 else
1347                 {
1348                     if (sc->s[j]->t)
1349                     {
1350                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1351                     }
1352                     else
1353                     {
1354                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1355                     }
1356                 }
1357
1358                 if (start_t < first_t || first_t < 0)
1359                 {
1360                     first_t = start_t;
1361                 }
1362                 if (end_t > last_t)
1363                 {
1364                     last_t = end_t;
1365                 }
1366             }
1367             sc = sc->next;
1368         }
1369         lc = lc->next;
1370     }
1371
1372     /* calculate the actual times */
1373     if (begin > 0)
1374     {
1375         begin_t = begin;
1376     }
1377     else
1378     {
1379         begin_t = first_t;
1380     }
1381
1382     if (end > 0)
1383     {
1384         end_t = end;
1385     }
1386     else
1387     {
1388         end_t = last_t;
1389     }
1390     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1391
1392     if (begin_t > end_t)
1393     {
1394         return;
1395     }
1396     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1397
1398     /* then impose them */
1399     lc = head->next;
1400     while (lc != head)
1401     {
1402         sample_coll_t *sc = lc->sc->next;
1403         while (sc != lc->sc)
1404         {
1405             sample_coll_impose_times(sc, begin_t, end_t);
1406             sc = sc->next;
1407         }
1408         lc = lc->next;
1409     }
1410 }
1411
1412
1413 /* create subsample i out of ni from an existing sample_coll */
1414 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1415                                              sample_coll_t *sc_orig,
1416                                              int i, int ni)
1417 {
1418     int             j;
1419     int             hist_start, hist_end;
1420
1421     gmx_int64_t     ntot_start;
1422     gmx_int64_t     ntot_end;
1423     gmx_int64_t     ntot_so_far;
1424
1425     *sc = *sc_orig; /* just copy all fields */
1426
1427     /* allocate proprietary memory */
1428     snew(sc->s, sc_orig->nsamples);
1429     snew(sc->r, sc_orig->nsamples);
1430
1431     /* copy the samples */
1432     for (j = 0; j < sc_orig->nsamples; j++)
1433     {
1434         sc->s[j] = sc_orig->s[j];
1435         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1436     }
1437
1438     /* now fix start and end fields */
1439     /* the casts avoid possible overflows */
1440     ntot_start  = (gmx_int64_t)(sc_orig->ntot*(double)i/(double)ni);
1441     ntot_end    = (gmx_int64_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1442     ntot_so_far = 0;
1443     for (j = 0; j < sc->nsamples; j++)
1444     {
1445         gmx_int64_t ntot_add;
1446         gmx_int64_t new_start, new_end;
1447
1448         if (sc->r[j].use)
1449         {
1450             if (sc->s[j]->hist)
1451             {
1452                 ntot_add = sc->s[j]->hist->sum;
1453             }
1454             else
1455             {
1456                 ntot_add = sc->r[j].end - sc->r[j].start;
1457             }
1458         }
1459         else
1460         {
1461             ntot_add = 0;
1462         }
1463
1464         if (!sc->s[j]->hist)
1465         {
1466             if (ntot_so_far < ntot_start)
1467             {
1468                 /* adjust starting point */
1469                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1470             }
1471             else
1472             {
1473                 new_start = sc->r[j].start;
1474             }
1475             /* adjust end point */
1476             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1477             if (new_end > sc->r[j].end)
1478             {
1479                 new_end = sc->r[j].end;
1480             }
1481
1482             /* check if we're in range at all */
1483             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1484             {
1485                 new_start = 0;
1486                 new_end   = 0;
1487             }
1488             /* and write the new range */
1489             sc->r[j].start = (int)new_start;
1490             sc->r[j].end   = (int)new_end;
1491         }
1492         else
1493         {
1494             if (sc->r[j].use)
1495             {
1496                 double overlap;
1497                 double ntot_start_norm, ntot_end_norm;
1498                 /* calculate the amount of overlap of the
1499                    desired range (ntot_start -- ntot_end) onto
1500                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1501
1502                 /* first calculate normalized bounds
1503                    (where 0 is the start of the hist range, and 1 the end) */
1504                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1505                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1506
1507                 /* now fix the boundaries */
1508                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1509                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1510
1511                 /* and calculate the overlap */
1512                 overlap = ntot_end_norm - ntot_start_norm;
1513
1514                 if (overlap > 0.95) /* we allow for 5% slack */
1515                 {
1516                     sc->r[j].use = TRUE;
1517                 }
1518                 else if (overlap < 0.05)
1519                 {
1520                     sc->r[j].use = FALSE;
1521                 }
1522                 else
1523                 {
1524                     return FALSE;
1525                 }
1526             }
1527         }
1528         ntot_so_far += ntot_add;
1529     }
1530     sample_coll_calc_ntot(sc);
1531
1532     return TRUE;
1533 }
1534
1535 /* calculate minimum and maximum work values in sample collection */
1536 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1537                                 double *Wmin, double *Wmax)
1538 {
1539     int i, j;
1540
1541     *Wmin = FLT_MAX;
1542     *Wmax = -FLT_MAX;
1543
1544     for (i = 0; i < sc->nsamples; i++)
1545     {
1546         samples_t      *s = sc->s[i];
1547         sample_range_t *r = &(sc->r[i]);
1548         if (r->use)
1549         {
1550             if (!s->hist)
1551             {
1552                 for (j = r->start; j < r->end; j++)
1553                 {
1554                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1555                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1556                 }
1557             }
1558             else
1559             {
1560                 int    hd = 0; /* determine the histogram direction: */
1561                 double dx;
1562                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1563                 {
1564                     hd = 1;
1565                 }
1566                 dx = s->hist->dx[hd];
1567
1568                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1569                 {
1570                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1571                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1572                     /* look for the highest value bin with values */
1573                     if (s->hist->bin[hd][j] > 0)
1574                     {
1575                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1576                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1577                         break;
1578                     }
1579                 }
1580             }
1581         }
1582     }
1583 }
1584
1585 /* Initialize a sim_data structure */
1586 static void sim_data_init(sim_data_t *sd)
1587 {
1588     /* make linked list */
1589     sd->lb       = &(sd->lb_head);
1590     sd->lb->next = sd->lb;
1591     sd->lb->prev = sd->lb;
1592
1593     lambda_components_init(&(sd->lc));
1594 }
1595
1596
1597 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1598 {
1599     int    i;
1600     double sum;
1601
1602     sum = 0;
1603
1604     for (i = 0; i < n; i++)
1605     {
1606         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1607     }
1608
1609     return sum;
1610 }
1611
1612 /* calculate the BAR average given a histogram
1613
1614     if type== 0, calculate the best estimate for the average,
1615     if type==-1, calculate the minimum possible value given the histogram
1616     if type== 1, calculate the maximum possible value given the histogram */
1617 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1618                                 int type)
1619 {
1620     double sum = 0.;
1621     int    i;
1622     int    max;
1623     /* normalization factor multiplied with bin width and
1624        number of samples (we normalize through M): */
1625     double normdx = 1.;
1626     int    hd     = 0; /* determine the histogram direction: */
1627     double dx;
1628
1629     if ( (hist->nhist > 1) && (Wfac < 0) )
1630     {
1631         hd = 1;
1632     }
1633     dx  = hist->dx[hd];
1634     max = hist->nbin[hd]-1;
1635     if (type == 1)
1636     {
1637         max = hist->nbin[hd]; /* we also add whatever was out of range */
1638     }
1639
1640     for (i = 0; i < max; i++)
1641     {
1642         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1643         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1644
1645         sum += pxdx/(1. + exp(x + sbMmDG));
1646     }
1647
1648     return sum;
1649 }
1650
1651 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1652                                 double temp, double tol, int type)
1653 {
1654     double kT, beta, M;
1655     double DG;
1656     int    i, j;
1657     double Wfac1, Wfac2, Wmin, Wmax;
1658     double DG0, DG1, DG2, dDG1;
1659     double sum1, sum2;
1660     double n1, n2; /* numbers of samples as doubles */
1661
1662     kT   = BOLTZ*temp;
1663     beta = 1/kT;
1664
1665     /* count the numbers of samples */
1666     n1 = ca->ntot;
1667     n2 = cb->ntot;
1668
1669     M = log(n1/n2);
1670
1671     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1672     if (ca->foreign_lambda->dhdl < 0)
1673     {
1674         /* this is the case when the delta U were calculated directly
1675            (i.e. we're not scaling dhdl) */
1676         Wfac1 = beta;
1677         Wfac2 = beta;
1678     }
1679     else
1680     {
1681         /* we're using dhdl, so delta_lambda needs to be a
1682            multiplication factor.  */
1683         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1684         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1685                                                   ca->native_lambda);
1686         if (cb->native_lambda->lc->N > 1)
1687         {
1688             gmx_fatal(FARGS,
1689                       "Can't (yet) do multi-component dhdl interpolation");
1690         }
1691
1692         Wfac1 =  beta*delta_lambda;
1693         Wfac2 = -beta*delta_lambda;
1694     }
1695
1696     if (beta < 1)
1697     {
1698         /* We print the output both in kT and kJ/mol.
1699          * Here we determine DG in kT, so when beta < 1
1700          * the precision has to be increased.
1701          */
1702         tol *= beta;
1703     }
1704
1705     /* Calculate minimum and maximum work to give an initial estimate of
1706      * delta G  as their average.
1707      */
1708     {
1709         double Wmin1, Wmin2, Wmax1, Wmax2;
1710         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1711         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1712
1713         Wmin = min(Wmin1, Wmin2);
1714         Wmax = max(Wmax1, Wmax2);
1715     }
1716
1717     DG0 = Wmin;
1718     DG2 = Wmax;
1719
1720     if (debug)
1721     {
1722         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1723     }
1724     /* We approximate by bisection: given our initial estimates
1725        we keep checking whether the halfway point is greater or
1726        smaller than what we get out of the BAR averages.
1727
1728        For the comparison we can use twice the tolerance. */
1729     while (DG2 - DG0 > 2*tol)
1730     {
1731         DG1 = 0.5*(DG0 + DG2);
1732
1733         /* calculate the BAR averages */
1734         dDG1 = 0.;
1735
1736         for (i = 0; i < ca->nsamples; i++)
1737         {
1738             samples_t      *s = ca->s[i];
1739             sample_range_t *r = &(ca->r[i]);
1740             if (r->use)
1741             {
1742                 if (s->hist)
1743                 {
1744                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1745                 }
1746                 else
1747                 {
1748                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1749                                          Wfac1, (M-DG1));
1750                 }
1751             }
1752         }
1753         for (i = 0; i < cb->nsamples; i++)
1754         {
1755             samples_t      *s = cb->s[i];
1756             sample_range_t *r = &(cb->r[i]);
1757             if (r->use)
1758             {
1759                 if (s->hist)
1760                 {
1761                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1762                 }
1763                 else
1764                 {
1765                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1766                                          Wfac2, -(M-DG1));
1767                 }
1768             }
1769         }
1770
1771         if (dDG1 < 0)
1772         {
1773             DG0 = DG1;
1774         }
1775         else
1776         {
1777             DG2 = DG1;
1778         }
1779         if (debug)
1780         {
1781             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1782         }
1783     }
1784
1785     return 0.5*(DG0 + DG2);
1786 }
1787
1788 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1789                              double temp, double dg, double *sa, double *sb)
1790 {
1791     int    i, j;
1792     double W_ab = 0.;
1793     double W_ba = 0.;
1794     double kT, beta;
1795     double Wfac1, Wfac2;
1796     double n1, n2;
1797
1798     kT   = BOLTZ*temp;
1799     beta = 1/kT;
1800
1801     /* count the numbers of samples */
1802     n1 = ca->ntot;
1803     n2 = cb->ntot;
1804
1805     /* to ensure the work values are the same as during the delta_G */
1806     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1807     if (ca->foreign_lambda->dhdl < 0)
1808     {
1809         /* this is the case when the delta U were calculated directly
1810            (i.e. we're not scaling dhdl) */
1811         Wfac1 = beta;
1812         Wfac2 = beta;
1813     }
1814     else
1815     {
1816         /* we're using dhdl, so delta_lambda needs to be a
1817            multiplication factor.  */
1818         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1819                                                   ca->native_lambda);
1820         Wfac1 =  beta*delta_lambda;
1821         Wfac2 = -beta*delta_lambda;
1822     }
1823
1824     /* first calculate the average work in both directions */
1825     for (i = 0; i < ca->nsamples; i++)
1826     {
1827         samples_t      *s = ca->s[i];
1828         sample_range_t *r = &(ca->r[i]);
1829         if (r->use)
1830         {
1831             if (!s->hist)
1832             {
1833                 for (j = r->start; j < r->end; j++)
1834                 {
1835                     W_ab += Wfac1*s->du[j];
1836                 }
1837             }
1838             else
1839             {
1840                 /* normalization factor multiplied with bin width and
1841                    number of samples (we normalize through M): */
1842                 double normdx = 1.;
1843                 double dx;
1844                 int    hd = 0; /* histogram direction */
1845                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1846                 {
1847                     hd = 1;
1848                 }
1849                 dx = s->hist->dx[hd];
1850
1851                 for (j = 0; j < s->hist->nbin[0]; j++)
1852                 {
1853                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1854                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1855                     W_ab += pxdx*x;
1856                 }
1857             }
1858         }
1859     }
1860     W_ab /= n1;
1861
1862     for (i = 0; i < cb->nsamples; i++)
1863     {
1864         samples_t      *s = cb->s[i];
1865         sample_range_t *r = &(cb->r[i]);
1866         if (r->use)
1867         {
1868             if (!s->hist)
1869             {
1870                 for (j = r->start; j < r->end; j++)
1871                 {
1872                     W_ba += Wfac1*s->du[j];
1873                 }
1874             }
1875             else
1876             {
1877                 /* normalization factor multiplied with bin width and
1878                    number of samples (we normalize through M): */
1879                 double normdx = 1.;
1880                 double dx;
1881                 int    hd = 0; /* histogram direction */
1882                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1883                 {
1884                     hd = 1;
1885                 }
1886                 dx = s->hist->dx[hd];
1887
1888                 for (j = 0; j < s->hist->nbin[0]; j++)
1889                 {
1890                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1891                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1892                     W_ba += pxdx*x;
1893                 }
1894             }
1895         }
1896     }
1897     W_ba /= n2;
1898
1899     /* then calculate the relative entropies */
1900     *sa = (W_ab - dg);
1901     *sb = (W_ba + dg);
1902 }
1903
1904 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1905                            double temp, double dg, double *stddev)
1906 {
1907     int    i, j;
1908     double M;
1909     double sigmafact = 0.;
1910     double kT, beta;
1911     double Wfac1, Wfac2;
1912     double n1, n2;
1913
1914     kT   = BOLTZ*temp;
1915     beta = 1/kT;
1916
1917     /* count the numbers of samples */
1918     n1 = ca->ntot;
1919     n2 = cb->ntot;
1920
1921     /* to ensure the work values are the same as during the delta_G */
1922     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1923     if (ca->foreign_lambda->dhdl < 0)
1924     {
1925         /* this is the case when the delta U were calculated directly
1926            (i.e. we're not scaling dhdl) */
1927         Wfac1 = beta;
1928         Wfac2 = beta;
1929     }
1930     else
1931     {
1932         /* we're using dhdl, so delta_lambda needs to be a
1933            multiplication factor.  */
1934         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1935                                                   ca->native_lambda);
1936         Wfac1 =  beta*delta_lambda;
1937         Wfac2 = -beta*delta_lambda;
1938     }
1939
1940     M = log(n1/n2);
1941
1942
1943     /* calculate average in both directions */
1944     for (i = 0; i < ca->nsamples; i++)
1945     {
1946         samples_t      *s = ca->s[i];
1947         sample_range_t *r = &(ca->r[i]);
1948         if (r->use)
1949         {
1950             if (!s->hist)
1951             {
1952                 for (j = r->start; j < r->end; j++)
1953                 {
1954                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1955                 }
1956             }
1957             else
1958             {
1959                 /* normalization factor multiplied with bin width and
1960                    number of samples (we normalize through M): */
1961                 double normdx = 1.;
1962                 double dx;
1963                 int    hd = 0; /* histogram direction */
1964                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1965                 {
1966                     hd = 1;
1967                 }
1968                 dx = s->hist->dx[hd];
1969
1970                 for (j = 0; j < s->hist->nbin[0]; j++)
1971                 {
1972                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1973                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1974
1975                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1976                 }
1977             }
1978         }
1979     }
1980     for (i = 0; i < cb->nsamples; i++)
1981     {
1982         samples_t      *s = cb->s[i];
1983         sample_range_t *r = &(cb->r[i]);
1984         if (r->use)
1985         {
1986             if (!s->hist)
1987             {
1988                 for (j = r->start; j < r->end; j++)
1989                 {
1990                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1991                 }
1992             }
1993             else
1994             {
1995                 /* normalization factor multiplied with bin width and
1996                    number of samples (we normalize through M): */
1997                 double normdx = 1.;
1998                 double dx;
1999                 int    hd = 0; /* histogram direction */
2000                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2001                 {
2002                     hd = 1;
2003                 }
2004                 dx = s->hist->dx[hd];
2005
2006                 for (j = 0; j < s->hist->nbin[0]; j++)
2007                 {
2008                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2009                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2010
2011                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2012                 }
2013             }
2014         }
2015     }
2016
2017     sigmafact /= (n1 + n2);
2018
2019
2020     /* Eq. 10 from
2021        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2022     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2023 }
2024
2025
2026
2027 static void calc_bar(barres_t *br, double tol,
2028                      int npee_min, int npee_max, gmx_bool *bEE,
2029                      double *partsum)
2030 {
2031     int      npee, p;
2032     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2033                                                         for calculated quantities */
2034     int      nsample1, nsample2;
2035     double   temp = br->a->temp;
2036     int      i, j;
2037     double   dg_min, dg_max;
2038     gmx_bool have_hist = FALSE;
2039
2040     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2041
2042     br->dg_disc_err      = 0.;
2043     br->dg_histrange_err = 0.;
2044
2045     /* check if there are histograms */
2046     for (i = 0; i < br->a->nsamples; i++)
2047     {
2048         if (br->a->r[i].use && br->a->s[i]->hist)
2049         {
2050             have_hist = TRUE;
2051             break;
2052         }
2053     }
2054     if (!have_hist)
2055     {
2056         for (i = 0; i < br->b->nsamples; i++)
2057         {
2058             if (br->b->r[i].use && br->b->s[i]->hist)
2059             {
2060                 have_hist = TRUE;
2061                 break;
2062             }
2063         }
2064     }
2065
2066     /* calculate histogram-specific errors */
2067     if (have_hist)
2068     {
2069         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2070         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2071
2072         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2073         {
2074             /* the histogram range  error is the biggest of the differences
2075                between the best estimate and the extremes */
2076             br->dg_histrange_err = fabs(dg_max - dg_min);
2077         }
2078         br->dg_disc_err = 0.;
2079         for (i = 0; i < br->a->nsamples; i++)
2080         {
2081             if (br->a->s[i]->hist)
2082             {
2083                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2084             }
2085         }
2086         for (i = 0; i < br->b->nsamples; i++)
2087         {
2088             if (br->b->s[i]->hist)
2089             {
2090                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2091             }
2092         }
2093     }
2094     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2095
2096     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2097
2098     dg_sig2     = 0;
2099     sa_sig2     = 0;
2100     sb_sig2     = 0;
2101     stddev_sig2 = 0;
2102
2103     *bEE = TRUE;
2104     {
2105         sample_coll_t ca, cb;
2106
2107         /* initialize the samples */
2108         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2109                          br->a->temp);
2110         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2111                          br->b->temp);
2112
2113         for (npee = npee_min; npee <= npee_max; npee++)
2114         {
2115             double dgs      = 0;
2116             double dgs2     = 0;
2117             double dsa      = 0;
2118             double dsb      = 0;
2119             double dsa2     = 0;
2120             double dsb2     = 0;
2121             double dstddev  = 0;
2122             double dstddev2 = 0;
2123
2124
2125             for (p = 0; p < npee; p++)
2126             {
2127                 double   dgp;
2128                 double   stddevc;
2129                 double   sac, sbc;
2130                 gmx_bool cac, cbc;
2131
2132                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2133                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2134
2135                 if (!cac || !cbc)
2136                 {
2137                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2138                     *bEE = FALSE;
2139                     if (cac)
2140                     {
2141                         sample_coll_destroy(&ca);
2142                     }
2143                     if (cbc)
2144                     {
2145                         sample_coll_destroy(&cb);
2146                     }
2147                     return;
2148                 }
2149
2150                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2151                 dgs  += dgp;
2152                 dgs2 += dgp*dgp;
2153
2154                 partsum[npee*(npee_max+1)+p] += dgp;
2155
2156                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2157                 dsa  += sac;
2158                 dsa2 += sac*sac;
2159                 dsb  += sbc;
2160                 dsb2 += sbc*sbc;
2161                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2162
2163                 dstddev  += stddevc;
2164                 dstddev2 += stddevc*stddevc;
2165
2166                 sample_coll_destroy(&ca);
2167                 sample_coll_destroy(&cb);
2168             }
2169             dgs     /= npee;
2170             dgs2    /= npee;
2171             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2172
2173             dsa     /= npee;
2174             dsa2    /= npee;
2175             dsb     /= npee;
2176             dsb2    /= npee;
2177             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2178             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2179
2180             dstddev     /= npee;
2181             dstddev2    /= npee;
2182             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2183         }
2184         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2185         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2186         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2187         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2188     }
2189 }
2190
2191
2192 static double bar_err(int nbmin, int nbmax, const double *partsum)
2193 {
2194     int    nb, b;
2195     double svar, s, s2, dg;
2196
2197     svar = 0;
2198     for (nb = nbmin; nb <= nbmax; nb++)
2199     {
2200         s  = 0;
2201         s2 = 0;
2202         for (b = 0; b < nb; b++)
2203         {
2204             dg  = partsum[nb*(nbmax+1)+b];
2205             s  += dg;
2206             s2 += dg*dg;
2207         }
2208         s    /= nb;
2209         s2   /= nb;
2210         svar += (s2 - s*s)/(nb - 1);
2211     }
2212
2213     return sqrt(svar/(nbmax + 1 - nbmin));
2214 }
2215
2216
2217 /* Seek the end of an identifier (consecutive non-spaces), followed by
2218    an optional number of spaces or '='-signs. Returns a pointer to the
2219    first non-space value found after that. Returns NULL if the string
2220    ends before that.
2221  */
2222 static const char *find_value(const char *str)
2223 {
2224     gmx_bool name_end_found = FALSE;
2225
2226     /* if the string is a NULL pointer, return a NULL pointer. */
2227     if (str == NULL)
2228     {
2229         return NULL;
2230     }
2231     while (*str != '\0')
2232     {
2233         /* first find the end of the name */
2234         if (!name_end_found)
2235         {
2236             if (isspace(*str) || (*str == '=') )
2237             {
2238                 name_end_found = TRUE;
2239             }
2240         }
2241         else
2242         {
2243             if (!( isspace(*str) || (*str == '=') ))
2244             {
2245                 return str;
2246             }
2247         }
2248         str++;
2249     }
2250     return NULL;
2251 }
2252
2253
2254
2255 /* read a vector-notation description of a lambda vector */
2256 static gmx_bool read_lambda_compvec(const char                *str,
2257                                     lambda_vec_t              *lv,
2258                                     const lambda_components_t *lc_in,
2259                                     lambda_components_t       *lc_out,
2260                                     const char               **end,
2261                                     const char                *fn)
2262 {
2263     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2264                                           components, or to check them */
2265     gmx_bool    start_reached = FALSE; /* whether the start of component names
2266                                           has been reached */
2267     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2268     int         n             = 0;     /* current component number */
2269     const char *val_start     = NULL;  /* start of the component name, or NULL
2270                                           if not in a value */
2271     char       *strtod_end;
2272     gmx_bool    OK = TRUE;
2273
2274     if (end)
2275     {
2276         *end = str;
2277     }
2278
2279
2280     if (lc_out && lc_out->N == 0)
2281     {
2282         initialize_lc = TRUE;
2283     }
2284
2285     if (lc_in == NULL)
2286     {
2287         lc_in = lc_out;
2288     }
2289
2290     while (1)
2291     {
2292         if (!start_reached)
2293         {
2294             if (isalnum(*str))
2295             {
2296                 vector        = FALSE;
2297                 start_reached = TRUE;
2298                 val_start     = str;
2299             }
2300             else if (*str == '(')
2301             {
2302                 vector        = TRUE;
2303                 start_reached = TRUE;
2304             }
2305             else if (!isspace(*str))
2306             {
2307                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2308             }
2309         }
2310         else
2311         {
2312             if (val_start)
2313             {
2314                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2315                 {
2316                     /* end of value */
2317                     if (lv == NULL)
2318                     {
2319                         if (initialize_lc)
2320                         {
2321                             lambda_components_add(lc_out, val_start,
2322                                                   (str-val_start));
2323                         }
2324                         else
2325                         {
2326                             if (!lambda_components_check(lc_out, n, val_start,
2327                                                          (str-val_start)))
2328                             {
2329                                 return FALSE;
2330                             }
2331                         }
2332                     }
2333                     else
2334                     {
2335                         /* add a vector component to lv */
2336                         lv->val[n] = strtod(val_start, &strtod_end);
2337                         if (val_start == strtod_end)
2338                         {
2339                             gmx_fatal(FARGS,
2340                                       "Error reading lambda vector in %s", fn);
2341                         }
2342                     }
2343                     /* reset for the next identifier */
2344                     val_start = NULL;
2345                     n++;
2346                     if (!vector)
2347                     {
2348                         return OK;
2349                     }
2350                 }
2351             }
2352             else if (isalnum(*str))
2353             {
2354                 val_start = str;
2355             }
2356             if (*str == ')')
2357             {
2358                 str++;
2359                 if (end)
2360                 {
2361                     *end = str;
2362                 }
2363                 if (!vector)
2364                 {
2365                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2366                 }
2367                 else
2368                 {
2369                     if (n == lc_in->N)
2370                     {
2371                         return OK;
2372                     }
2373                     else if (lv == NULL)
2374                     {
2375                         return FALSE;
2376                     }
2377                     else
2378                     {
2379                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2380                                   fn);
2381                         return FALSE;
2382                     }
2383
2384                 }
2385             }
2386         }
2387         if (*str == '\0')
2388         {
2389             break;
2390         }
2391         str++;
2392         if (end)
2393         {
2394             *end = str;
2395         }
2396     }
2397     if (vector)
2398     {
2399         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2400         return FALSE;
2401     }
2402     return OK;
2403 }
2404
2405 /* read and check the component names from a string */
2406 static gmx_bool read_lambda_components(const char          *str,
2407                                        lambda_components_t *lc,
2408                                        const char         **end,
2409                                        const char          *fn)
2410 {
2411     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2412 }
2413
2414 /* read an initialized lambda vector from a string */
2415 static gmx_bool read_lambda_vector(const char   *str,
2416                                    lambda_vec_t *lv,
2417                                    const char  **end,
2418                                    const char   *fn)
2419 {
2420     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2421 }
2422
2423
2424
2425 /* deduce lambda value from legend.
2426     fn = the file name
2427     legend = the legend string
2428     ba = the xvg data
2429     lam = the initialized lambda vector
2430    returns whether to use the data in this set.
2431  */
2432 static gmx_bool legend2lambda(const char   *fn,
2433                               const char   *legend,
2434                               lambda_vec_t *lam)
2435 {
2436     double        lambda = 0;
2437     const char   *ptr    = NULL, *ptr2 = NULL;
2438     gmx_bool      ok     = FALSE;
2439     gmx_bool      bdhdl  = FALSE;
2440     const char   *tostr  = " to ";
2441
2442     if (legend == NULL)
2443     {
2444         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2445     }
2446
2447     /* look for the last 'to': */
2448     ptr2 = legend;
2449     do
2450     {
2451         ptr2 = strstr(ptr2, tostr);
2452         if (ptr2 != NULL)
2453         {
2454             ptr = ptr2;
2455             ptr2++;
2456         }
2457     }
2458     while (ptr2 != NULL && *ptr2 != '\0');
2459
2460     if (ptr)
2461     {
2462         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2463     }
2464     else
2465     {
2466         /* look for the = sign */
2467         ptr = strrchr(legend, '=');
2468         if (!ptr)
2469         {
2470             /* otherwise look for the last space */
2471             ptr = strrchr(legend, ' ');
2472         }
2473     }
2474
2475     if (strstr(legend, "dH"))
2476     {
2477         ok    = TRUE;
2478         bdhdl = TRUE;
2479     }
2480     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2481     {
2482         ok    = TRUE;
2483         bdhdl = FALSE;
2484     }
2485     else /*if (strstr(legend, "pV"))*/
2486     {
2487         return FALSE;
2488     }
2489     if (!ptr)
2490     {
2491         ok = FALSE;
2492     }
2493
2494     if (!ok)
2495     {
2496         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2497     }
2498     if (!bdhdl)
2499     {
2500         ptr = find_value(ptr);
2501         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2502         {
2503             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2504         }
2505     }
2506     else
2507     {
2508         int         dhdl_index;
2509         const char *end;
2510         char        buf[STRLEN];
2511
2512         ptr = strrchr(legend, '=');
2513         end = ptr;
2514         if (ptr)
2515         {
2516             /* there must be a component name */
2517             ptr--;
2518             if (ptr < legend)
2519             {
2520                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2521             }
2522             /* now backtrack to the start of the identifier */
2523             while (isspace(*ptr))
2524             {
2525                 end = ptr;
2526                 ptr--;
2527                 if (ptr < legend)
2528                 {
2529                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2530                 }
2531             }
2532             while (!isspace(*ptr))
2533             {
2534                 ptr--;
2535                 if (ptr < legend)
2536                 {
2537                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2538                 }
2539             }
2540             ptr++;
2541             strncpy(buf, ptr, (end-ptr));
2542             buf[(end-ptr)] = '\0';
2543             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2544             if (dhdl_index < 0)
2545             {
2546                 char buf[STRLEN];
2547                 strncpy(buf, ptr, (end-ptr));
2548                 buf[(end-ptr)] = '\0';
2549                 gmx_fatal(FARGS,
2550                           "Did not find lambda component for '%s' in %s",
2551                           buf, fn);
2552             }
2553         }
2554         else
2555         {
2556             if (lam->lc->N > 1)
2557             {
2558                 gmx_fatal(FARGS,
2559                           "dhdl without component name with >1 lambda component in %s",
2560                           fn);
2561             }
2562             dhdl_index = 0;
2563         }
2564         lam->dhdl = dhdl_index;
2565     }
2566     return TRUE;
2567 }
2568
2569 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2570                                 lambda_components_t *lc)
2571 {
2572     gmx_bool    bFound;
2573     const char *ptr;
2574     char       *end;
2575     double      native_lambda;
2576
2577     bFound = FALSE;
2578
2579     /* first check for a state string */
2580     ptr = strstr(subtitle, "state");
2581     if (ptr)
2582     {
2583         int         index = -1;
2584         const char *val_end;
2585
2586         /* the new 4.6 style lambda vectors */
2587         ptr = find_value(ptr);
2588         if (ptr)
2589         {
2590             index = strtol(ptr, &end, 10);
2591             if (ptr == end)
2592             {
2593                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2594                 return FALSE;
2595             }
2596             ptr = end;
2597         }
2598         else
2599         {
2600             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2601             return FALSE;
2602         }
2603         /* now find the lambda vector component names */
2604         while (*ptr != '(' && !isalnum(*ptr))
2605         {
2606             ptr++;
2607             if (*ptr == '\0')
2608             {
2609                 gmx_fatal(FARGS,
2610                           "Incomplete lambda vector component data in %s", fn);
2611                 return FALSE;
2612             }
2613         }
2614         val_end = ptr;
2615         if (!read_lambda_components(ptr, lc, &val_end, fn))
2616         {
2617             gmx_fatal(FARGS,
2618                       "lambda vector components in %s don't match those previously read",
2619                       fn);
2620         }
2621         ptr = find_value(val_end);
2622         if (!ptr)
2623         {
2624             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2625             return FALSE;
2626         }
2627         lambda_vec_init(&(ba->native_lambda), lc);
2628         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2629         {
2630             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2631         }
2632         ba->native_lambda.index = index;
2633         bFound                  = TRUE;
2634     }
2635     else
2636     {
2637         /* compatibility mode: check for lambda in other ways. */
2638         /* plain text lambda string */
2639         ptr = strstr(subtitle, "lambda");
2640         if (ptr == NULL)
2641         {
2642             /* xmgrace formatted lambda string */
2643             ptr = strstr(subtitle, "\\xl\\f{}");
2644         }
2645         if (ptr == NULL)
2646         {
2647             /* xmgr formatted lambda string */
2648             ptr = strstr(subtitle, "\\8l\\4");
2649         }
2650         if (ptr != NULL)
2651         {
2652             ptr = strstr(ptr, "=");
2653         }
2654         if (ptr != NULL)
2655         {
2656             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2657             /* add the lambda component name as an empty string */
2658             if (lc->N > 0)
2659             {
2660                 if (!lambda_components_check(lc, 0, "", 0))
2661                 {
2662                     gmx_fatal(FARGS,
2663                               "lambda vector components in %s don't match those previously read",
2664                               fn);
2665                 }
2666             }
2667             else
2668             {
2669                 lambda_components_add(lc, "", 0);
2670             }
2671             lambda_vec_init(&(ba->native_lambda), lc);
2672             ba->native_lambda.val[0] = native_lambda;
2673         }
2674     }
2675
2676     return bFound;
2677 }
2678
2679 static void filename2lambda(const char *fn)
2680 {
2681     double        lambda;
2682     const char   *ptr, *digitptr;
2683     char         *endptr;
2684     int           dirsep;
2685     ptr = fn;
2686     /* go to the end of the path string and search backward to find the last
2687        directory in the path which has to contain the value of lambda
2688      */
2689     while (ptr[1] != '\0')
2690     {
2691         ptr++;
2692     }
2693     /* searching backward to find the second directory separator */
2694     dirsep   = 0;
2695     digitptr = NULL;
2696     while (ptr >= fn)
2697     {
2698         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2699         {
2700             if (dirsep == 1)
2701             {
2702                 break;
2703             }
2704             dirsep++;
2705         }
2706         /* save the last position of a digit between the last two
2707            separators = in the last dirname */
2708         if (dirsep > 0 && isdigit(*ptr))
2709         {
2710             digitptr = ptr;
2711         }
2712         ptr--;
2713     }
2714     if (!digitptr)
2715     {
2716         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2717                   " last directory in the path '%s' does not contain a number", fn);
2718     }
2719     if (digitptr[-1] == '-')
2720     {
2721         digitptr--;
2722     }
2723     lambda = strtod(digitptr, &endptr);
2724     if (endptr == digitptr)
2725     {
2726         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2727     }
2728 }
2729
2730 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2731                                   lambda_components_t *lc)
2732 {
2733     int          i;
2734     char        *subtitle, **legend, *ptr;
2735     int          np;
2736     gmx_bool     native_lambda_read = FALSE;
2737     char         buf[STRLEN];
2738     lambda_vec_t lv;
2739
2740     xvg_init(ba);
2741
2742     ba->filename = fn;
2743
2744     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2745     if (!ba->y)
2746     {
2747         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2748     }
2749     /* Reorder the data */
2750     ba->t  = ba->y[0];
2751     for (i = 1; i < ba->nset; i++)
2752     {
2753         ba->y[i-1] = ba->y[i];
2754     }
2755     ba->nset--;
2756
2757     snew(ba->np, ba->nset);
2758     for (i = 0; i < ba->nset; i++)
2759     {
2760         ba->np[i] = np;
2761     }
2762
2763     ba->temp = -1;
2764     if (subtitle != NULL)
2765     {
2766         /* try to extract temperature */
2767         ptr = strstr(subtitle, "T =");
2768         if (ptr != NULL)
2769         {
2770             ptr += 3;
2771             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2772             {
2773                 if (ba->temp <= 0)
2774                 {
2775                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2776                               ba->temp, fn);
2777                 }
2778             }
2779         }
2780     }
2781     if (ba->temp < 0)
2782     {
2783         if (*temp <= 0)
2784         {
2785             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2786         }
2787         ba->temp = *temp;
2788     }
2789
2790     /* Try to deduce lambda from the subtitle */
2791     if (subtitle)
2792     {
2793         if (subtitle2lambda(subtitle, ba, fn, lc))
2794         {
2795             native_lambda_read = TRUE;
2796         }
2797     }
2798     snew(ba->lambda, ba->nset);
2799     if (legend == NULL)
2800     {
2801         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2802         if (ba->nset == 1)
2803         {
2804             if (!native_lambda_read)
2805             {
2806                 /* Deduce lambda from the file name */
2807                 filename2lambda(fn);
2808                 native_lambda_read = TRUE;
2809             }
2810             ba->lambda[0] = ba->native_lambda;
2811         }
2812         else
2813         {
2814             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2815         }
2816     }
2817     else
2818     {
2819         for (i = 0; i < ba->nset; )
2820         {
2821             gmx_bool use = FALSE;
2822             /* Read lambda from the legend */
2823             lambda_vec_init( &(ba->lambda[i]), lc );
2824             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2825             use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2826             if (use)
2827             {
2828                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2829                 i++;
2830             }
2831             else
2832             {
2833                 int j;
2834                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2835                 for (j = i+1; j < ba->nset; j++)
2836                 {
2837                     ba->y[j-1]  = ba->y[j];
2838                     legend[j-1] = legend[j];
2839                 }
2840                 ba->nset--;
2841             }
2842         }
2843     }
2844
2845     if (!native_lambda_read)
2846     {
2847         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2848     }
2849
2850     if (legend != NULL)
2851     {
2852         for (i = 0; i < ba->nset-1; i++)
2853         {
2854             sfree(legend[i]);
2855         }
2856         sfree(legend);
2857     }
2858 }
2859
2860 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2861 {
2862     xvg_t     *barsim;
2863     samples_t *s;
2864     int        i;
2865     double    *lambda;
2866
2867     snew(barsim, 1);
2868
2869     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2870
2871     if (barsim->nset < 1)
2872     {
2873         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2874     }
2875
2876     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2877     {
2878         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2879     }
2880     *temp = barsim->temp;
2881
2882     /* now create a series of samples_t */
2883     snew(s, barsim->nset);
2884     for (i = 0; i < barsim->nset; i++)
2885     {
2886         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2887                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2888                                                    &(barsim->lambda[i])),
2889                      fn);
2890         s[i].du  = barsim->y[i];
2891         s[i].ndu = barsim->np[i];
2892         s[i].t   = barsim->t;
2893
2894         lambda_data_list_insert_sample(sd->lb, s+i);
2895     }
2896     {
2897         char buf[STRLEN];
2898
2899         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2900         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2901                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2902         for (i = 0; i < barsim->nset; i++)
2903         {
2904             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2905             printf("        %s (%d pts)\n", buf, s[i].ndu);
2906         }
2907     }
2908     printf("\n\n");
2909 }
2910
2911 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2912                                  double start_time, double delta_time,
2913                                  lambda_vec_t *native_lambda, double temp,
2914                                  double *last_t, const char *filename)
2915 {
2916     int           i, j;
2917     gmx_bool      allocated;
2918     double        old_foreign_lambda;
2919     lambda_vec_t *foreign_lambda;
2920     int           type;
2921     samples_t    *s; /* convenience pointer */
2922     int           startj;
2923
2924     /* check the block types etc. */
2925     if ( (blk->nsub < 3) ||
2926          (blk->sub[0].type != xdr_datatype_int) ||
2927          (blk->sub[1].type != xdr_datatype_double) ||
2928          (
2929              (blk->sub[2].type != xdr_datatype_float) &&
2930              (blk->sub[2].type != xdr_datatype_double)
2931          ) ||
2932          (blk->sub[0].nr < 1) ||
2933          (blk->sub[1].nr < 1) )
2934     {
2935         gmx_fatal(FARGS,
2936                   "Unexpected/corrupted block data in file %s around time %f.",
2937                   filename, start_time);
2938     }
2939
2940     snew(foreign_lambda, 1);
2941     lambda_vec_init(foreign_lambda, native_lambda->lc);
2942     lambda_vec_copy(foreign_lambda, native_lambda);
2943     type = blk->sub[0].ival[0];
2944     if (type == dhbtDH)
2945     {
2946         for (i = 0; i < native_lambda->lc->N; i++)
2947         {
2948             foreign_lambda->val[i] = blk->sub[1].dval[i];
2949         }
2950     }
2951     else
2952     {
2953         if (blk->sub[0].nr > 1)
2954         {
2955             foreign_lambda->dhdl = blk->sub[0].ival[1];
2956         }
2957         else
2958         {
2959             foreign_lambda->dhdl = 0;
2960         }
2961     }
2962
2963     if (!*smp)
2964     {
2965         /* initialize the samples structure if it's empty. */
2966         snew(*smp, 1);
2967         samples_init(*smp, native_lambda, foreign_lambda, temp,
2968                      type == dhbtDHDL, filename);
2969         (*smp)->start_time = start_time;
2970         (*smp)->delta_time = delta_time;
2971     }
2972
2973     /* set convenience pointer */
2974     s = *smp;
2975
2976     /* now double check */
2977     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2978     {
2979         char buf[STRLEN], buf2[STRLEN];
2980         lambda_vec_print(foreign_lambda, buf, FALSE);
2981         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2982         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2983         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2984                   filename, start_time);
2985     }
2986
2987     /* make room for the data */
2988     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2989     {
2990         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2991             blk->sub[2].nr*2 : s->ndu_alloc;
2992         srenew(s->du_alloc, s->ndu_alloc);
2993         s->du = s->du_alloc;
2994     }
2995     startj   = s->ndu;
2996     s->ndu  += blk->sub[2].nr;
2997     s->ntot += blk->sub[2].nr;
2998     *ndu     = blk->sub[2].nr;
2999
3000     /* and copy the data*/
3001     for (j = 0; j < blk->sub[2].nr; j++)
3002     {
3003         if (blk->sub[2].type == xdr_datatype_float)
3004         {
3005             s->du[startj+j] = blk->sub[2].fval[j];
3006         }
3007         else
3008         {
3009             s->du[startj+j] = blk->sub[2].dval[j];
3010         }
3011     }
3012     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3013     {
3014         *last_t = start_time + blk->sub[2].nr*delta_time;
3015     }
3016 }
3017
3018 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3019                                       double start_time, double delta_time,
3020                                       lambda_vec_t *native_lambda, double temp,
3021                                       double *last_t, const char *filename)
3022 {
3023     int           i, j;
3024     samples_t    *s;
3025     int           nhist;
3026     double        old_foreign_lambda;
3027     lambda_vec_t *foreign_lambda;
3028     int           type;
3029     int           nbins[2];
3030
3031     /* check the block types etc. */
3032     if ( (blk->nsub < 2) ||
3033          (blk->sub[0].type != xdr_datatype_double) ||
3034          (blk->sub[1].type != xdr_datatype_int64) ||
3035          (blk->sub[0].nr < 2)  ||
3036          (blk->sub[1].nr < 2) )
3037     {
3038         gmx_fatal(FARGS,
3039                   "Unexpected/corrupted block data in file %s around time %f",
3040                   filename, start_time);
3041     }
3042
3043     nhist = blk->nsub-2;
3044     if (nhist == 0)
3045     {
3046         return NULL;
3047     }
3048     if (nhist > 2)
3049     {
3050         gmx_fatal(FARGS,
3051                   "Unexpected/corrupted block data in file %s around time %f",
3052                   filename, start_time);
3053     }
3054
3055     snew(s, 1);
3056     *nsamples = 1;
3057
3058     snew(foreign_lambda, 1);
3059     lambda_vec_init(foreign_lambda, native_lambda->lc);
3060     lambda_vec_copy(foreign_lambda, native_lambda);
3061     type = (int)(blk->sub[1].lval[1]);
3062     if (type == dhbtDH)
3063     {
3064         double old_foreign_lambda;
3065
3066         old_foreign_lambda = blk->sub[0].dval[0];
3067         if (old_foreign_lambda >= 0)
3068         {
3069             foreign_lambda->val[0] = old_foreign_lambda;
3070             if (foreign_lambda->lc->N > 1)
3071             {
3072                 gmx_fatal(FARGS,
3073                           "Single-component lambda in multi-component file %s",
3074                           filename);
3075             }
3076         }
3077         else
3078         {
3079             for (i = 0; i < native_lambda->lc->N; i++)
3080             {
3081                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3082             }
3083         }
3084     }
3085     else
3086     {
3087         if (foreign_lambda->lc->N > 1)
3088         {
3089             if (blk->sub[1].nr < 3 + nhist)
3090             {
3091                 gmx_fatal(FARGS,
3092                           "Missing derivative coord in multi-component file %s",
3093                           filename);
3094             }
3095             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3096         }
3097         else
3098         {
3099             foreign_lambda->dhdl = 0;
3100         }
3101     }
3102
3103     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3104                  filename);
3105     snew(s->hist, 1);
3106
3107     for (i = 0; i < nhist; i++)
3108     {
3109         nbins[i] = blk->sub[i+2].nr;
3110     }
3111
3112     hist_init(s->hist, nhist, nbins);
3113
3114     for (i = 0; i < nhist; i++)
3115     {
3116         s->hist->x0[i] = blk->sub[1].lval[2+i];
3117         s->hist->dx[i] = blk->sub[0].dval[1];
3118         if (i == 1)
3119         {
3120             s->hist->dx[i] = -s->hist->dx[i];
3121         }
3122     }
3123
3124     s->hist->start_time = start_time;
3125     s->hist->delta_time = delta_time;
3126     s->start_time       = start_time;
3127     s->delta_time       = delta_time;
3128
3129     for (i = 0; i < nhist; i++)
3130     {
3131         int             nbin;
3132         gmx_int64_t     sum = 0;
3133
3134         for (j = 0; j < s->hist->nbin[i]; j++)
3135         {
3136             int binv = (int)(blk->sub[i+2].ival[j]);
3137
3138             s->hist->bin[i][j] = binv;
3139             sum               += binv;
3140
3141         }
3142         if (i == 0)
3143         {
3144             s->ntot      = sum;
3145             s->hist->sum = sum;
3146         }
3147         else
3148         {
3149             if (s->ntot != sum)
3150             {
3151                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3152                           filename);
3153             }
3154         }
3155     }
3156
3157     if (start_time + s->hist->sum*delta_time > *last_t)
3158     {
3159         *last_t = start_time + s->hist->sum*delta_time;
3160     }
3161     return s;
3162 }
3163
3164
3165 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3166 {
3167     int            i, j;
3168     ener_file_t    fp;
3169     t_enxframe    *fr;
3170     int            nre;
3171     gmx_enxnm_t   *enm           = NULL;
3172     double         first_t       = -1;
3173     double         last_t        = -1;
3174     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3175     int           *nhists        = NULL; /* array to keep count & print at end */
3176     int           *npts          = NULL; /* array to keep count & print at end */
3177     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3178     lambda_vec_t  *native_lambda;
3179     double         end_time;             /* the end time of the last batch of samples */
3180     int            nsamples = 0;
3181     lambda_vec_t   start_lambda;
3182
3183     fp = open_enx(fn, "r");
3184     do_enxnms(fp, &nre, &enm);
3185     snew(fr, 1);
3186
3187     snew(native_lambda, 1);
3188     start_lambda.lc = NULL;
3189
3190     while (do_enx(fp, fr))
3191     {
3192         /* count the data blocks */
3193         int    nblocks_raw  = 0;
3194         int    nblocks_hist = 0;
3195         int    nlam         = 0;
3196         int    k;
3197         /* DHCOLL block information: */
3198         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3199         double rtemp      = 0;
3200
3201         /* count the blocks and handle collection information: */
3202         for (i = 0; i < fr->nblock; i++)
3203         {
3204             if (fr->block[i].id == enxDHHIST)
3205             {
3206                 nblocks_hist++;
3207             }
3208             if (fr->block[i].id == enxDH)
3209             {
3210                 nblocks_raw++;
3211             }
3212             if (fr->block[i].id == enxDHCOLL)
3213             {
3214                 nlam++;
3215                 if ( (fr->block[i].nsub < 1) ||
3216                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3217                      (fr->block[i].sub[0].nr < 5))
3218                 {
3219                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3220                 }
3221
3222                 /* read the data from the DHCOLL block */
3223                 rtemp            =        fr->block[i].sub[0].dval[0];
3224                 start_time       =   fr->block[i].sub[0].dval[1];
3225                 delta_time       =   fr->block[i].sub[0].dval[2];
3226                 old_start_lambda = fr->block[i].sub[0].dval[3];
3227                 delta_lambda     = fr->block[i].sub[0].dval[4];
3228
3229                 if (delta_lambda > 0)
3230                 {
3231                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3232                 }
3233                 if ( ( *temp != rtemp) && (*temp > 0) )
3234                 {
3235                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3236                 }
3237                 *temp = rtemp;
3238
3239                 if (old_start_lambda >= 0)
3240                 {
3241                     if (sd->lc.N > 0)
3242                     {
3243                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3244                         {
3245                             gmx_fatal(FARGS,
3246                                       "lambda vector components in %s don't match those previously read",
3247                                       fn);
3248                         }
3249                     }
3250                     else
3251                     {
3252                         lambda_components_add(&(sd->lc), "", 0);
3253                     }
3254                     if (!start_lambda.lc)
3255                     {
3256                         lambda_vec_init(&start_lambda, &(sd->lc));
3257                     }
3258                     start_lambda.val[0] = old_start_lambda;
3259                 }
3260                 else
3261                 {
3262                     /* read lambda vector */
3263                     int      n_lambda_vec;
3264                     gmx_bool check = (sd->lc.N > 0);
3265                     if (fr->block[i].nsub < 2)
3266                     {
3267                         gmx_fatal(FARGS,
3268                                   "No lambda vector, but start_lambda=%f\n",
3269                                   old_start_lambda);
3270                     }
3271                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3272                     for (j = 0; j < n_lambda_vec; j++)
3273                     {
3274                         const char *name =
3275                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3276                         if (check)
3277                         {
3278                             /* check the components */
3279                             lambda_components_check(&(sd->lc), j, name,
3280                                                     strlen(name));
3281                         }
3282                         else
3283                         {
3284                             lambda_components_add(&(sd->lc), name,
3285                                                   strlen(name));
3286                         }
3287                     }
3288                     lambda_vec_init(&start_lambda, &(sd->lc));
3289                     start_lambda.index = fr->block[i].sub[1].ival[0];
3290                     for (j = 0; j < n_lambda_vec; j++)
3291                     {
3292                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3293                     }
3294                 }
3295                 if (first_t < 0)
3296                 {
3297                     first_t = start_time;
3298                 }
3299             }
3300         }
3301
3302         if (nlam != 1)
3303         {
3304             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3305         }
3306         if (nblocks_raw > 0 && nblocks_hist > 0)
3307         {
3308             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3309         }
3310
3311         if (nsamples > 0)
3312         {
3313             /* check the native lambda */
3314             if (!lambda_vec_same(&start_lambda, native_lambda) )
3315             {
3316                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3317                           fn, native_lambda, start_lambda, start_time);
3318             }
3319             /* check the number of samples against the previous number */
3320             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3321             {
3322                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3323                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3324             }
3325             /* check whether last iterations's end time matches with
3326                the currrent start time */
3327             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3328             {
3329                 /* it didn't. We need to store our samples and reallocate */
3330                 for (i = 0; i < nsamples; i++)
3331                 {
3332                     if (samples_rawdh[i])
3333                     {
3334                         /* insert it into the existing list */
3335                         lambda_data_list_insert_sample(sd->lb,
3336                                                        samples_rawdh[i]);
3337                         /* and make sure we'll allocate a new one this time
3338                            around */
3339                         samples_rawdh[i] = NULL;
3340                     }
3341                 }
3342             }
3343         }
3344         else
3345         {
3346             /* this is the first round; allocate the associated data
3347                structures */
3348             /*native_lambda=start_lambda;*/
3349             lambda_vec_init(native_lambda, &(sd->lc));
3350             lambda_vec_copy(native_lambda, &start_lambda);
3351             nsamples = nblocks_raw+nblocks_hist;
3352             snew(nhists, nsamples);
3353             snew(npts, nsamples);
3354             snew(lambdas, nsamples);
3355             snew(samples_rawdh, nsamples);
3356             for (i = 0; i < nsamples; i++)
3357             {
3358                 nhists[i]        = 0;
3359                 npts[i]          = 0;
3360                 lambdas[i]       = NULL;
3361                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3362                                             ones contain values */
3363             }
3364         }
3365
3366         /* and read them */
3367         k = 0; /* counter for the lambdas, etc. arrays */
3368         for (i = 0; i < fr->nblock; i++)
3369         {
3370             if (fr->block[i].id == enxDH)
3371             {
3372                 int type = (fr->block[i].sub[0].ival[0]);
3373                 if (type == dhbtDH || type == dhbtDHDL)
3374                 {
3375                     int ndu;
3376                     read_edr_rawdh_block(&(samples_rawdh[k]),
3377                                          &ndu,
3378                                          &(fr->block[i]),
3379                                          start_time, delta_time,
3380                                          native_lambda, rtemp,
3381                                          &last_t, fn);
3382                     npts[k] += ndu;
3383                     if (samples_rawdh[k])
3384                     {
3385                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3386                     }
3387                     k++;
3388                 }
3389             }
3390             else if (fr->block[i].id == enxDHHIST)
3391             {
3392                 int type = (int)(fr->block[i].sub[1].lval[1]);
3393                 if (type == dhbtDH || type == dhbtDHDL)
3394                 {
3395                     int        j;
3396                     int        nb = 0;
3397                     samples_t *s; /* this is where the data will go */
3398                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3399                                             start_time, delta_time,
3400                                             native_lambda, rtemp,
3401                                             &last_t, fn);
3402                     nhists[k] += nb;
3403                     if (nb > 0)
3404                     {
3405                         lambdas[k] = s->foreign_lambda;
3406                     }
3407                     k++;
3408                     /* and insert the new sample immediately */
3409                     for (j = 0; j < nb; j++)
3410                     {
3411                         lambda_data_list_insert_sample(sd->lb, s+j);
3412                     }
3413                 }
3414             }
3415         }
3416     }
3417     /* Now store all our extant sample collections */
3418     for (i = 0; i < nsamples; i++)
3419     {
3420         if (samples_rawdh[i])
3421         {
3422             /* insert it into the existing list */
3423             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3424         }
3425     }
3426
3427
3428     {
3429         char buf[STRLEN];
3430         printf("\n");
3431         lambda_vec_print(native_lambda, buf, FALSE);
3432         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3433                fn, first_t, last_t, buf);
3434         for (i = 0; i < nsamples; i++)
3435         {
3436             if (lambdas[i])
3437             {
3438                 lambda_vec_print(lambdas[i], buf, TRUE);
3439                 if (nhists[i] > 0)
3440                 {
3441                     printf("        %s (%d hists)\n", buf, nhists[i]);
3442                 }
3443                 else
3444                 {
3445                     printf("        %s (%d pts)\n", buf, npts[i]);
3446                 }
3447             }
3448         }
3449     }
3450     printf("\n\n");
3451     sfree(npts);
3452     sfree(nhists);
3453     sfree(lambdas);
3454 }
3455
3456
3457 int gmx_bar(int argc, char *argv[])
3458 {
3459     static const char *desc[] = {
3460         "[THISMODULE] calculates free energy difference estimates through ",
3461         "Bennett's acceptance ratio method (BAR). It also automatically",
3462         "adds series of individual free energies obtained with BAR into",
3463         "a combined free energy estimate.[PAR]",
3464
3465         "Every individual BAR free energy difference relies on two ",
3466         "simulations at different states: say state A and state B, as",
3467         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3468         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3469         "average of the Hamiltonian difference of state B given state A and",
3470         "vice versa.",
3471         "The energy differences to the other state must be calculated",
3472         "explicitly during the simulation. This can be done with",
3473         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3474
3475         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3476         "Two types of input files are supported:[BR]",
3477         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3478         "The files should have columns ",
3479         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3480         "The [GRK]lambda[grk] values are inferred ",
3481         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3482         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3483         "legends of Delta H",
3484         "[BR]",
3485         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3486         "[TT]-extp[tt] option for these files, it is assumed",
3487         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3488         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3489         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3490         "subtitle (if present), otherwise from a number in the subdirectory ",
3491         "in the file name.[PAR]",
3492
3493         "The [GRK]lambda[grk] of the simulation is parsed from ",
3494         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3495         "foreign [GRK]lambda[grk] values from the legend containing the ",
3496         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3497         "the legend line containing 'T ='.[PAR]",
3498
3499         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3500         "These can contain either lists of energy differences (see the ",
3501         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3502         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3503         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3504         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3505
3506         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3507         "the energy difference can also be extrapolated from the ",
3508         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3509         "option, which assumes that the system's Hamiltonian depends linearly",
3510         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3511
3512         "The free energy estimates are determined using BAR with bisection, ",
3513         "with the precision of the output set with [TT]-prec[tt]. ",
3514         "An error estimate taking into account time correlations ",
3515         "is made by splitting the data into blocks and determining ",
3516         "the free energy differences over those blocks and assuming ",
3517         "the blocks are independent. ",
3518         "The final error estimate is determined from the average variance ",
3519         "over 5 blocks. A range of block numbers for error estimation can ",
3520         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3521
3522         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3523         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3524         "samples. [BB]Note[bb] that when aggregating energy ",
3525         "differences/derivatives with different sampling intervals, this is ",
3526         "almost certainly not correct. Usually subsequent energies are ",
3527         "correlated and different time intervals mean different degrees ",
3528         "of correlation between samples.[PAR]",
3529
3530         "The results are split in two parts: the last part contains the final ",
3531         "results in kJ/mol, together with the error estimate for each part ",
3532         "and the total. The first part contains detailed free energy ",
3533         "difference estimates and phase space overlap measures in units of ",
3534         "kT (together with their computed error estimate). The printed ",
3535         "values are:[BR]",
3536         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3537         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3538         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3539         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3540         "[TT]*[tt]    s_B: an estimate of the relative entropy of A in B.[BR]",
3541         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3542
3543         "The relative entropy of both states in each other's ensemble can be ",
3544         "interpreted as a measure of phase space overlap: ",
3545         "the relative entropy s_A of the work samples of lambda_B in the ",
3546         "ensemble of lambda_A (and vice versa for s_B), is a ",
3547         "measure of the 'distance' between Boltzmann distributions of ",
3548         "the two states, that goes to zero for identical distributions. See ",
3549         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3550         "[PAR]",
3551         "The estimate of the expected per-sample standard deviation, as given ",
3552         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3553         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3554         "of the actual statistical error, because it assumes independent samples).[PAR]",
3555
3556         "To get a visual estimate of the phase space overlap, use the ",
3557         "[TT]-oh[tt] option to write series of histograms, together with the ",
3558         "[TT]-nbin[tt] option.[PAR]"
3559     };
3560     static real        begin    = 0, end = -1, temp = -1;
3561     int                nd       = 2, nbmin = 5, nbmax = 5;
3562     int                nbin     = 100;
3563     gmx_bool           use_dhdl = FALSE;
3564     gmx_bool           calc_s, calc_v;
3565     t_pargs            pa[] = {
3566         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3567         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3568         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3569         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3570         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3571         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3572         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3573         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3574     };
3575
3576     t_filenm           fnm[] = {
3577         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3578         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3579         { efXVG, "-o",  "bar",    ffOPTWR },
3580         { efXVG, "-oi", "barint", ffOPTWR },
3581         { efXVG, "-oh", "histogram", ffOPTWR }
3582     };
3583 #define NFILE asize(fnm)
3584
3585     int          f, i, j;
3586     int          nf = 0;    /* file counter */
3587     int          nbs;
3588     int          nfile_tot; /* total number of input files */
3589     int          nxvgfile = 0;
3590     int          nedrfile = 0;
3591     char       **fxvgnms;
3592     char       **fedrnms;
3593     sim_data_t   sim_data; /* the simulation data */
3594     barres_t    *results;  /* the results */
3595     int          nresults; /* number of results in results array */
3596
3597     double      *partsum;
3598     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3599     FILE        *fpb, *fpi;
3600     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3601     char         buf[STRLEN], buf2[STRLEN];
3602     char         ktformat[STRLEN], sktformat[STRLEN];
3603     char         kteformat[STRLEN], skteformat[STRLEN];
3604     output_env_t oenv;
3605     double       kT, beta;
3606     gmx_bool     result_OK = TRUE, bEE = TRUE;
3607
3608     gmx_bool     disc_err          = FALSE;
3609     double       sum_disc_err      = 0.; /* discretization error */
3610     gmx_bool     histrange_err     = FALSE;
3611     double       sum_histrange_err = 0.; /* histogram range error */
3612     double       stat_err          = 0.; /* statistical error */
3613
3614     if (!parse_common_args(&argc, argv,
3615                            PCA_CAN_VIEW,
3616                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3617     {
3618         return 0;
3619     }
3620
3621     if (opt2bSet("-f", NFILE, fnm))
3622     {
3623         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3624     }
3625     if (opt2bSet("-g", NFILE, fnm))
3626     {
3627         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3628     }
3629
3630     sim_data_init(&sim_data);
3631 #if 0
3632     /* make linked list */
3633     lb = &lambda_head;
3634     lambda_data_init(lb, 0, 0);
3635     lb->next = lb;
3636     lb->prev = lb;
3637 #endif
3638
3639
3640     nfile_tot = nxvgfile + nedrfile;
3641
3642     if (nfile_tot == 0)
3643     {
3644         gmx_fatal(FARGS, "No input files!");
3645     }
3646
3647     if (nd < 0)
3648     {
3649         gmx_fatal(FARGS, "Can not have negative number of digits");
3650     }
3651     prec = pow(10, -nd);
3652
3653     snew(partsum, (nbmax+1)*(nbmax+1));
3654     nf = 0;
3655
3656     /* read in all files. First xvg files */
3657     for (f = 0; f < nxvgfile; f++)
3658     {
3659         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3660         nf++;
3661     }
3662     /* then .edr files */
3663     for (f = 0; f < nedrfile; f++)
3664     {
3665         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3666         nf++;
3667     }
3668
3669     /* fix the times to allow for equilibration */
3670     sim_data_impose_times(&sim_data, begin, end);
3671
3672     if (opt2bSet("-oh", NFILE, fnm))
3673     {
3674         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3675     }
3676
3677     /* assemble the output structures from the lambdas */
3678     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3679
3680     sum_disc_err = barres_list_max_disc_err(results, nresults);
3681
3682     if (nresults == 0)
3683     {
3684         printf("\nNo results to calculate.\n");
3685         return 0;
3686     }
3687
3688     if (sum_disc_err > prec)
3689     {
3690         prec = sum_disc_err;
3691         nd   = ceil(-log10(prec));
3692         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3693     }
3694
3695
3696     /*sprintf(lamformat,"%%6.3f");*/
3697     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3698     /* the format strings of the results in kT */
3699     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3700     sprintf( sktformat, "%%%ds", 6+nd);
3701     /* the format strings of the errors in kT */
3702     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3703     sprintf( skteformat, "%%%ds", 4+nd);
3704     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3705     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3706
3707
3708
3709     fpb = NULL;
3710     if (opt2bSet("-o", NFILE, fnm))
3711     {
3712         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3713         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3714                             "\\lambda", buf, exvggtXYDY, oenv);
3715     }
3716
3717     fpi = NULL;
3718     if (opt2bSet("-oi", NFILE, fnm))
3719     {
3720         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3721         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3722                        "\\lambda", buf, oenv);
3723     }
3724
3725
3726
3727     if (nbmin > nbmax)
3728     {
3729         nbmin = nbmax;
3730     }
3731
3732     /* first calculate results */
3733     bEE      = TRUE;
3734     disc_err = FALSE;
3735     for (f = 0; f < nresults; f++)
3736     {
3737         /* Determine the free energy difference with a factor of 10
3738          * more accuracy than requested for printing.
3739          */
3740         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3741                  &bEE, partsum);
3742
3743         if (results[f].dg_disc_err > prec/10.)
3744         {
3745             disc_err = TRUE;
3746         }
3747         if (results[f].dg_histrange_err > prec/10.)
3748         {
3749             histrange_err = TRUE;
3750         }
3751     }
3752
3753     /* print results in kT */
3754     kT   = BOLTZ*temp;
3755     beta = 1/kT;
3756
3757     printf("\nTemperature: %g K\n", temp);
3758
3759     printf("\nDetailed results in kT (see help for explanation):\n\n");
3760     printf("%6s ", " lam_A");
3761     printf("%6s ", " lam_B");
3762     printf(sktformat,  "DG ");
3763     if (bEE)
3764     {
3765         printf(skteformat, "+/- ");
3766     }
3767     if (disc_err)
3768     {
3769         printf(skteformat, "disc ");
3770     }
3771     if (histrange_err)
3772     {
3773         printf(skteformat, "range ");
3774     }
3775     printf(sktformat,  "s_A ");
3776     if (bEE)
3777     {
3778         printf(skteformat, "+/- " );
3779     }
3780     printf(sktformat,  "s_B ");
3781     if (bEE)
3782     {
3783         printf(skteformat, "+/- " );
3784     }
3785     printf(sktformat,  "stdev ");
3786     if (bEE)
3787     {
3788         printf(skteformat, "+/- ");
3789     }
3790     printf("\n");
3791     for (f = 0; f < nresults; f++)
3792     {
3793         lambda_vec_print_short(results[f].a->native_lambda, buf);
3794         printf("%s ", buf);
3795         lambda_vec_print_short(results[f].b->native_lambda, buf);
3796         printf("%s ", buf);
3797         printf(ktformat,  results[f].dg);
3798         printf(" ");
3799         if (bEE)
3800         {
3801             printf(kteformat, results[f].dg_err);
3802             printf(" ");
3803         }
3804         if (disc_err)
3805         {
3806             printf(kteformat, results[f].dg_disc_err);
3807             printf(" ");
3808         }
3809         if (histrange_err)
3810         {
3811             printf(kteformat, results[f].dg_histrange_err);
3812             printf(" ");
3813         }
3814         printf(ktformat,  results[f].sa);
3815         printf(" ");
3816         if (bEE)
3817         {
3818             printf(kteformat, results[f].sa_err);
3819             printf(" ");
3820         }
3821         printf(ktformat,  results[f].sb);
3822         printf(" ");
3823         if (bEE)
3824         {
3825             printf(kteformat, results[f].sb_err);
3826             printf(" ");
3827         }
3828         printf(ktformat,  results[f].dg_stddev);
3829         printf(" ");
3830         if (bEE)
3831         {
3832             printf(kteformat, results[f].dg_stddev_err);
3833         }
3834         printf("\n");
3835
3836         /* Check for negative relative entropy with a 95% certainty. */
3837         if (results[f].sa < -2*results[f].sa_err ||
3838             results[f].sb < -2*results[f].sb_err)
3839         {
3840             result_OK = FALSE;
3841         }
3842     }
3843
3844     if (!result_OK)
3845     {
3846         printf("\nWARNING: Some of these results violate the Second Law of "
3847                "Thermodynamics: \n"
3848                "         This is can be the result of severe undersampling, or "
3849                "(more likely)\n"
3850                "         there is something wrong with the simulations.\n");
3851     }
3852
3853
3854     /* final results in kJ/mol */
3855     printf("\n\nFinal results in kJ/mol:\n\n");
3856     dg_tot  = 0;
3857     for (f = 0; f < nresults; f++)
3858     {
3859
3860         if (fpi != NULL)
3861         {
3862             lambda_vec_print_short(results[f].a->native_lambda, buf);
3863             fprintf(fpi, xvg2format, buf, dg_tot);
3864         }
3865
3866
3867         if (fpb != NULL)
3868         {
3869             lambda_vec_print_intermediate(results[f].a->native_lambda,
3870                                           results[f].b->native_lambda,
3871                                           buf);
3872
3873             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3874         }
3875
3876         printf("point ");
3877         lambda_vec_print_short(results[f].a->native_lambda, buf);
3878         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3879         printf("%s - %s", buf, buf2);
3880         printf(",   DG ");
3881
3882         printf(dgformat, results[f].dg*kT);
3883         if (bEE)
3884         {
3885             printf(" +/- ");
3886             printf(dgformat, results[f].dg_err*kT);
3887         }
3888         if (histrange_err)
3889         {
3890             printf(" (max. range err. = ");
3891             printf(dgformat, results[f].dg_histrange_err*kT);
3892             printf(")");
3893             sum_histrange_err += results[f].dg_histrange_err*kT;
3894         }
3895
3896         printf("\n");
3897         dg_tot += results[f].dg;
3898     }
3899     printf("\n");
3900     printf("total ");
3901     lambda_vec_print_short(results[0].a->native_lambda, buf);
3902     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3903     printf("%s - %s", buf, buf2);
3904     printf(",   DG ");
3905
3906     printf(dgformat, dg_tot*kT);
3907     if (bEE)
3908     {
3909         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3910         printf(" +/- ");
3911         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3912     }
3913     printf("\n");
3914     if (disc_err)
3915     {
3916         printf("\nmaximum discretization error = ");
3917         printf(dgformat, sum_disc_err);
3918         if (bEE && stat_err < sum_disc_err)
3919         {
3920             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3921         }
3922     }
3923     if (histrange_err)
3924     {
3925         printf("\nmaximum histogram range error = ");
3926         printf(dgformat, sum_histrange_err);
3927         if (bEE && stat_err < sum_histrange_err)
3928         {
3929             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3930         }
3931
3932     }
3933     printf("\n");
3934
3935
3936     if (fpi != NULL)
3937     {
3938         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3939         fprintf(fpi, xvg2format, buf, dg_tot);
3940         gmx_ffclose(fpi);
3941     }
3942
3943     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3944     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3945
3946     return 0;
3947 }