src/gromacs/gmxana/gmx_bar.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include <ctype.h>
  38 #include <float.h>
  39 #include <math.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42
  43 #include "gromacs/legacyheaders/typedefs.h"
  44 #include "gromacs/utility/smalloc.h"
  45 #include "gromacs/utility/futil.h"
  46 #include "gromacs/commandline/pargs.h"
  47 #include "gromacs/legacyheaders/macros.h"
  48 #include "gromacs/fileio/enxio.h"
  49 #include "gromacs/math/units.h"
  50 #include "gromacs/utility/fatalerror.h"
  51 #include "gromacs/fileio/xvgr.h"
  52 #include "gromacs/legacyheaders/viewit.h"
  53 #include "gmx_ana.h"
  54 #include "gromacs/math/utilities.h"
  55 #include "gromacs/utility/cstringutil.h"
  56 #include "gromacs/legacyheaders/names.h"
  57 #include "gromacs/legacyheaders/mdebin.h"
  58
  59
  60 /* Structure for the names of lambda vector components */
  61 typedef struct lambda_components_t
  62 {
  63     char **names;  /* Array of strings with names for the lambda vector
  64                       components */
  65     int    N;      /* The number of components */
  66     int    Nalloc; /* The number of allocated components */
  67 } lambda_components_t;
  68
  69 /* Structure for a lambda vector or a dhdl derivative direction */
  70 typedef struct lambda_vec_t
  71 {
  72     double                    *val;   /* The lambda vector component values. Only valid if
  73                                          dhdl == -1 */
  74     int                        dhdl;  /* The coordinate index for the derivative described by this
  75                                          structure, or -1 */
  76     const lambda_components_t *lc;    /* the associated lambda_components
  77                                          structure */
  78     int                        index; /* The state number (init-lambda-state) of this lambda
  79                                          vector, if known. If not, it is set to -1 */
  80 } lambda_vec_t;
  81
  82 /* the dhdl.xvg data from a simulation */
  83 typedef struct xvg_t
  84 {
  85     const char   *filename;
  86     int           ftp;           /* file type */
  87     int           nset;          /* number of lambdas, including dhdl */
  88     int          *np;            /* number of data points (du or hists) per lambda */
  89     int           np_alloc;      /* number of points (du or hists) allocated */
  90     double        temp;          /* temperature */
  91     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  92     double       *t;             /* the times (of second index for y) */
  93     double      **y;             /* the dU values. y[0] holds the derivative, while
  94                                     further ones contain the energy differences between
  95                                     the native lambda and the 'foreign' lambdas. */
  96     lambda_vec_t  native_lambda; /* the native lambda */
  97
  98     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
  99 } xvg_t;
 100
 101
 102 typedef struct hist_t
 103 {
 104     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 105     double          dx[2];                  /* the histogram spacing. The reverse
 106                                                dx is the negative of the forward dx.*/
 107     gmx_int64_t     x0[2];                  /* the (forward + reverse) histogram start
 108                                                    point(s) as int */
 109
 110     int             nbin[2];                /* the (forward+reverse) number of bins */
 111     gmx_int64_t     sum;                    /* the total number of counts. Must be
 112                                                    the same for forward + reverse.  */
 113     int             nhist;                  /* number of hist datas (forward or reverse) */
 114
 115     double          start_time, delta_time; /* start time, end time of histogram */
 116 } hist_t;
 117
 118
 119 /* an aggregate of samples for partial free energy calculation */
 120 typedef struct samples_t
 121 {
 122     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 123     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 124     double        temp;           /* the temperature */
 125     gmx_bool      derivative;     /* whether this sample is a derivative */
 126
 127     /* The samples come either as either delta U lists: */
 128     int     ndu;                    /* the number of delta U samples */
 129     double *du;                     /* the delta u's */
 130     double *t;                      /* the times associated with those samples, or: */
 131     double  start_time, delta_time; /*start time and delta time for linear time*/
 132
 133     /* or as histograms: */
 134     hist_t *hist; /* a histogram */
 135
 136     /* allocation data: (not NULL for data 'owned' by this struct) */
 137     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 138     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 139     hist_t         *hist_alloc;          /* allocated hist */
 140
 141     gmx_int64_t     ntot;                /* total number of samples */
 142     const char     *filename;            /* the file name this sample comes from */
 143 } samples_t;
 144
 145 /* a sample range (start to end for du-style data, or boolean
 146     for both du-style data and histograms */
 147 typedef struct sample_range_t
 148 {
 149     int        start, end; /* start and end index for du style data */
 150     gmx_bool   use;        /* whether to use this sample */
 151
 152     samples_t *s;          /* the samples this range belongs to */
 153 } sample_range_t;
 154
 155
 156 /* a collection of samples for a partial free energy calculation
 157     (i.e. the collection of samples from one native lambda to one
 158     foreign lambda) */
 159 typedef struct sample_coll_t
 160 {
 161     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 162                                           in the histogram */
 163     lambda_vec_t   *foreign_lambda;    /* collection */
 164     double          temp;              /* the temperature */
 165
 166     int             nsamples;          /* the number of samples */
 167     samples_t     **s;                 /* the samples themselves */
 168     sample_range_t *r;                 /* the sample ranges */
 169     int             nsamples_alloc;    /* number of allocated samples */
 170
 171     gmx_int64_t     ntot;              /* total number of samples in the ranges of
 172                                               this collection */
 173
 174     struct sample_coll_t *next, *prev; /* next and previous in the list */
 175 } sample_coll_t;
 176
 177 /* all the samples associated with a lambda point */
 178 typedef struct lambda_data_t
 179 {
 180     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 181     double                temp;        /* temperature */
 182
 183     sample_coll_t        *sc;          /* the samples */
 184
 185     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 186
 187     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 188 } lambda_data_t;
 189
 190 /* Top-level data structure of simulation data */
 191 typedef struct sim_data_t
 192 {
 193     lambda_data_t      *lb;      /* a lambda data linked list */
 194     lambda_data_t       lb_head; /* The head element of the linked list */
 195
 196     lambda_components_t lc;      /* the allowed components of the lambda
 197                                     vectors */
 198 } sim_data_t;
 199
 200 /* Top-level data structure with calculated values. */
 201 typedef struct {
 202     sample_coll_t *a, *b;            /* the simulation data */
 203
 204     double         dg;               /* the free energy difference */
 205     double         dg_err;           /* the free energy difference */
 206
 207     double         dg_disc_err;      /* discretization error */
 208     double         dg_histrange_err; /* histogram range error */
 209
 210     double         sa;               /* relative entropy of b in state a */
 211     double         sa_err;           /* error in sa */
 212     double         sb;               /* relative entropy of a in state b */
 213     double         sb_err;           /* error in sb */
 214
 215     double         dg_stddev;        /* expected dg stddev per sample */
 216     double         dg_stddev_err;    /* error in dg_stddev */
 217 } barres_t;
 218
 219
 220 /* Initialize a lambda_components structure */
 221 static void lambda_components_init(lambda_components_t *lc)
 222 {
 223     lc->N      = 0;
 224     lc->Nalloc = 2;
 225     snew(lc->names, lc->Nalloc);
 226 }
 227
 228 /* Add a component to a lambda_components structure */
 229 static void lambda_components_add(lambda_components_t *lc,
 230                                   const char *name, size_t name_length)
 231 {
 232     while (lc->N + 1 > lc->Nalloc)
 233     {
 234         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 235         srenew(lc->names, lc->Nalloc);
 236     }
 237     snew(lc->names[lc->N], name_length+1);
 238     strncpy(lc->names[lc->N], name, name_length);
 239     lc->N++;
 240 }
 241
 242 /* check whether a component with index 'index' matches the given name, or
 243    is also NULL. Returns TRUE if this is the case.
 244    the string name does not need to end */
 245 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 246                                         int                        index,
 247                                         const char                *name,
 248                                         size_t                     name_length)
 249 {
 250     size_t len;
 251     if (index >= lc->N)
 252     {
 253         return FALSE;
 254     }
 255     if (name == NULL && lc->names[index] == NULL)
 256     {
 257         return TRUE;
 258     }
 259     if ((name == NULL) != (lc->names[index] == NULL))
 260     {
 261         return FALSE;
 262     }
 263     len = strlen(lc->names[index]);
 264     if (len != name_length)
 265     {
 266         return FALSE;
 267     }
 268     if (strncmp(lc->names[index], name, name_length) == 0)
 269     {
 270         return TRUE;
 271     }
 272     return FALSE;
 273 }
 274
 275 /* Find the index of a given lambda component name, or -1 if not found */
 276 static int lambda_components_find(const lambda_components_t *lc,
 277                                   const char                *name,
 278                                   size_t                     name_length)
 279 {
 280     int i;
 281
 282     for (i = 0; i < lc->N; i++)
 283     {
 284         if (strncmp(lc->names[i], name, name_length) == 0)
 285         {
 286             return i;
 287         }
 288     }
 289     return -1;
 290 }
 291
 292
 293
 294 /* initialize a lambda vector */
 295 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 296 {
 297     snew(lv->val, lc->N);
 298     lv->index = -1;
 299     lv->dhdl  = -1;
 300     lv->lc    = lc;
 301 }
 302
 303 static void lambda_vec_destroy(lambda_vec_t *lv)
 304 {
 305     sfree(lv->val);
 306 }
 307
 308 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 309 {
 310     int i;
 311
 312     lambda_vec_init(lv, orig->lc);
 313     lv->dhdl  = orig->dhdl;
 314     lv->index = orig->index;
 315     for (i = 0; i < lv->lc->N; i++)
 316     {
 317         lv->val[i] = orig->val[i];
 318     }
 319 }
 320
 321 /* write a lambda vec to a preallocated string */
 322 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 323 {
 324     int    i;
 325     size_t np;
 326
 327     str[0] = 0; /* reset the string */
 328     if (lv->dhdl < 0)
 329     {
 330         if (named)
 331         {
 332             str += sprintf(str, "delta H to ");
 333         }
 334         if (lv->lc->N > 1)
 335         {
 336             str += sprintf(str, "(");
 337         }
 338         for (i = 0; i < lv->lc->N; i++)
 339         {
 340             str += sprintf(str, "%g", lv->val[i]);
 341             if (i < lv->lc->N-1)
 342             {
 343                 str += sprintf(str, ", ");
 344             }
 345         }
 346         if (lv->lc->N > 1)
 347         {
 348             str += sprintf(str, ")");
 349         }
 350     }
 351     else
 352     {
 353         /* this lambda vector describes a derivative */
 354         str += sprintf(str, "dH/dl");
 355         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 356         {
 357             sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 358         }
 359     }
 360 }
 361
 362 /* write a shortened version of the lambda vec to a preallocated string */
 363 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 364 {
 365     int    i;
 366     size_t np;
 367
 368     if (lv->index >= 0)
 369     {
 370         sprintf(str, "%6d", lv->index);
 371     }
 372     else
 373     {
 374         if (lv->dhdl < 0)
 375         {
 376             sprintf(str, "%6.3f", lv->val[0]);
 377         }
 378         else
 379         {
 380             sprintf(str, "dH/dl[%d]", lv->dhdl);
 381         }
 382     }
 383 }
 384
 385 /* write an intermediate version of two lambda vecs to a preallocated string */
 386 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 387                                           const lambda_vec_t *b, char *str)
 388 {
 389     int    i;
 390     size_t np;
 391
 392     str[0] = 0;
 393     if ( (a->index >= 0) && (b->index >= 0) )
 394     {
 395         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 396     }
 397     else
 398     {
 399         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 400         {
 401             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 402         }
 403     }
 404 }
 405
 406
 407
 408 /* calculate the difference in lambda vectors: c = a-b.
 409    c must be initialized already, and a and b must describe non-derivative
 410    lambda points */
 411 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 412                             lambda_vec_t *c)
 413 {
 414     int i;
 415
 416     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 417     {
 418         gmx_fatal(FARGS,
 419                   "Trying to calculate the difference between derivatives instead of lambda points");
 420     }
 421     if ((a->lc != b->lc) || (a->lc != c->lc) )
 422     {
 423         gmx_fatal(FARGS,
 424                   "Trying to calculate the difference lambdas with differing basis set");
 425     }
 426     for (i = 0; i < a->lc->N; i++)
 427     {
 428         c->val[i] = a->val[i] - b->val[i];
 429     }
 430 }
 431
 432 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 433    a and b must describe non-derivative lambda points */
 434 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 435 {
 436     int    i;
 437     double ret = 0.;
 438
 439     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 440     {
 441         gmx_fatal(FARGS,
 442                   "Trying to calculate the difference between derivatives instead of lambda points");
 443     }
 444     if (a->lc != b->lc)
 445     {
 446         gmx_fatal(FARGS,
 447                   "Trying to calculate the difference lambdas with differing basis set");
 448     }
 449     for (i = 0; i < a->lc->N; i++)
 450     {
 451         double df = a->val[i] - b->val[i];
 452         ret += df*df;
 453     }
 454     return sqrt(ret);
 455 }
 456
 457
 458 /* check whether two lambda vectors are the same */
 459 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 460 {
 461     int i;
 462
 463     if (a->lc != b->lc)
 464     {
 465         return FALSE;
 466     }
 467     if (a->dhdl < 0)
 468     {
 469         for (i = 0; i < a->lc->N; i++)
 470         {
 471             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 472             {
 473                 return FALSE;
 474             }
 475         }
 476         return TRUE;
 477     }
 478     else
 479     {
 480         /* they're derivatives, so we check whether the indices match */
 481         return (a->dhdl == b->dhdl);
 482     }
 483 }
 484
 485 /* Compare the sort order of two foreign lambda vectors
 486
 487     returns 1 if a is 'bigger' than b,
 488     returns 0 if they're the same,
 489     returns -1 if a is 'smaller' than b.*/
 490 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 491                                        const lambda_vec_t *b)
 492 {
 493     int      i;
 494     double   norm_a    = 0, norm_b = 0;
 495     gmx_bool different = FALSE;
 496
 497     if (a->lc != b->lc)
 498     {
 499         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 500     }
 501     /* if either one has an index we sort based on that */
 502     if ((a->index >= 0) || (b->index >= 0))
 503     {
 504         if (a->index == b->index)
 505         {
 506             return 0;
 507         }
 508         return (a->index > b->index) ? 1 : -1;
 509     }
 510     if (a->dhdl >= 0 || b->dhdl >= 0)
 511     {
 512         /* lambda vectors that are derivatives always sort higher than those
 513            without derivatives */
 514         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 515         {
 516             return (a->dhdl >= 0) ? 1 : -1;
 517         }
 518         return a->dhdl > b->dhdl;
 519     }
 520
 521     /* neither has an index, so we can only sort on the lambda components,
 522        which is only valid if there is one component */
 523     for (i = 0; i < a->lc->N; i++)
 524     {
 525         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 526         {
 527             different = TRUE;
 528         }
 529         norm_a += a->val[i]*a->val[i];
 530         norm_b += b->val[i]*b->val[i];
 531     }
 532     if (!different)
 533     {
 534         return 0;
 535     }
 536     return norm_a > norm_b;
 537 }
 538
 539 /* Compare the sort order of two native lambda vectors
 540
 541     returns 1 if a is 'bigger' than b,
 542     returns 0 if they're the same,
 543     returns -1 if a is 'smaller' than b.*/
 544 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 545                                       const lambda_vec_t *b)
 546 {
 547     int i;
 548
 549     if (a->lc != b->lc)
 550     {
 551         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 552     }
 553     /* if either one has an index we sort based on that */
 554     if ((a->index >= 0) || (b->index >= 0))
 555     {
 556         if (a->index == b->index)
 557         {
 558             return 0;
 559         }
 560         return (a->index > b->index) ? 1 : -1;
 561     }
 562     /* neither has an index, so we can only sort on the lambda components,
 563        which is only valid if there is one component */
 564     if (a->lc->N > 1)
 565     {
 566         gmx_fatal(FARGS,
 567                   "Can't compare lambdas with no index and > 1 component");
 568     }
 569     if (a->dhdl >= 0 || b->dhdl >= 0)
 570     {
 571         gmx_fatal(FARGS,
 572                   "Can't compare native lambdas that are derivatives");
 573     }
 574     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 575     {
 576         return 0;
 577     }
 578     return a->val[0] > b->val[0] ? 1 : -1;
 579 }
 580
 581
 582
 583
 584 static void hist_init(hist_t *h, int nhist, int *nbin)
 585 {
 586     int i;
 587     if (nhist > 2)
 588     {
 589         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 590     }
 591     for (i = 0; i < nhist; i++)
 592     {
 593         snew(h->bin[i], nbin[i]);
 594         h->x0[i]      = 0;
 595         h->nbin[i]    = nbin[i];
 596         h->start_time = h->delta_time = 0;
 597         h->dx[i]      = 0;
 598     }
 599     h->sum   = 0;
 600     h->nhist = nhist;
 601 }
 602
 603 static void hist_destroy(hist_t *h)
 604 {
 605     sfree(h->bin);
 606 }
 607
 608
 609 static void xvg_init(xvg_t *ba)
 610 {
 611     ba->filename = NULL;
 612     ba->nset     = 0;
 613     ba->np_alloc = 0;
 614     ba->np       = NULL;
 615     ba->y        = NULL;
 616 }
 617
 618 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 619                          lambda_vec_t *foreign_lambda, double temp,
 620                          gmx_bool derivative, const char *filename)
 621 {
 622     s->native_lambda  = native_lambda;
 623     s->foreign_lambda = foreign_lambda;
 624     s->temp           = temp;
 625     s->derivative     = derivative;
 626
 627     s->ndu        = 0;
 628     s->du         = NULL;
 629     s->t          = NULL;
 630     s->start_time = s->delta_time = 0;
 631     s->hist       = NULL;
 632     s->du_alloc   = NULL;
 633     s->t_alloc    = NULL;
 634     s->hist_alloc = NULL;
 635     s->ndu_alloc  = 0;
 636     s->nt_alloc   = 0;
 637
 638     s->ntot     = 0;
 639     s->filename = filename;
 640 }
 641
 642 static void sample_range_init(sample_range_t *r, samples_t *s)
 643 {
 644     r->start = 0;
 645     r->end   = s->ndu;
 646     r->use   = TRUE;
 647     r->s     = NULL;
 648 }
 649
 650 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 651                              lambda_vec_t *foreign_lambda, double temp)
 652 {
 653     sc->native_lambda  = native_lambda;
 654     sc->foreign_lambda = foreign_lambda;
 655     sc->temp           = temp;
 656
 657     sc->nsamples       = 0;
 658     sc->s              = NULL;
 659     sc->r              = NULL;
 660     sc->nsamples_alloc = 0;
 661
 662     sc->ntot = 0;
 663     sc->next = sc->prev = NULL;
 664 }
 665
 666 static void sample_coll_destroy(sample_coll_t *sc)
 667 {
 668     /* don't free the samples themselves */
 669     sfree(sc->r);
 670     sfree(sc->s);
 671 }
 672
 673
 674 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 675                              double temp)
 676 {
 677     l->lambda = native_lambda;
 678     l->temp   = temp;
 679
 680     l->next = NULL;
 681     l->prev = NULL;
 682
 683     l->sc = &(l->sc_head);
 684
 685     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 686     l->sc->next = l->sc;
 687     l->sc->prev = l->sc;
 688 }
 689
 690 static void barres_init(barres_t *br)
 691 {
 692     br->dg            = 0;
 693     br->dg_err        = 0;
 694     br->sa            = 0;
 695     br->sa_err        = 0;
 696     br->sb            = 0;
 697     br->sb_err        = 0;
 698     br->dg_stddev     = 0;
 699     br->dg_stddev_err = 0;
 700
 701     br->a = NULL;
 702     br->b = NULL;
 703 }
 704
 705
 706 /* calculate the total number of samples in a sample collection */
 707 static void sample_coll_calc_ntot(sample_coll_t *sc)
 708 {
 709     int i;
 710
 711     sc->ntot = 0;
 712     for (i = 0; i < sc->nsamples; i++)
 713     {
 714         if (sc->r[i].use)
 715         {
 716             if (sc->s[i]->hist)
 717             {
 718                 sc->ntot += sc->s[i]->ntot;
 719             }
 720             else
 721             {
 722                 sc->ntot += sc->r[i].end - sc->r[i].start;
 723             }
 724         }
 725     }
 726 }
 727
 728
 729 /* find the barsamples_t associated with a lambda that corresponds to
 730    a specific foreign lambda */
 731 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 732                                                    lambda_vec_t  *foreign_lambda)
 733 {
 734     sample_coll_t *sc = l->sc->next;
 735
 736     while (sc != l->sc)
 737     {
 738         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 739         {
 740             return sc;
 741         }
 742         sc = sc->next;
 743     }
 744
 745     return NULL;
 746 }
 747
 748 /* insert li into an ordered list of lambda_colls */
 749 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 750 {
 751     sample_coll_t *scn = l->sc->next;
 752     while ( (scn != l->sc) )
 753     {
 754         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 755         {
 756             break;
 757         }
 758         scn = scn->next;
 759     }
 760     /* now insert it before the found scn */
 761     sc->next        = scn;
 762     sc->prev        = scn->prev;
 763     scn->prev->next = sc;
 764     scn->prev       = sc;
 765 }
 766
 767 /* insert li into an ordered list of lambdas */
 768 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 769 {
 770     lambda_data_t *lc = head->next;
 771     while (lc != head)
 772     {
 773         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 774         {
 775             break;
 776         }
 777         lc = lc->next;
 778     }
 779     /* now insert ourselves before the found lc */
 780     li->next       = lc;
 781     li->prev       = lc->prev;
 782     lc->prev->next = li;
 783     lc->prev       = li;
 784 }
 785
 786 /* insert a sample and a sample_range into a sample_coll. The
 787     samples are stored as a pointer, the range is copied. */
 788 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 789                                       sample_range_t *r)
 790 {
 791     /* first check if it belongs here */
 792     if (sc->temp != s->temp)
 793     {
 794         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 795                   s->filename, sc->next->s[0]->filename);
 796     }
 797     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 798     {
 799         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 800                   s->filename, sc->next->s[0]->filename);
 801     }
 802     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 803     {
 804         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 805                   s->filename, sc->next->s[0]->filename);
 806     }
 807
 808     /* check if there's room */
 809     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 810     {
 811         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 812         srenew(sc->s, sc->nsamples_alloc);
 813         srenew(sc->r, sc->nsamples_alloc);
 814     }
 815     sc->s[sc->nsamples] = s;
 816     sc->r[sc->nsamples] = *r;
 817     sc->nsamples++;
 818
 819     sample_coll_calc_ntot(sc);
 820 }
 821
 822 /* insert a sample into a lambda_list, creating the right sample_coll if
 823    neccesary */
 824 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 825 {
 826     gmx_bool       found = FALSE;
 827     sample_coll_t *sc;
 828     sample_range_t r;
 829
 830     lambda_data_t *l = head->next;
 831
 832     /* first search for the right lambda_data_t */
 833     while (l != head)
 834     {
 835         if (lambda_vec_same(l->lambda, s->native_lambda) )
 836         {
 837             found = TRUE;
 838             break;
 839         }
 840         l = l->next;
 841     }
 842
 843     if (!found)
 844     {
 845         snew(l, 1);                                     /* allocate a new one */
 846         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 847         lambda_data_insert_lambda(head, l);             /* add it to the list */
 848     }
 849
 850     /* now look for a sample collection */
 851     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 852     if (!sc)
 853     {
 854         snew(sc, 1); /* allocate a new one */
 855         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 856         lambda_data_insert_sample_coll(l, sc);
 857     }
 858
 859     /* now insert the samples into the sample coll */
 860     sample_range_init(&r, s);
 861     sample_coll_insert_sample(sc, s, &r);
 862 }
 863
 864
 865 /* make a histogram out of a sample collection */
 866 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 867                                   int *nbin_alloc, int *nbin,
 868                                   double *dx, double *xmin, int nbin_default)
 869 {
 870     int      i, j, k;
 871     gmx_bool dx_set   = FALSE;
 872     gmx_bool xmin_set = FALSE;
 873
 874     gmx_bool xmax_set      = FALSE;
 875     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 876                                        limits of a histogram */
 877     double   xmax          = -1;
 878
 879     /* first determine dx and xmin; try the histograms */
 880     for (i = 0; i < sc->nsamples; i++)
 881     {
 882         if (sc->s[i]->hist)
 883         {
 884             hist_t *hist = sc->s[i]->hist;
 885             for (k = 0; k < hist->nhist; k++)
 886             {
 887                 double hdx      = hist->dx[k];
 888                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 889
 890                 /* we use the biggest dx*/
 891                 if ( (!dx_set) || hist->dx[0] > *dx)
 892                 {
 893                     dx_set = TRUE;
 894                     *dx    = hist->dx[0];
 895                 }
 896                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 897                 {
 898                     xmin_set = TRUE;
 899                     *xmin    = (hist->x0[k]*hdx);
 900                 }
 901
 902                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 903                 {
 904                     xmax_set = TRUE;
 905                     xmax     = xmax_now;
 906                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 907                     {
 908                         xmax_set_hard = TRUE;
 909                     }
 910                 }
 911                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 912                 {
 913                     xmax_set_hard = TRUE;
 914                     xmax          = xmax_now;
 915                 }
 916             }
 917         }
 918     }
 919     /* and the delta us */
 920     for (i = 0; i < sc->nsamples; i++)
 921     {
 922         if (sc->s[i]->ndu > 0)
 923         {
 924             /* determine min and max */
 925             int    starti  = sc->r[i].start;
 926             int    endi    = sc->r[i].end;
 927             double du_xmin = sc->s[i]->du[starti];
 928             double du_xmax = sc->s[i]->du[starti];
 929             for (j = starti+1; j < endi; j++)
 930             {
 931                 if (sc->s[i]->du[j] < du_xmin)
 932                 {
 933                     du_xmin = sc->s[i]->du[j];
 934                 }
 935                 if (sc->s[i]->du[j] > du_xmax)
 936                 {
 937                     du_xmax = sc->s[i]->du[j];
 938                 }
 939             }
 940
 941             /* and now change the limits */
 942             if ( (!xmin_set) || (du_xmin < *xmin) )
 943             {
 944                 xmin_set = TRUE;
 945                 *xmin    = du_xmin;
 946             }
 947             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 948             {
 949                 xmax_set = TRUE;
 950                 xmax     = du_xmax;
 951             }
 952         }
 953     }
 954
 955     if (!xmax_set || !xmin_set)
 956     {
 957         *nbin = 0;
 958         return;
 959     }
 960
 961
 962     if (!dx_set)
 963     {
 964         *nbin = nbin_default;
 965         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 966                                                be 0, and we count from 0 */
 967     }
 968     else
 969     {
 970         *nbin = (xmax-(*xmin))/(*dx);
 971     }
 972
 973     if (*nbin > *nbin_alloc)
 974     {
 975         *nbin_alloc = *nbin;
 976         srenew(*bin, *nbin_alloc);
 977     }
 978
 979     /* reset the histogram */
 980     for (i = 0; i < (*nbin); i++)
 981     {
 982         (*bin)[i] = 0;
 983     }
 984
 985     /* now add the actual data */
 986     for (i = 0; i < sc->nsamples; i++)
 987     {
 988         if (sc->s[i]->hist)
 989         {
 990             hist_t *hist = sc->s[i]->hist;
 991             for (k = 0; k < hist->nhist; k++)
 992             {
 993                 double hdx       = hist->dx[k];
 994                 double xmin_hist = hist->x0[k]*hdx;
 995                 for (j = 0; j < hist->nbin[k]; j++)
 996                 {
 997                     /* calculate the bin corresponding to the middle of the
 998                        original bin */
 999                     double x     = hdx*(j+0.5) + xmin_hist;
1000                     int    binnr = (int)((x-(*xmin))/(*dx));
1001
1002                     if (binnr >= *nbin || binnr < 0)
1003                     {
1004                         binnr = (*nbin)-1;
1005                     }
1006
1007                     (*bin)[binnr] += hist->bin[k][j];
1008                 }
1009             }
1010         }
1011         else
1012         {
1013             int starti = sc->r[i].start;
1014             int endi   = sc->r[i].end;
1015             for (j = starti; j < endi; j++)
1016             {
1017                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1018                 if (binnr >= *nbin || binnr < 0)
1019                 {
1020                     binnr = (*nbin)-1;
1021                 }
1022
1023                 (*bin)[binnr]++;
1024             }
1025         }
1026     }
1027 }
1028
1029 /* write a collection of histograms to a file */
1030 void sim_data_histogram(sim_data_t *sd, const char *filename,
1031                         int nbin_default, const output_env_t oenv)
1032 {
1033     char           label_x[STRLEN];
1034     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1035     const char    *title   = "N(\\DeltaH)";
1036     const char    *label_y = "Samples";
1037     FILE          *fp;
1038     lambda_data_t *bl;
1039     int            nsets     = 0;
1040     char         **setnames  = NULL;
1041     gmx_bool       first_set = FALSE;
1042     /* histogram data: */
1043     int           *hist       = NULL;
1044     int            nbin       = 0;
1045     int            nbin_alloc = 0;
1046     double         dx         = 0;
1047     double         min        = 0;
1048     int            i;
1049     lambda_data_t *bl_head = sd->lb;
1050
1051     printf("\nWriting histogram to %s\n", filename);
1052     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1053
1054     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1055
1056     /* first get all the set names */
1057     bl = bl_head->next;
1058     /* iterate over all lambdas */
1059     while (bl != bl_head)
1060     {
1061         sample_coll_t *sc = bl->sc->next;
1062
1063         /* iterate over all samples */
1064         while (sc != bl->sc)
1065         {
1066             char buf[STRLEN], buf2[STRLEN];
1067
1068             nsets++;
1069             srenew(setnames, nsets);
1070             snew(setnames[nsets-1], STRLEN);
1071             if (sc->foreign_lambda->dhdl < 0)
1072             {
1073                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1074                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1075                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1076                         deltag, lambda, buf2, lambda, buf);
1077             }
1078             else
1079             {
1080                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1081                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1082                         dhdl, lambda, buf);
1083             }
1084             sc = sc->next;
1085         }
1086
1087         bl = bl->next;
1088     }
1089     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1090
1091
1092     /* now make the histograms */
1093     bl = bl_head->next;
1094     /* iterate over all lambdas */
1095     while (bl != bl_head)
1096     {
1097         sample_coll_t *sc = bl->sc->next;
1098
1099         /* iterate over all samples */
1100         while (sc != bl->sc)
1101         {
1102             if (!first_set)
1103             {
1104                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1105             }
1106
1107             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1108                                   nbin_default);
1109
1110             for (i = 0; i < nbin; i++)
1111             {
1112                 double xmin = i*dx + min;
1113                 double xmax = (i+1)*dx + min;
1114
1115                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1116             }
1117
1118             first_set = FALSE;
1119             sc        = sc->next;
1120         }
1121
1122         bl = bl->next;
1123     }
1124
1125     if (hist)
1126     {
1127         sfree(hist);
1128     }
1129
1130     xvgrclose(fp);
1131 }
1132
1133 static int
1134 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1135 {
1136     int n = 0;
1137
1138     n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1139     if (lambda->index >= 0)
1140     {
1141         n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1142     }
1143     if (lambda->dhdl >= 0)
1144     {
1145         n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1146     }
1147     else
1148     {
1149         int i;
1150         for (i = 0; i < lambda->lc->N; i++)
1151         {
1152             n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1153         }
1154     }
1155     return n;
1156 }
1157
1158 /* create a collection (array) of barres_t object given a ordered linked list
1159    of barlamda_t sample collections */
1160 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1161                                     gmx_bool use_dhdl)
1162 {
1163     lambda_data_t *bl;
1164     int            nlambda = 0;
1165     barres_t      *res;
1166     int            i;
1167     gmx_bool       dhdl    = FALSE;
1168     gmx_bool       first   = TRUE;
1169     lambda_data_t *bl_head = sd->lb;
1170
1171     /* first count the lambdas */
1172     bl = bl_head->next;
1173     while (bl != bl_head)
1174     {
1175         nlambda++;
1176         bl = bl->next;
1177     }
1178     snew(res, nlambda-1);
1179
1180     /* next put the right samples in the res */
1181     *nres = 0;
1182     bl    = bl_head->next->next; /* we start with the second one. */
1183     while (bl != bl_head)
1184     {
1185         sample_coll_t *sc, *scprev;
1186         barres_t      *br = &(res[*nres]);
1187         /* there is always a previous one. we search for that as a foreign
1188            lambda: */
1189         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1190         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1191
1192         barres_init(br);
1193
1194         if (use_dhdl)
1195         {
1196             /* we use dhdl */
1197
1198             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1199             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1200
1201             if (first)
1202             {
1203                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1204                 dhdl = TRUE;
1205             }
1206             if (!dhdl)
1207             {
1208                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1209             }
1210         }
1211         else if (!scprev && !sc)
1212         {
1213             char descX[STRLEN], descY[STRLEN];
1214             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1215             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1216
1217             gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1218         }
1219
1220         /* normal delta H */
1221         if (!scprev)
1222         {
1223             char descX[STRLEN], descY[STRLEN];
1224             snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1225             snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1226             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1227         }
1228         if (!sc)
1229         {
1230             char descX[STRLEN], descY[STRLEN];
1231             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1232             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1233             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1234         }
1235         br->a = scprev;
1236         br->b = sc;
1237
1238         first = FALSE;
1239         (*nres)++;
1240         bl = bl->next;
1241     }
1242     return res;
1243 }
1244
1245 /* estimate the maximum discretization error */
1246 static double barres_list_max_disc_err(barres_t *res, int nres)
1247 {
1248     int    i, j;
1249     double disc_err = 0.;
1250     double delta_lambda;
1251
1252     for (i = 0; i < nres; i++)
1253     {
1254         barres_t *br = &(res[i]);
1255
1256         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1257                                            br->a->native_lambda);
1258
1259         for (j = 0; j < br->a->nsamples; j++)
1260         {
1261             if (br->a->s[j]->hist)
1262             {
1263                 double Wfac = 1.;
1264                 if (br->a->s[j]->derivative)
1265                 {
1266                     Wfac =  delta_lambda;
1267                 }
1268
1269                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1270             }
1271         }
1272         for (j = 0; j < br->b->nsamples; j++)
1273         {
1274             if (br->b->s[j]->hist)
1275             {
1276                 double Wfac = 1.;
1277                 if (br->b->s[j]->derivative)
1278                 {
1279                     Wfac =  delta_lambda;
1280                 }
1281                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1282             }
1283         }
1284     }
1285     return disc_err;
1286 }
1287
1288
1289 /* impose start and end times on a sample collection, updating sample_ranges */
1290 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1291                                      double end_t)
1292 {
1293     int i;
1294     for (i = 0; i < sc->nsamples; i++)
1295     {
1296         samples_t      *s = sc->s[i];
1297         sample_range_t *r = &(sc->r[i]);
1298         if (s->hist)
1299         {
1300             double end_time = s->hist->delta_time*s->hist->sum +
1301                 s->hist->start_time;
1302             if (s->hist->start_time < begin_t || end_time > end_t)
1303             {
1304                 r->use = FALSE;
1305             }
1306         }
1307         else
1308         {
1309             if (!s->t)
1310             {
1311                 double end_time;
1312                 if (s->start_time < begin_t)
1313                 {
1314                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1315                 }
1316                 end_time = s->delta_time*s->ndu + s->start_time;
1317                 if (end_time > end_t)
1318                 {
1319                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1320                 }
1321             }
1322             else
1323             {
1324                 int j;
1325                 for (j = 0; j < s->ndu; j++)
1326                 {
1327                     if (s->t[j] < begin_t)
1328                     {
1329                         r->start = j;
1330                     }
1331
1332                     if (s->t[j] >= end_t)
1333                     {
1334                         r->end = j;
1335                         break;
1336                     }
1337                 }
1338             }
1339             if (r->start > r->end)
1340             {
1341                 r->use = FALSE;
1342             }
1343         }
1344     }
1345     sample_coll_calc_ntot(sc);
1346 }
1347
1348 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1349 {
1350     double         first_t, last_t;
1351     double         begin_t, end_t;
1352     lambda_data_t *lc;
1353     lambda_data_t *head = sd->lb;
1354     int            j;
1355
1356     if (begin <= 0 && end < 0)
1357     {
1358         return;
1359     }
1360
1361     /* first determine the global start and end times */
1362     first_t = -1;
1363     last_t  = -1;
1364     lc      = head->next;
1365     while (lc != head)
1366     {
1367         sample_coll_t *sc = lc->sc->next;
1368         while (sc != lc->sc)
1369         {
1370             for (j = 0; j < sc->nsamples; j++)
1371             {
1372                 double start_t, end_t;
1373
1374                 start_t = sc->s[j]->start_time;
1375                 end_t   =   sc->s[j]->start_time;
1376                 if (sc->s[j]->hist)
1377                 {
1378                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1379                 }
1380                 else
1381                 {
1382                     if (sc->s[j]->t)
1383                     {
1384                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1385                     }
1386                     else
1387                     {
1388                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1389                     }
1390                 }
1391
1392                 if (start_t < first_t || first_t < 0)
1393                 {
1394                     first_t = start_t;
1395                 }
1396                 if (end_t > last_t)
1397                 {
1398                     last_t = end_t;
1399                 }
1400             }
1401             sc = sc->next;
1402         }
1403         lc = lc->next;
1404     }
1405
1406     /* calculate the actual times */
1407     if (begin > 0)
1408     {
1409         begin_t = begin;
1410     }
1411     else
1412     {
1413         begin_t = first_t;
1414     }
1415
1416     if (end > 0)
1417     {
1418         end_t = end;
1419     }
1420     else
1421     {
1422         end_t = last_t;
1423     }
1424     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1425
1426     if (begin_t > end_t)
1427     {
1428         return;
1429     }
1430     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1431
1432     /* then impose them */
1433     lc = head->next;
1434     while (lc != head)
1435     {
1436         sample_coll_t *sc = lc->sc->next;
1437         while (sc != lc->sc)
1438         {
1439             sample_coll_impose_times(sc, begin_t, end_t);
1440             sc = sc->next;
1441         }
1442         lc = lc->next;
1443     }
1444 }
1445
1446
1447 /* create subsample i out of ni from an existing sample_coll */
1448 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1449                                              sample_coll_t *sc_orig,
1450                                              int i, int ni)
1451 {
1452     int             j;
1453     int             hist_start, hist_end;
1454
1455     gmx_int64_t     ntot_start;
1456     gmx_int64_t     ntot_end;
1457     gmx_int64_t     ntot_so_far;
1458
1459     *sc = *sc_orig; /* just copy all fields */
1460
1461     /* allocate proprietary memory */
1462     snew(sc->s, sc_orig->nsamples);
1463     snew(sc->r, sc_orig->nsamples);
1464
1465     /* copy the samples */
1466     for (j = 0; j < sc_orig->nsamples; j++)
1467     {
1468         sc->s[j] = sc_orig->s[j];
1469         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1470     }
1471
1472     /* now fix start and end fields */
1473     /* the casts avoid possible overflows */
1474     ntot_start  = (gmx_int64_t)(sc_orig->ntot*(double)i/(double)ni);
1475     ntot_end    = (gmx_int64_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1476     ntot_so_far = 0;
1477     for (j = 0; j < sc->nsamples; j++)
1478     {
1479         gmx_int64_t ntot_add;
1480         gmx_int64_t new_start, new_end;
1481
1482         if (sc->r[j].use)
1483         {
1484             if (sc->s[j]->hist)
1485             {
1486                 ntot_add = sc->s[j]->hist->sum;
1487             }
1488             else
1489             {
1490                 ntot_add = sc->r[j].end - sc->r[j].start;
1491             }
1492         }
1493         else
1494         {
1495             ntot_add = 0;
1496         }
1497
1498         if (!sc->s[j]->hist)
1499         {
1500             if (ntot_so_far < ntot_start)
1501             {
1502                 /* adjust starting point */
1503                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1504             }
1505             else
1506             {
1507                 new_start = sc->r[j].start;
1508             }
1509             /* adjust end point */
1510             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1511             if (new_end > sc->r[j].end)
1512             {
1513                 new_end = sc->r[j].end;
1514             }
1515
1516             /* check if we're in range at all */
1517             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1518             {
1519                 new_start = 0;
1520                 new_end   = 0;
1521             }
1522             /* and write the new range */
1523             sc->r[j].start = (int)new_start;
1524             sc->r[j].end   = (int)new_end;
1525         }
1526         else
1527         {
1528             if (sc->r[j].use)
1529             {
1530                 double overlap;
1531                 double ntot_start_norm, ntot_end_norm;
1532                 /* calculate the amount of overlap of the
1533                    desired range (ntot_start -- ntot_end) onto
1534                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1535
1536                 /* first calculate normalized bounds
1537                    (where 0 is the start of the hist range, and 1 the end) */
1538                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1539                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1540
1541                 /* now fix the boundaries */
1542                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1543                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1544
1545                 /* and calculate the overlap */
1546                 overlap = ntot_end_norm - ntot_start_norm;
1547
1548                 if (overlap > 0.95) /* we allow for 5% slack */
1549                 {
1550                     sc->r[j].use = TRUE;
1551                 }
1552                 else if (overlap < 0.05)
1553                 {
1554                     sc->r[j].use = FALSE;
1555                 }
1556                 else
1557                 {
1558                     return FALSE;
1559                 }
1560             }
1561         }
1562         ntot_so_far += ntot_add;
1563     }
1564     sample_coll_calc_ntot(sc);
1565
1566     return TRUE;
1567 }
1568
1569 /* calculate minimum and maximum work values in sample collection */
1570 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1571                                 double *Wmin, double *Wmax)
1572 {
1573     int i, j;
1574
1575     *Wmin = FLT_MAX;
1576     *Wmax = -FLT_MAX;
1577
1578     for (i = 0; i < sc->nsamples; i++)
1579     {
1580         samples_t      *s = sc->s[i];
1581         sample_range_t *r = &(sc->r[i]);
1582         if (r->use)
1583         {
1584             if (!s->hist)
1585             {
1586                 for (j = r->start; j < r->end; j++)
1587                 {
1588                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1589                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1590                 }
1591             }
1592             else
1593             {
1594                 int    hd = 0; /* determine the histogram direction: */
1595                 double dx;
1596                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1597                 {
1598                     hd = 1;
1599                 }
1600                 dx = s->hist->dx[hd];
1601
1602                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1603                 {
1604                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1605                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1606                     /* look for the highest value bin with values */
1607                     if (s->hist->bin[hd][j] > 0)
1608                     {
1609                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1610                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1611                         break;
1612                     }
1613                 }
1614             }
1615         }
1616     }
1617 }
1618
1619 /* Initialize a sim_data structure */
1620 static void sim_data_init(sim_data_t *sd)
1621 {
1622     /* make linked list */
1623     sd->lb       = &(sd->lb_head);
1624     sd->lb->next = sd->lb;
1625     sd->lb->prev = sd->lb;
1626
1627     lambda_components_init(&(sd->lc));
1628 }
1629
1630
1631 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1632 {
1633     int    i;
1634     double sum;
1635
1636     sum = 0;
1637
1638     for (i = 0; i < n; i++)
1639     {
1640         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1641     }
1642
1643     return sum;
1644 }
1645
1646 /* calculate the BAR average given a histogram
1647
1648     if type== 0, calculate the best estimate for the average,
1649     if type==-1, calculate the minimum possible value given the histogram
1650     if type== 1, calculate the maximum possible value given the histogram */
1651 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1652                                 int type)
1653 {
1654     double sum = 0.;
1655     int    i;
1656     int    max;
1657     /* normalization factor multiplied with bin width and
1658        number of samples (we normalize through M): */
1659     double normdx = 1.;
1660     int    hd     = 0; /* determine the histogram direction: */
1661     double dx;
1662
1663     if ( (hist->nhist > 1) && (Wfac < 0) )
1664     {
1665         hd = 1;
1666     }
1667     dx  = hist->dx[hd];
1668     max = hist->nbin[hd]-1;
1669     if (type == 1)
1670     {
1671         max = hist->nbin[hd]; /* we also add whatever was out of range */
1672     }
1673
1674     for (i = 0; i < max; i++)
1675     {
1676         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1677         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1678
1679         sum += pxdx/(1. + exp(x + sbMmDG));
1680     }
1681
1682     return sum;
1683 }
1684
1685 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1686                                 double temp, double tol, int type)
1687 {
1688     double kT, beta, M;
1689     double DG;
1690     int    i, j;
1691     double Wfac1, Wfac2, Wmin, Wmax;
1692     double DG0, DG1, DG2, dDG1;
1693     double sum1, sum2;
1694     double n1, n2; /* numbers of samples as doubles */
1695
1696     kT   = BOLTZ*temp;
1697     beta = 1/kT;
1698
1699     /* count the numbers of samples */
1700     n1 = ca->ntot;
1701     n2 = cb->ntot;
1702
1703     M = log(n1/n2);
1704
1705     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1706     if (ca->foreign_lambda->dhdl < 0)
1707     {
1708         /* this is the case when the delta U were calculated directly
1709            (i.e. we're not scaling dhdl) */
1710         Wfac1 = beta;
1711         Wfac2 = beta;
1712     }
1713     else
1714     {
1715         /* we're using dhdl, so delta_lambda needs to be a
1716            multiplication factor.  */
1717         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1718         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1719                                                   ca->native_lambda);
1720         if (cb->native_lambda->lc->N > 1)
1721         {
1722             gmx_fatal(FARGS,
1723                       "Can't (yet) do multi-component dhdl interpolation");
1724         }
1725
1726         Wfac1 =  beta*delta_lambda;
1727         Wfac2 = -beta*delta_lambda;
1728     }
1729
1730     if (beta < 1)
1731     {
1732         /* We print the output both in kT and kJ/mol.
1733          * Here we determine DG in kT, so when beta < 1
1734          * the precision has to be increased.
1735          */
1736         tol *= beta;
1737     }
1738
1739     /* Calculate minimum and maximum work to give an initial estimate of
1740      * delta G  as their average.
1741      */
1742     {
1743         double Wmin1, Wmin2, Wmax1, Wmax2;
1744         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1745         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1746
1747         Wmin = min(Wmin1, Wmin2);
1748         Wmax = max(Wmax1, Wmax2);
1749     }
1750
1751     DG0 = Wmin;
1752     DG2 = Wmax;
1753
1754     if (debug)
1755     {
1756         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1757     }
1758     /* We approximate by bisection: given our initial estimates
1759        we keep checking whether the halfway point is greater or
1760        smaller than what we get out of the BAR averages.
1761
1762        For the comparison we can use twice the tolerance. */
1763     while (DG2 - DG0 > 2*tol)
1764     {
1765         DG1 = 0.5*(DG0 + DG2);
1766
1767         /* calculate the BAR averages */
1768         dDG1 = 0.;
1769
1770         for (i = 0; i < ca->nsamples; i++)
1771         {
1772             samples_t      *s = ca->s[i];
1773             sample_range_t *r = &(ca->r[i]);
1774             if (r->use)
1775             {
1776                 if (s->hist)
1777                 {
1778                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1779                 }
1780                 else
1781                 {
1782                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1783                                          Wfac1, (M-DG1));
1784                 }
1785             }
1786         }
1787         for (i = 0; i < cb->nsamples; i++)
1788         {
1789             samples_t      *s = cb->s[i];
1790             sample_range_t *r = &(cb->r[i]);
1791             if (r->use)
1792             {
1793                 if (s->hist)
1794                 {
1795                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1796                 }
1797                 else
1798                 {
1799                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1800                                          Wfac2, -(M-DG1));
1801                 }
1802             }
1803         }
1804
1805         if (dDG1 < 0)
1806         {
1807             DG0 = DG1;
1808         }
1809         else
1810         {
1811             DG2 = DG1;
1812         }
1813         if (debug)
1814         {
1815             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1816         }
1817     }
1818
1819     return 0.5*(DG0 + DG2);
1820 }
1821
1822 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1823                              double temp, double dg, double *sa, double *sb)
1824 {
1825     int    i, j;
1826     double W_ab = 0.;
1827     double W_ba = 0.;
1828     double kT, beta;
1829     double Wfac1, Wfac2;
1830     double n1, n2;
1831
1832     kT   = BOLTZ*temp;
1833     beta = 1/kT;
1834
1835     /* count the numbers of samples */
1836     n1 = ca->ntot;
1837     n2 = cb->ntot;
1838
1839     /* to ensure the work values are the same as during the delta_G */
1840     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1841     if (ca->foreign_lambda->dhdl < 0)
1842     {
1843         /* this is the case when the delta U were calculated directly
1844            (i.e. we're not scaling dhdl) */
1845         Wfac1 = beta;
1846         Wfac2 = beta;
1847     }
1848     else
1849     {
1850         /* we're using dhdl, so delta_lambda needs to be a
1851            multiplication factor.  */
1852         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1853                                                   ca->native_lambda);
1854         Wfac1 =  beta*delta_lambda;
1855         Wfac2 = -beta*delta_lambda;
1856     }
1857
1858     /* first calculate the average work in both directions */
1859     for (i = 0; i < ca->nsamples; i++)
1860     {
1861         samples_t      *s = ca->s[i];
1862         sample_range_t *r = &(ca->r[i]);
1863         if (r->use)
1864         {
1865             if (!s->hist)
1866             {
1867                 for (j = r->start; j < r->end; j++)
1868                 {
1869                     W_ab += Wfac1*s->du[j];
1870                 }
1871             }
1872             else
1873             {
1874                 /* normalization factor multiplied with bin width and
1875                    number of samples (we normalize through M): */
1876                 double normdx = 1.;
1877                 double dx;
1878                 int    hd = 0; /* histogram direction */
1879                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1880                 {
1881                     hd = 1;
1882                 }
1883                 dx = s->hist->dx[hd];
1884
1885                 for (j = 0; j < s->hist->nbin[0]; j++)
1886                 {
1887                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1888                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1889                     W_ab += pxdx*x;
1890                 }
1891             }
1892         }
1893     }
1894     W_ab /= n1;
1895
1896     for (i = 0; i < cb->nsamples; i++)
1897     {
1898         samples_t      *s = cb->s[i];
1899         sample_range_t *r = &(cb->r[i]);
1900         if (r->use)
1901         {
1902             if (!s->hist)
1903             {
1904                 for (j = r->start; j < r->end; j++)
1905                 {
1906                     W_ba += Wfac1*s->du[j];
1907                 }
1908             }
1909             else
1910             {
1911                 /* normalization factor multiplied with bin width and
1912                    number of samples (we normalize through M): */
1913                 double normdx = 1.;
1914                 double dx;
1915                 int    hd = 0; /* histogram direction */
1916                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1917                 {
1918                     hd = 1;
1919                 }
1920                 dx = s->hist->dx[hd];
1921
1922                 for (j = 0; j < s->hist->nbin[0]; j++)
1923                 {
1924                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1925                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1926                     W_ba += pxdx*x;
1927                 }
1928             }
1929         }
1930     }
1931     W_ba /= n2;
1932
1933     /* then calculate the relative entropies */
1934     *sa = (W_ab - dg);
1935     *sb = (W_ba + dg);
1936 }
1937
1938 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1939                            double temp, double dg, double *stddev)
1940 {
1941     int    i, j;
1942     double M;
1943     double sigmafact = 0.;
1944     double kT, beta;
1945     double Wfac1, Wfac2;
1946     double n1, n2;
1947
1948     kT   = BOLTZ*temp;
1949     beta = 1/kT;
1950
1951     /* count the numbers of samples */
1952     n1 = ca->ntot;
1953     n2 = cb->ntot;
1954
1955     /* to ensure the work values are the same as during the delta_G */
1956     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1957     if (ca->foreign_lambda->dhdl < 0)
1958     {
1959         /* this is the case when the delta U were calculated directly
1960            (i.e. we're not scaling dhdl) */
1961         Wfac1 = beta;
1962         Wfac2 = beta;
1963     }
1964     else
1965     {
1966         /* we're using dhdl, so delta_lambda needs to be a
1967            multiplication factor.  */
1968         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1969                                                   ca->native_lambda);
1970         Wfac1 =  beta*delta_lambda;
1971         Wfac2 = -beta*delta_lambda;
1972     }
1973
1974     M = log(n1/n2);
1975
1976
1977     /* calculate average in both directions */
1978     for (i = 0; i < ca->nsamples; i++)
1979     {
1980         samples_t      *s = ca->s[i];
1981         sample_range_t *r = &(ca->r[i]);
1982         if (r->use)
1983         {
1984             if (!s->hist)
1985             {
1986                 for (j = r->start; j < r->end; j++)
1987                 {
1988                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1989                 }
1990             }
1991             else
1992             {
1993                 /* normalization factor multiplied with bin width and
1994                    number of samples (we normalize through M): */
1995                 double normdx = 1.;
1996                 double dx;
1997                 int    hd = 0; /* histogram direction */
1998                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1999                 {
2000                     hd = 1;
2001                 }
2002                 dx = s->hist->dx[hd];
2003
2004                 for (j = 0; j < s->hist->nbin[0]; j++)
2005                 {
2006                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2007                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2008
2009                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
2010                 }
2011             }
2012         }
2013     }
2014     for (i = 0; i < cb->nsamples; i++)
2015     {
2016         samples_t      *s = cb->s[i];
2017         sample_range_t *r = &(cb->r[i]);
2018         if (r->use)
2019         {
2020             if (!s->hist)
2021             {
2022                 for (j = r->start; j < r->end; j++)
2023                 {
2024                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
2025                 }
2026             }
2027             else
2028             {
2029                 /* normalization factor multiplied with bin width and
2030                    number of samples (we normalize through M): */
2031                 double normdx = 1.;
2032                 double dx;
2033                 int    hd = 0; /* histogram direction */
2034                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2035                 {
2036                     hd = 1;
2037                 }
2038                 dx = s->hist->dx[hd];
2039
2040                 for (j = 0; j < s->hist->nbin[0]; j++)
2041                 {
2042                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2043                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2044
2045                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2046                 }
2047             }
2048         }
2049     }
2050
2051     sigmafact /= (n1 + n2);
2052
2053
2054     /* Eq. 10 from
2055        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2056     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2057 }
2058
2059
2060
2061 static void calc_bar(barres_t *br, double tol,
2062                      int npee_min, int npee_max, gmx_bool *bEE,
2063                      double *partsum)
2064 {
2065     int      npee, p;
2066     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2067                                                         for calculated quantities */
2068     int      nsample1, nsample2;
2069     double   temp = br->a->temp;
2070     int      i, j;
2071     double   dg_min, dg_max;
2072     gmx_bool have_hist = FALSE;
2073
2074     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2075
2076     br->dg_disc_err      = 0.;
2077     br->dg_histrange_err = 0.;
2078
2079     /* check if there are histograms */
2080     for (i = 0; i < br->a->nsamples; i++)
2081     {
2082         if (br->a->r[i].use && br->a->s[i]->hist)
2083         {
2084             have_hist = TRUE;
2085             break;
2086         }
2087     }
2088     if (!have_hist)
2089     {
2090         for (i = 0; i < br->b->nsamples; i++)
2091         {
2092             if (br->b->r[i].use && br->b->s[i]->hist)
2093             {
2094                 have_hist = TRUE;
2095                 break;
2096             }
2097         }
2098     }
2099
2100     /* calculate histogram-specific errors */
2101     if (have_hist)
2102     {
2103         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2104         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2105
2106         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2107         {
2108             /* the histogram range  error is the biggest of the differences
2109                between the best estimate and the extremes */
2110             br->dg_histrange_err = fabs(dg_max - dg_min);
2111         }
2112         br->dg_disc_err = 0.;
2113         for (i = 0; i < br->a->nsamples; i++)
2114         {
2115             if (br->a->s[i]->hist)
2116             {
2117                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2118             }
2119         }
2120         for (i = 0; i < br->b->nsamples; i++)
2121         {
2122             if (br->b->s[i]->hist)
2123             {
2124                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2125             }
2126         }
2127     }
2128     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2129
2130     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2131
2132     dg_sig2     = 0;
2133     sa_sig2     = 0;
2134     sb_sig2     = 0;
2135     stddev_sig2 = 0;
2136
2137     *bEE = TRUE;
2138     {
2139         sample_coll_t ca, cb;
2140
2141         /* initialize the samples */
2142         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2143                          br->a->temp);
2144         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2145                          br->b->temp);
2146
2147         for (npee = npee_min; npee <= npee_max; npee++)
2148         {
2149             double dgs      = 0;
2150             double dgs2     = 0;
2151             double dsa      = 0;
2152             double dsb      = 0;
2153             double dsa2     = 0;
2154             double dsb2     = 0;
2155             double dstddev  = 0;
2156             double dstddev2 = 0;
2157
2158
2159             for (p = 0; p < npee; p++)
2160             {
2161                 double   dgp;
2162                 double   stddevc;
2163                 double   sac, sbc;
2164                 gmx_bool cac, cbc;
2165
2166                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2167                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2168
2169                 if (!cac || !cbc)
2170                 {
2171                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2172                     *bEE = FALSE;
2173                     if (cac)
2174                     {
2175                         sample_coll_destroy(&ca);
2176                     }
2177                     if (cbc)
2178                     {
2179                         sample_coll_destroy(&cb);
2180                     }
2181                     return;
2182                 }
2183
2184                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2185                 dgs  += dgp;
2186                 dgs2 += dgp*dgp;
2187
2188                 partsum[npee*(npee_max+1)+p] += dgp;
2189
2190                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2191                 dsa  += sac;
2192                 dsa2 += sac*sac;
2193                 dsb  += sbc;
2194                 dsb2 += sbc*sbc;
2195                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2196
2197                 dstddev  += stddevc;
2198                 dstddev2 += stddevc*stddevc;
2199
2200                 sample_coll_destroy(&ca);
2201                 sample_coll_destroy(&cb);
2202             }
2203             dgs     /= npee;
2204             dgs2    /= npee;
2205             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2206
2207             dsa     /= npee;
2208             dsa2    /= npee;
2209             dsb     /= npee;
2210             dsb2    /= npee;
2211             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2212             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2213
2214             dstddev     /= npee;
2215             dstddev2    /= npee;
2216             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2217         }
2218         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2219         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2220         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2221         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2222     }
2223 }
2224
2225
2226 static double bar_err(int nbmin, int nbmax, const double *partsum)
2227 {
2228     int    nb, b;
2229     double svar, s, s2, dg;
2230
2231     svar = 0;
2232     for (nb = nbmin; nb <= nbmax; nb++)
2233     {
2234         s  = 0;
2235         s2 = 0;
2236         for (b = 0; b < nb; b++)
2237         {
2238             dg  = partsum[nb*(nbmax+1)+b];
2239             s  += dg;
2240             s2 += dg*dg;
2241         }
2242         s    /= nb;
2243         s2   /= nb;
2244         svar += (s2 - s*s)/(nb - 1);
2245     }
2246
2247     return sqrt(svar/(nbmax + 1 - nbmin));
2248 }
2249
2250
2251 /* Seek the end of an identifier (consecutive non-spaces), followed by
2252    an optional number of spaces or '='-signs. Returns a pointer to the
2253    first non-space value found after that. Returns NULL if the string
2254    ends before that.
2255  */
2256 static const char *find_value(const char *str)
2257 {
2258     gmx_bool name_end_found = FALSE;
2259
2260     /* if the string is a NULL pointer, return a NULL pointer. */
2261     if (str == NULL)
2262     {
2263         return NULL;
2264     }
2265     while (*str != '\0')
2266     {
2267         /* first find the end of the name */
2268         if (!name_end_found)
2269         {
2270             if (isspace(*str) || (*str == '=') )
2271             {
2272                 name_end_found = TRUE;
2273             }
2274         }
2275         else
2276         {
2277             if (!( isspace(*str) || (*str == '=') ))
2278             {
2279                 return str;
2280             }
2281         }
2282         str++;
2283     }
2284     return NULL;
2285 }
2286
2287
2288
2289 /* read a vector-notation description of a lambda vector */
2290 static gmx_bool read_lambda_compvec(const char                *str,
2291                                     lambda_vec_t              *lv,
2292                                     const lambda_components_t *lc_in,
2293                                     lambda_components_t       *lc_out,
2294                                     const char               **end,
2295                                     const char                *fn)
2296 {
2297     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2298                                           components, or to check them */
2299     gmx_bool    start_reached = FALSE; /* whether the start of component names
2300                                           has been reached */
2301     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2302     int         n             = 0;     /* current component number */
2303     const char *val_start     = NULL;  /* start of the component name, or NULL
2304                                           if not in a value */
2305     char       *strtod_end;
2306     gmx_bool    OK = TRUE;
2307
2308     if (end)
2309     {
2310         *end = str;
2311     }
2312
2313
2314     if (lc_out && lc_out->N == 0)
2315     {
2316         initialize_lc = TRUE;
2317     }
2318
2319     if (lc_in == NULL)
2320     {
2321         lc_in = lc_out;
2322     }
2323
2324     while (1)
2325     {
2326         if (!start_reached)
2327         {
2328             if (isalnum(*str))
2329             {
2330                 vector        = FALSE;
2331                 start_reached = TRUE;
2332                 val_start     = str;
2333             }
2334             else if (*str == '(')
2335             {
2336                 vector        = TRUE;
2337                 start_reached = TRUE;
2338             }
2339             else if (!isspace(*str))
2340             {
2341                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2342             }
2343         }
2344         else
2345         {
2346             if (val_start)
2347             {
2348                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2349                 {
2350                     /* end of value */
2351                     if (lv == NULL)
2352                     {
2353                         if (initialize_lc)
2354                         {
2355                             lambda_components_add(lc_out, val_start,
2356                                                   (str-val_start));
2357                         }
2358                         else
2359                         {
2360                             if (!lambda_components_check(lc_out, n, val_start,
2361                                                          (str-val_start)))
2362                             {
2363                                 return FALSE;
2364                             }
2365                         }
2366                     }
2367                     else
2368                     {
2369                         /* add a vector component to lv */
2370                         lv->val[n] = strtod(val_start, &strtod_end);
2371                         if (val_start == strtod_end)
2372                         {
2373                             gmx_fatal(FARGS,
2374                                       "Error reading lambda vector in %s", fn);
2375                         }
2376                     }
2377                     /* reset for the next identifier */
2378                     val_start = NULL;
2379                     n++;
2380                     if (!vector)
2381                     {
2382                         return OK;
2383                     }
2384                 }
2385             }
2386             else if (isalnum(*str))
2387             {
2388                 val_start = str;
2389             }
2390             if (*str == ')')
2391             {
2392                 str++;
2393                 if (end)
2394                 {
2395                     *end = str;
2396                 }
2397                 if (!vector)
2398                 {
2399                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2400                 }
2401                 else
2402                 {
2403                     if (n == lc_in->N)
2404                     {
2405                         return OK;
2406                     }
2407                     else if (lv == NULL)
2408                     {
2409                         return FALSE;
2410                     }
2411                     else
2412                     {
2413                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2414                                   fn);
2415                         return FALSE;
2416                     }
2417
2418                 }
2419             }
2420         }
2421         if (*str == '\0')
2422         {
2423             break;
2424         }
2425         str++;
2426         if (end)
2427         {
2428             *end = str;
2429         }
2430     }
2431     if (vector)
2432     {
2433         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2434         return FALSE;
2435     }
2436     return OK;
2437 }
2438
2439 /* read and check the component names from a string */
2440 static gmx_bool read_lambda_components(const char          *str,
2441                                        lambda_components_t *lc,
2442                                        const char         **end,
2443                                        const char          *fn)
2444 {
2445     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2446 }
2447
2448 /* read an initialized lambda vector from a string */
2449 static gmx_bool read_lambda_vector(const char   *str,
2450                                    lambda_vec_t *lv,
2451                                    const char  **end,
2452                                    const char   *fn)
2453 {
2454     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2455 }
2456
2457
2458
2459 /* deduce lambda value from legend.
2460     fn = the file name
2461     legend = the legend string
2462     ba = the xvg data
2463     lam = the initialized lambda vector
2464    returns whether to use the data in this set.
2465  */
2466 static gmx_bool legend2lambda(const char   *fn,
2467                               const char   *legend,
2468                               lambda_vec_t *lam)
2469 {
2470     double        lambda = 0;
2471     const char   *ptr    = NULL, *ptr2 = NULL;
2472     gmx_bool      ok     = FALSE;
2473     gmx_bool      bdhdl  = FALSE;
2474     const char   *tostr  = " to ";
2475
2476     if (legend == NULL)
2477     {
2478         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2479     }
2480
2481     /* look for the last 'to': */
2482     ptr2 = legend;
2483     do
2484     {
2485         ptr2 = strstr(ptr2, tostr);
2486         if (ptr2 != NULL)
2487         {
2488             ptr = ptr2;
2489             ptr2++;
2490         }
2491     }
2492     while (ptr2 != NULL && *ptr2 != '\0');
2493
2494     if (ptr)
2495     {
2496         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2497     }
2498     else
2499     {
2500         /* look for the = sign */
2501         ptr = strrchr(legend, '=');
2502         if (!ptr)
2503         {
2504             /* otherwise look for the last space */
2505             ptr = strrchr(legend, ' ');
2506         }
2507     }
2508
2509     if (strstr(legend, "dH"))
2510     {
2511         ok    = TRUE;
2512         bdhdl = TRUE;
2513     }
2514     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2515     {
2516         ok    = TRUE;
2517         bdhdl = FALSE;
2518     }
2519     else /*if (strstr(legend, "pV"))*/
2520     {
2521         return FALSE;
2522     }
2523     if (!ptr)
2524     {
2525         ok = FALSE;
2526     }
2527
2528     if (!ok)
2529     {
2530         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2531     }
2532     if (!bdhdl)
2533     {
2534         ptr = find_value(ptr);
2535         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2536         {
2537             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2538         }
2539     }
2540     else
2541     {
2542         int         dhdl_index;
2543         const char *end;
2544         char        buf[STRLEN];
2545
2546         ptr = strrchr(legend, '=');
2547         end = ptr;
2548         if (ptr)
2549         {
2550             /* there must be a component name */
2551             ptr--;
2552             if (ptr < legend)
2553             {
2554                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2555             }
2556             /* now backtrack to the start of the identifier */
2557             while (isspace(*ptr))
2558             {
2559                 end = ptr;
2560                 ptr--;
2561                 if (ptr < legend)
2562                 {
2563                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2564                 }
2565             }
2566             while (!isspace(*ptr))
2567             {
2568                 ptr--;
2569                 if (ptr < legend)
2570                 {
2571                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2572                 }
2573             }
2574             ptr++;
2575             strncpy(buf, ptr, (end-ptr));
2576             buf[(end-ptr)] = '\0';
2577             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2578             if (dhdl_index < 0)
2579             {
2580                 char buf[STRLEN];
2581                 strncpy(buf, ptr, (end-ptr));
2582                 buf[(end-ptr)] = '\0';
2583                 gmx_fatal(FARGS,
2584                           "Did not find lambda component for '%s' in %s",
2585                           buf, fn);
2586             }
2587         }
2588         else
2589         {
2590             if (lam->lc->N > 1)
2591             {
2592                 gmx_fatal(FARGS,
2593                           "dhdl without component name with >1 lambda component in %s",
2594                           fn);
2595             }
2596             dhdl_index = 0;
2597         }
2598         lam->dhdl = dhdl_index;
2599     }
2600     return TRUE;
2601 }
2602
2603 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2604                                 lambda_components_t *lc)
2605 {
2606     gmx_bool    bFound;
2607     const char *ptr;
2608     char       *end;
2609     double      native_lambda;
2610
2611     bFound = FALSE;
2612
2613     /* first check for a state string */
2614     ptr = strstr(subtitle, "state");
2615     if (ptr)
2616     {
2617         int         index = -1;
2618         const char *val_end;
2619
2620         /* the new 4.6 style lambda vectors */
2621         ptr = find_value(ptr);
2622         if (ptr)
2623         {
2624             index = strtol(ptr, &end, 10);
2625             if (ptr == end)
2626             {
2627                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2628                 return FALSE;
2629             }
2630             ptr = end;
2631         }
2632         else
2633         {
2634             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2635             return FALSE;
2636         }
2637         /* now find the lambda vector component names */
2638         while (*ptr != '(' && !isalnum(*ptr))
2639         {
2640             ptr++;
2641             if (*ptr == '\0')
2642             {
2643                 gmx_fatal(FARGS,
2644                           "Incomplete lambda vector component data in %s", fn);
2645                 return FALSE;
2646             }
2647         }
2648         val_end = ptr;
2649         if (!read_lambda_components(ptr, lc, &val_end, fn))
2650         {
2651             gmx_fatal(FARGS,
2652                       "lambda vector components in %s don't match those previously read",
2653                       fn);
2654         }
2655         ptr = find_value(val_end);
2656         if (!ptr)
2657         {
2658             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2659             return FALSE;
2660         }
2661         lambda_vec_init(&(ba->native_lambda), lc);
2662         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2663         {
2664             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2665         }
2666         ba->native_lambda.index = index;
2667         bFound                  = TRUE;
2668     }
2669     else
2670     {
2671         /* compatibility mode: check for lambda in other ways. */
2672         /* plain text lambda string */
2673         ptr = strstr(subtitle, "lambda");
2674         if (ptr == NULL)
2675         {
2676             /* xmgrace formatted lambda string */
2677             ptr = strstr(subtitle, "\\xl\\f{}");
2678         }
2679         if (ptr == NULL)
2680         {
2681             /* xmgr formatted lambda string */
2682             ptr = strstr(subtitle, "\\8l\\4");
2683         }
2684         if (ptr != NULL)
2685         {
2686             ptr = strstr(ptr, "=");
2687         }
2688         if (ptr != NULL)
2689         {
2690             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2691             /* add the lambda component name as an empty string */
2692             if (lc->N > 0)
2693             {
2694                 if (!lambda_components_check(lc, 0, "", 0))
2695                 {
2696                     gmx_fatal(FARGS,
2697                               "lambda vector components in %s don't match those previously read",
2698                               fn);
2699                 }
2700             }
2701             else
2702             {
2703                 lambda_components_add(lc, "", 0);
2704             }
2705             lambda_vec_init(&(ba->native_lambda), lc);
2706             ba->native_lambda.val[0] = native_lambda;
2707         }
2708     }
2709
2710     return bFound;
2711 }
2712
2713 static void filename2lambda(const char *fn)
2714 {
2715     double        lambda;
2716     const char   *ptr, *digitptr;
2717     char         *endptr;
2718     int           dirsep;
2719     ptr = fn;
2720     /* go to the end of the path string and search backward to find the last
2721        directory in the path which has to contain the value of lambda
2722      */
2723     while (ptr[1] != '\0')
2724     {
2725         ptr++;
2726     }
2727     /* searching backward to find the second directory separator */
2728     dirsep   = 0;
2729     digitptr = NULL;
2730     while (ptr >= fn)
2731     {
2732         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2733         {
2734             if (dirsep == 1)
2735             {
2736                 break;
2737             }
2738             dirsep++;
2739         }
2740         /* save the last position of a digit between the last two
2741            separators = in the last dirname */
2742         if (dirsep > 0 && isdigit(*ptr))
2743         {
2744             digitptr = ptr;
2745         }
2746         ptr--;
2747     }
2748     if (!digitptr)
2749     {
2750         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2751                   " last directory in the path '%s' does not contain a number", fn);
2752     }
2753     if (digitptr[-1] == '-')
2754     {
2755         digitptr--;
2756     }
2757     lambda = strtod(digitptr, &endptr);
2758     if (endptr == digitptr)
2759     {
2760         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2761     }
2762 }
2763
2764 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2765                                   lambda_components_t *lc)
2766 {
2767     int          i;
2768     char        *subtitle, **legend, *ptr;
2769     int          np;
2770     gmx_bool     native_lambda_read = FALSE;
2771     char         buf[STRLEN];
2772     lambda_vec_t lv;
2773
2774     xvg_init(ba);
2775
2776     ba->filename = fn;
2777
2778     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2779     if (!ba->y)
2780     {
2781         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2782     }
2783     /* Reorder the data */
2784     ba->t  = ba->y[0];
2785     for (i = 1; i < ba->nset; i++)
2786     {
2787         ba->y[i-1] = ba->y[i];
2788     }
2789     ba->nset--;
2790
2791     snew(ba->np, ba->nset);
2792     for (i = 0; i < ba->nset; i++)
2793     {
2794         ba->np[i] = np;
2795     }
2796
2797     ba->temp = -1;
2798     if (subtitle != NULL)
2799     {
2800         /* try to extract temperature */
2801         ptr = strstr(subtitle, "T =");
2802         if (ptr != NULL)
2803         {
2804             ptr += 3;
2805             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2806             {
2807                 if (ba->temp <= 0)
2808                 {
2809                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2810                               ba->temp, fn);
2811                 }
2812             }
2813         }
2814     }
2815     if (ba->temp < 0)
2816     {
2817         if (*temp <= 0)
2818         {
2819             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2820         }
2821         ba->temp = *temp;
2822     }
2823
2824     /* Try to deduce lambda from the subtitle */
2825     if (subtitle)
2826     {
2827         if (subtitle2lambda(subtitle, ba, fn, lc))
2828         {
2829             native_lambda_read = TRUE;
2830         }
2831     }
2832     snew(ba->lambda, ba->nset);
2833     if (legend == NULL)
2834     {
2835         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2836         if (ba->nset == 1)
2837         {
2838             if (!native_lambda_read)
2839             {
2840                 /* Deduce lambda from the file name */
2841                 filename2lambda(fn);
2842                 native_lambda_read = TRUE;
2843             }
2844             ba->lambda[0] = ba->native_lambda;
2845         }
2846         else
2847         {
2848             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2849         }
2850     }
2851     else
2852     {
2853         for (i = 0; i < ba->nset; )
2854         {
2855             gmx_bool use = FALSE;
2856             /* Read lambda from the legend */
2857             lambda_vec_init( &(ba->lambda[i]), lc );
2858             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2859             use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2860             if (use)
2861             {
2862                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2863                 i++;
2864             }
2865             else
2866             {
2867                 int j;
2868                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2869                 for (j = i+1; j < ba->nset; j++)
2870                 {
2871                     ba->y[j-1]  = ba->y[j];
2872                     legend[j-1] = legend[j];
2873                 }
2874                 ba->nset--;
2875             }
2876         }
2877     }
2878
2879     if (!native_lambda_read)
2880     {
2881         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2882     }
2883
2884     if (legend != NULL)
2885     {
2886         for (i = 0; i < ba->nset-1; i++)
2887         {
2888             sfree(legend[i]);
2889         }
2890         sfree(legend);
2891     }
2892 }
2893
2894 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2895 {
2896     xvg_t     *barsim;
2897     samples_t *s;
2898     int        i;
2899     double    *lambda;
2900
2901     snew(barsim, 1);
2902
2903     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2904
2905     if (barsim->nset < 1)
2906     {
2907         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2908     }
2909
2910     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2911     {
2912         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2913     }
2914     *temp = barsim->temp;
2915
2916     /* now create a series of samples_t */
2917     snew(s, barsim->nset);
2918     for (i = 0; i < barsim->nset; i++)
2919     {
2920         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2921                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2922                                                    &(barsim->lambda[i])),
2923                      fn);
2924         s[i].du  = barsim->y[i];
2925         s[i].ndu = barsim->np[i];
2926         s[i].t   = barsim->t;
2927
2928         lambda_data_list_insert_sample(sd->lb, s+i);
2929     }
2930     {
2931         char buf[STRLEN];
2932
2933         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2934         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2935                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2936         for (i = 0; i < barsim->nset; i++)
2937         {
2938             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2939             printf("        %s (%d pts)\n", buf, s[i].ndu);
2940         }
2941     }
2942     printf("\n\n");
2943 }
2944
2945 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2946                                  double start_time, double delta_time,
2947                                  lambda_vec_t *native_lambda, double temp,
2948                                  double *last_t, const char *filename)
2949 {
2950     int           i, j;
2951     gmx_bool      allocated;
2952     double        old_foreign_lambda;
2953     lambda_vec_t *foreign_lambda;
2954     int           type;
2955     samples_t    *s; /* convenience pointer */
2956     int           startj;
2957
2958     /* check the block types etc. */
2959     if ( (blk->nsub < 3) ||
2960          (blk->sub[0].type != xdr_datatype_int) ||
2961          (blk->sub[1].type != xdr_datatype_double) ||
2962          (
2963              (blk->sub[2].type != xdr_datatype_float) &&
2964              (blk->sub[2].type != xdr_datatype_double)
2965          ) ||
2966          (blk->sub[0].nr < 1) ||
2967          (blk->sub[1].nr < 1) )
2968     {
2969         gmx_fatal(FARGS,
2970                   "Unexpected/corrupted block data in file %s around time %f.",
2971                   filename, start_time);
2972     }
2973
2974     snew(foreign_lambda, 1);
2975     lambda_vec_init(foreign_lambda, native_lambda->lc);
2976     lambda_vec_copy(foreign_lambda, native_lambda);
2977     type = blk->sub[0].ival[0];
2978     if (type == dhbtDH)
2979     {
2980         for (i = 0; i < native_lambda->lc->N; i++)
2981         {
2982             foreign_lambda->val[i] = blk->sub[1].dval[i];
2983         }
2984     }
2985     else
2986     {
2987         if (blk->sub[0].nr > 1)
2988         {
2989             foreign_lambda->dhdl = blk->sub[0].ival[1];
2990         }
2991         else
2992         {
2993             foreign_lambda->dhdl = 0;
2994         }
2995     }
2996
2997     if (!*smp)
2998     {
2999         /* initialize the samples structure if it's empty. */
3000         snew(*smp, 1);
3001         samples_init(*smp, native_lambda, foreign_lambda, temp,
3002                      type == dhbtDHDL, filename);
3003         (*smp)->start_time = start_time;
3004         (*smp)->delta_time = delta_time;
3005     }
3006
3007     /* set convenience pointer */
3008     s = *smp;
3009
3010     /* now double check */
3011     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
3012     {
3013         char buf[STRLEN], buf2[STRLEN];
3014         lambda_vec_print(foreign_lambda, buf, FALSE);
3015         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
3016         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
3017         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
3018                   filename, start_time);
3019     }
3020
3021     /* make room for the data */
3022     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
3023     {
3024         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
3025             blk->sub[2].nr*2 : s->ndu_alloc;
3026         srenew(s->du_alloc, s->ndu_alloc);
3027         s->du = s->du_alloc;
3028     }
3029     startj   = s->ndu;
3030     s->ndu  += blk->sub[2].nr;
3031     s->ntot += blk->sub[2].nr;
3032     *ndu     = blk->sub[2].nr;
3033
3034     /* and copy the data*/
3035     for (j = 0; j < blk->sub[2].nr; j++)
3036     {
3037         if (blk->sub[2].type == xdr_datatype_float)
3038         {
3039             s->du[startj+j] = blk->sub[2].fval[j];
3040         }
3041         else
3042         {
3043             s->du[startj+j] = blk->sub[2].dval[j];
3044         }
3045     }
3046     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3047     {
3048         *last_t = start_time + blk->sub[2].nr*delta_time;
3049     }
3050 }
3051
3052 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3053                                       double start_time, double delta_time,
3054                                       lambda_vec_t *native_lambda, double temp,
3055                                       double *last_t, const char *filename)
3056 {
3057     int           i, j;
3058     samples_t    *s;
3059     int           nhist;
3060     double        old_foreign_lambda;
3061     lambda_vec_t *foreign_lambda;
3062     int           type;
3063     int           nbins[2];
3064
3065     /* check the block types etc. */
3066     if ( (blk->nsub < 2) ||
3067          (blk->sub[0].type != xdr_datatype_double) ||
3068          (blk->sub[1].type != xdr_datatype_int64) ||
3069          (blk->sub[0].nr < 2)  ||
3070          (blk->sub[1].nr < 2) )
3071     {
3072         gmx_fatal(FARGS,
3073                   "Unexpected/corrupted block data in file %s around time %f",
3074                   filename, start_time);
3075     }
3076
3077     nhist = blk->nsub-2;
3078     if (nhist == 0)
3079     {
3080         return NULL;
3081     }
3082     if (nhist > 2)
3083     {
3084         gmx_fatal(FARGS,
3085                   "Unexpected/corrupted block data in file %s around time %f",
3086                   filename, start_time);
3087     }
3088
3089     snew(s, 1);
3090     *nsamples = 1;
3091
3092     snew(foreign_lambda, 1);
3093     lambda_vec_init(foreign_lambda, native_lambda->lc);
3094     lambda_vec_copy(foreign_lambda, native_lambda);
3095     type = (int)(blk->sub[1].lval[1]);
3096     if (type == dhbtDH)
3097     {
3098         double old_foreign_lambda;
3099
3100         old_foreign_lambda = blk->sub[0].dval[0];
3101         if (old_foreign_lambda >= 0)
3102         {
3103             foreign_lambda->val[0] = old_foreign_lambda;
3104             if (foreign_lambda->lc->N > 1)
3105             {
3106                 gmx_fatal(FARGS,
3107                           "Single-component lambda in multi-component file %s",
3108                           filename);
3109             }
3110         }
3111         else
3112         {
3113             for (i = 0; i < native_lambda->lc->N; i++)
3114             {
3115                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3116             }
3117         }
3118     }
3119     else
3120     {
3121         if (foreign_lambda->lc->N > 1)
3122         {
3123             if (blk->sub[1].nr < 3 + nhist)
3124             {
3125                 gmx_fatal(FARGS,
3126                           "Missing derivative coord in multi-component file %s",
3127                           filename);
3128             }
3129             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3130         }
3131         else
3132         {
3133             foreign_lambda->dhdl = 0;
3134         }
3135     }
3136
3137     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3138                  filename);
3139     snew(s->hist, 1);
3140
3141     for (i = 0; i < nhist; i++)
3142     {
3143         nbins[i] = blk->sub[i+2].nr;
3144     }
3145
3146     hist_init(s->hist, nhist, nbins);
3147
3148     for (i = 0; i < nhist; i++)
3149     {
3150         s->hist->x0[i] = blk->sub[1].lval[2+i];
3151         s->hist->dx[i] = blk->sub[0].dval[1];
3152         if (i == 1)
3153         {
3154             s->hist->dx[i] = -s->hist->dx[i];
3155         }
3156     }
3157
3158     s->hist->start_time = start_time;
3159     s->hist->delta_time = delta_time;
3160     s->start_time       = start_time;
3161     s->delta_time       = delta_time;
3162
3163     for (i = 0; i < nhist; i++)
3164     {
3165         int             nbin;
3166         gmx_int64_t     sum = 0;
3167
3168         for (j = 0; j < s->hist->nbin[i]; j++)
3169         {
3170             int binv = (int)(blk->sub[i+2].ival[j]);
3171
3172             s->hist->bin[i][j] = binv;
3173             sum               += binv;
3174
3175         }
3176         if (i == 0)
3177         {
3178             s->ntot      = sum;
3179             s->hist->sum = sum;
3180         }
3181         else
3182         {
3183             if (s->ntot != sum)
3184             {
3185                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3186                           filename);
3187             }
3188         }
3189     }
3190
3191     if (start_time + s->hist->sum*delta_time > *last_t)
3192     {
3193         *last_t = start_time + s->hist->sum*delta_time;
3194     }
3195     return s;
3196 }
3197
3198
3199 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3200 {
3201     int            i, j;
3202     ener_file_t    fp;
3203     t_enxframe    *fr;
3204     int            nre;
3205     gmx_enxnm_t   *enm           = NULL;
3206     double         first_t       = -1;
3207     double         last_t        = -1;
3208     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3209     int           *nhists        = NULL; /* array to keep count & print at end */
3210     int           *npts          = NULL; /* array to keep count & print at end */
3211     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3212     lambda_vec_t  *native_lambda;
3213     double         end_time;             /* the end time of the last batch of samples */
3214     int            nsamples = 0;
3215     lambda_vec_t   start_lambda;
3216
3217     fp = open_enx(fn, "r");
3218     do_enxnms(fp, &nre, &enm);
3219     snew(fr, 1);
3220
3221     snew(native_lambda, 1);
3222     start_lambda.lc = NULL;
3223
3224     while (do_enx(fp, fr))
3225     {
3226         /* count the data blocks */
3227         int    nblocks_raw  = 0;
3228         int    nblocks_hist = 0;
3229         int    nlam         = 0;
3230         int    k;
3231         /* DHCOLL block information: */
3232         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3233         double rtemp      = 0;
3234
3235         /* count the blocks and handle collection information: */
3236         for (i = 0; i < fr->nblock; i++)
3237         {
3238             if (fr->block[i].id == enxDHHIST)
3239             {
3240                 nblocks_hist++;
3241             }
3242             if (fr->block[i].id == enxDH)
3243             {
3244                 nblocks_raw++;
3245             }
3246             if (fr->block[i].id == enxDHCOLL)
3247             {
3248                 nlam++;
3249                 if ( (fr->block[i].nsub < 1) ||
3250                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3251                      (fr->block[i].sub[0].nr < 5))
3252                 {
3253                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3254                 }
3255
3256                 /* read the data from the DHCOLL block */
3257                 rtemp            =        fr->block[i].sub[0].dval[0];
3258                 start_time       =   fr->block[i].sub[0].dval[1];
3259                 delta_time       =   fr->block[i].sub[0].dval[2];
3260                 old_start_lambda = fr->block[i].sub[0].dval[3];
3261                 delta_lambda     = fr->block[i].sub[0].dval[4];
3262
3263                 if (delta_lambda > 0)
3264                 {
3265                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3266                 }
3267                 if ( ( *temp != rtemp) && (*temp > 0) )
3268                 {
3269                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3270                 }
3271                 *temp = rtemp;
3272
3273                 if (old_start_lambda >= 0)
3274                 {
3275                     if (sd->lc.N > 0)
3276                     {
3277                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3278                         {
3279                             gmx_fatal(FARGS,
3280                                       "lambda vector components in %s don't match those previously read",
3281                                       fn);
3282                         }
3283                     }
3284                     else
3285                     {
3286                         lambda_components_add(&(sd->lc), "", 0);
3287                     }
3288                     if (!start_lambda.lc)
3289                     {
3290                         lambda_vec_init(&start_lambda, &(sd->lc));
3291                     }
3292                     start_lambda.val[0] = old_start_lambda;
3293                 }
3294                 else
3295                 {
3296                     /* read lambda vector */
3297                     int      n_lambda_vec;
3298                     gmx_bool check = (sd->lc.N > 0);
3299                     if (fr->block[i].nsub < 2)
3300                     {
3301                         gmx_fatal(FARGS,
3302                                   "No lambda vector, but start_lambda=%f\n",
3303                                   old_start_lambda);
3304                     }
3305                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3306                     for (j = 0; j < n_lambda_vec; j++)
3307                     {
3308                         const char *name =
3309                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3310                         if (check)
3311                         {
3312                             /* check the components */
3313                             lambda_components_check(&(sd->lc), j, name,
3314                                                     strlen(name));
3315                         }
3316                         else
3317                         {
3318                             lambda_components_add(&(sd->lc), name,
3319                                                   strlen(name));
3320                         }
3321                     }
3322                     lambda_vec_init(&start_lambda, &(sd->lc));
3323                     start_lambda.index = fr->block[i].sub[1].ival[0];
3324                     for (j = 0; j < n_lambda_vec; j++)
3325                     {
3326                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3327                     }
3328                 }
3329                 if (first_t < 0)
3330                 {
3331                     first_t = start_time;
3332                 }
3333             }
3334         }
3335
3336         if (nlam != 1)
3337         {
3338             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3339         }
3340         if (nblocks_raw > 0 && nblocks_hist > 0)
3341         {
3342             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3343         }
3344
3345         if (nsamples > 0)
3346         {
3347             /* check the native lambda */
3348             if (!lambda_vec_same(&start_lambda, native_lambda) )
3349             {
3350                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3351                           fn, native_lambda, start_lambda, start_time);
3352             }
3353             /* check the number of samples against the previous number */
3354             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3355             {
3356                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3357                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3358             }
3359             /* check whether last iterations's end time matches with
3360                the currrent start time */
3361             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3362             {
3363                 /* it didn't. We need to store our samples and reallocate */
3364                 for (i = 0; i < nsamples; i++)
3365                 {
3366                     if (samples_rawdh[i])
3367                     {
3368                         /* insert it into the existing list */
3369                         lambda_data_list_insert_sample(sd->lb,
3370                                                        samples_rawdh[i]);
3371                         /* and make sure we'll allocate a new one this time
3372                            around */
3373                         samples_rawdh[i] = NULL;
3374                     }
3375                 }
3376             }
3377         }
3378         else
3379         {
3380             /* this is the first round; allocate the associated data
3381                structures */
3382             /*native_lambda=start_lambda;*/
3383             lambda_vec_init(native_lambda, &(sd->lc));
3384             lambda_vec_copy(native_lambda, &start_lambda);
3385             nsamples = nblocks_raw+nblocks_hist;
3386             snew(nhists, nsamples);
3387             snew(npts, nsamples);
3388             snew(lambdas, nsamples);
3389             snew(samples_rawdh, nsamples);
3390             for (i = 0; i < nsamples; i++)
3391             {
3392                 nhists[i]        = 0;
3393                 npts[i]          = 0;
3394                 lambdas[i]       = NULL;
3395                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3396                                             ones contain values */
3397             }
3398         }
3399
3400         /* and read them */
3401         k = 0; /* counter for the lambdas, etc. arrays */
3402         for (i = 0; i < fr->nblock; i++)
3403         {
3404             if (fr->block[i].id == enxDH)
3405             {
3406                 int type = (fr->block[i].sub[0].ival[0]);
3407                 if (type == dhbtDH || type == dhbtDHDL)
3408                 {
3409                     int ndu;
3410                     read_edr_rawdh_block(&(samples_rawdh[k]),
3411                                          &ndu,
3412                                          &(fr->block[i]),
3413                                          start_time, delta_time,
3414                                          native_lambda, rtemp,
3415                                          &last_t, fn);
3416                     npts[k] += ndu;
3417                     if (samples_rawdh[k])
3418                     {
3419                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3420                     }
3421                     k++;
3422                 }
3423             }
3424             else if (fr->block[i].id == enxDHHIST)
3425             {
3426                 int type = (int)(fr->block[i].sub[1].lval[1]);
3427                 if (type == dhbtDH || type == dhbtDHDL)
3428                 {
3429                     int        j;
3430                     int        nb = 0;
3431                     samples_t *s; /* this is where the data will go */
3432                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3433                                             start_time, delta_time,
3434                                             native_lambda, rtemp,
3435                                             &last_t, fn);
3436                     nhists[k] += nb;
3437                     if (nb > 0)
3438                     {
3439                         lambdas[k] = s->foreign_lambda;
3440                     }
3441                     k++;
3442                     /* and insert the new sample immediately */
3443                     for (j = 0; j < nb; j++)
3444                     {
3445                         lambda_data_list_insert_sample(sd->lb, s+j);
3446                     }
3447                 }
3448             }
3449         }
3450     }
3451     /* Now store all our extant sample collections */
3452     for (i = 0; i < nsamples; i++)
3453     {
3454         if (samples_rawdh[i])
3455         {
3456             /* insert it into the existing list */
3457             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3458         }
3459     }
3460
3461
3462     {
3463         char buf[STRLEN];
3464         printf("\n");
3465         lambda_vec_print(native_lambda, buf, FALSE);
3466         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3467                fn, first_t, last_t, buf);
3468         for (i = 0; i < nsamples; i++)
3469         {
3470             if (lambdas[i])
3471             {
3472                 lambda_vec_print(lambdas[i], buf, TRUE);
3473                 if (nhists[i] > 0)
3474                 {
3475                     printf("        %s (%d hists)\n", buf, nhists[i]);
3476                 }
3477                 else
3478                 {
3479                     printf("        %s (%d pts)\n", buf, npts[i]);
3480                 }
3481             }
3482         }
3483     }
3484     printf("\n\n");
3485     sfree(npts);
3486     sfree(nhists);
3487     sfree(lambdas);
3488 }
3489
3490
3491 int gmx_bar(int argc, char *argv[])
3492 {
3493     static const char *desc[] = {
3494         "[THISMODULE] calculates free energy difference estimates through ",
3495         "Bennett's acceptance ratio method (BAR). It also automatically",
3496         "adds series of individual free energies obtained with BAR into",
3497         "a combined free energy estimate.[PAR]",
3498
3499         "Every individual BAR free energy difference relies on two ",
3500         "simulations at different states: say state A and state B, as",
3501         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3502         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3503         "average of the Hamiltonian difference of state B given state A and",
3504         "vice versa.",
3505         "The energy differences to the other state must be calculated",
3506         "explicitly during the simulation. This can be done with",
3507         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3508
3509         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3510         "Two types of input files are supported:[BR]",
3511         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3512         "The files should have columns ",
3513         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3514         "The [GRK]lambda[grk] values are inferred ",
3515         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3516         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3517         "legends of Delta H",
3518         "[BR]",
3519         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3520         "[TT]-extp[tt] option for these files, it is assumed",
3521         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3522         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3523         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3524         "subtitle (if present), otherwise from a number in the subdirectory ",
3525         "in the file name.[PAR]",
3526
3527         "The [GRK]lambda[grk] of the simulation is parsed from ",
3528         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3529         "foreign [GRK]lambda[grk] values from the legend containing the ",
3530         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3531         "the legend line containing 'T ='.[PAR]",
3532
3533         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3534         "These can contain either lists of energy differences (see the ",
3535         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3536         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3537         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3538         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3539
3540         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3541         "the energy difference can also be extrapolated from the ",
3542         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3543         "option, which assumes that the system's Hamiltonian depends linearly",
3544         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3545
3546         "The free energy estimates are determined using BAR with bisection, ",
3547         "with the precision of the output set with [TT]-prec[tt]. ",
3548         "An error estimate taking into account time correlations ",
3549         "is made by splitting the data into blocks and determining ",
3550         "the free energy differences over those blocks and assuming ",
3551         "the blocks are independent. ",
3552         "The final error estimate is determined from the average variance ",
3553         "over 5 blocks. A range of block numbers for error estimation can ",
3554         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3555
3556         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3557         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3558         "samples. [BB]Note[bb] that when aggregating energy ",
3559         "differences/derivatives with different sampling intervals, this is ",
3560         "almost certainly not correct. Usually subsequent energies are ",
3561         "correlated and different time intervals mean different degrees ",
3562         "of correlation between samples.[PAR]",
3563
3564         "The results are split in two parts: the last part contains the final ",
3565         "results in kJ/mol, together with the error estimate for each part ",
3566         "and the total. The first part contains detailed free energy ",
3567         "difference estimates and phase space overlap measures in units of ",
3568         "kT (together with their computed error estimate). The printed ",
3569         "values are:[BR]",
3570         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3571         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3572         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3573         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3574         "[TT]*[tt]    s_B: an estimate of the relative entropy of A in B.[BR]",
3575         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3576
3577         "The relative entropy of both states in each other's ensemble can be ",
3578         "interpreted as a measure of phase space overlap: ",
3579         "the relative entropy s_A of the work samples of lambda_B in the ",
3580         "ensemble of lambda_A (and vice versa for s_B), is a ",
3581         "measure of the 'distance' between Boltzmann distributions of ",
3582         "the two states, that goes to zero for identical distributions. See ",
3583         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3584         "[PAR]",
3585         "The estimate of the expected per-sample standard deviation, as given ",
3586         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3587         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3588         "of the actual statistical error, because it assumes independent samples).[PAR]",
3589
3590         "To get a visual estimate of the phase space overlap, use the ",
3591         "[TT]-oh[tt] option to write series of histograms, together with the ",
3592         "[TT]-nbin[tt] option.[PAR]"
3593     };
3594     static real        begin    = 0, end = -1, temp = -1;
3595     int                nd       = 2, nbmin = 5, nbmax = 5;
3596     int                nbin     = 100;
3597     gmx_bool           use_dhdl = FALSE;
3598     gmx_bool           calc_s, calc_v;
3599     t_pargs            pa[] = {
3600         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3601         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3602         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3603         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3604         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3605         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3606         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3607         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3608     };
3609
3610     t_filenm           fnm[] = {
3611         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3612         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3613         { efXVG, "-o",  "bar",    ffOPTWR },
3614         { efXVG, "-oi", "barint", ffOPTWR },
3615         { efXVG, "-oh", "histogram", ffOPTWR }
3616     };
3617 #define NFILE asize(fnm)
3618
3619     int          f, i, j;
3620     int          nf = 0;    /* file counter */
3621     int          nbs;
3622     int          nfile_tot; /* total number of input files */
3623     int          nxvgfile = 0;
3624     int          nedrfile = 0;
3625     char       **fxvgnms;
3626     char       **fedrnms;
3627     sim_data_t   sim_data; /* the simulation data */
3628     barres_t    *results;  /* the results */
3629     int          nresults; /* number of results in results array */
3630
3631     double      *partsum;
3632     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3633     FILE        *fpb, *fpi;
3634     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3635     char         buf[STRLEN], buf2[STRLEN];
3636     char         ktformat[STRLEN], sktformat[STRLEN];
3637     char         kteformat[STRLEN], skteformat[STRLEN];
3638     output_env_t oenv;
3639     double       kT, beta;
3640     gmx_bool     result_OK = TRUE, bEE = TRUE;
3641
3642     gmx_bool     disc_err          = FALSE;
3643     double       sum_disc_err      = 0.; /* discretization error */
3644     gmx_bool     histrange_err     = FALSE;
3645     double       sum_histrange_err = 0.; /* histogram range error */
3646     double       stat_err          = 0.; /* statistical error */
3647
3648     if (!parse_common_args(&argc, argv,
3649                            PCA_CAN_VIEW,
3650                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3651     {
3652         return 0;
3653     }
3654
3655     if (opt2bSet("-f", NFILE, fnm))
3656     {
3657         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3658     }
3659     if (opt2bSet("-g", NFILE, fnm))
3660     {
3661         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3662     }
3663
3664     sim_data_init(&sim_data);
3665 #if 0
3666     /* make linked list */
3667     lb = &lambda_head;
3668     lambda_data_init(lb, 0, 0);
3669     lb->next = lb;
3670     lb->prev = lb;
3671 #endif
3672
3673
3674     nfile_tot = nxvgfile + nedrfile;
3675
3676     if (nfile_tot == 0)
3677     {
3678         gmx_fatal(FARGS, "No input files!");
3679     }
3680
3681     if (nd < 0)
3682     {
3683         gmx_fatal(FARGS, "Can not have negative number of digits");
3684     }
3685     prec = pow(10, -nd);
3686
3687     snew(partsum, (nbmax+1)*(nbmax+1));
3688     nf = 0;
3689
3690     /* read in all files. First xvg files */
3691     for (f = 0; f < nxvgfile; f++)
3692     {
3693         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3694         nf++;
3695     }
3696     /* then .edr files */
3697     for (f = 0; f < nedrfile; f++)
3698     {
3699         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3700         nf++;
3701     }
3702
3703     /* fix the times to allow for equilibration */
3704     sim_data_impose_times(&sim_data, begin, end);
3705
3706     if (opt2bSet("-oh", NFILE, fnm))
3707     {
3708         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3709     }
3710
3711     /* assemble the output structures from the lambdas */
3712     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3713
3714     sum_disc_err = barres_list_max_disc_err(results, nresults);
3715
3716     if (nresults == 0)
3717     {
3718         printf("\nNo results to calculate.\n");
3719         return 0;
3720     }
3721
3722     if (sum_disc_err > prec)
3723     {
3724         prec = sum_disc_err;
3725         nd   = ceil(-log10(prec));
3726         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3727     }
3728
3729
3730     /*sprintf(lamformat,"%%6.3f");*/
3731     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3732     /* the format strings of the results in kT */
3733     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3734     sprintf( sktformat, "%%%ds", 6+nd);
3735     /* the format strings of the errors in kT */
3736     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3737     sprintf( skteformat, "%%%ds", 4+nd);
3738     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3739     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3740
3741
3742
3743     fpb = NULL;
3744     if (opt2bSet("-o", NFILE, fnm))
3745     {
3746         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3747         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3748                             "\\lambda", buf, exvggtXYDY, oenv);
3749     }
3750
3751     fpi = NULL;
3752     if (opt2bSet("-oi", NFILE, fnm))
3753     {
3754         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3755         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3756                        "\\lambda", buf, oenv);
3757     }
3758
3759
3760
3761     if (nbmin > nbmax)
3762     {
3763         nbmin = nbmax;
3764     }
3765
3766     /* first calculate results */
3767     bEE      = TRUE;
3768     disc_err = FALSE;
3769     for (f = 0; f < nresults; f++)
3770     {
3771         /* Determine the free energy difference with a factor of 10
3772          * more accuracy than requested for printing.
3773          */
3774         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3775                  &bEE, partsum);
3776
3777         if (results[f].dg_disc_err > prec/10.)
3778         {
3779             disc_err = TRUE;
3780         }
3781         if (results[f].dg_histrange_err > prec/10.)
3782         {
3783             histrange_err = TRUE;
3784         }
3785     }
3786
3787     /* print results in kT */
3788     kT   = BOLTZ*temp;
3789     beta = 1/kT;
3790
3791     printf("\nTemperature: %g K\n", temp);
3792
3793     printf("\nDetailed results in kT (see help for explanation):\n\n");
3794     printf("%6s ", " lam_A");
3795     printf("%6s ", " lam_B");
3796     printf(sktformat,  "DG ");
3797     if (bEE)
3798     {
3799         printf(skteformat, "+/- ");
3800     }
3801     if (disc_err)
3802     {
3803         printf(skteformat, "disc ");
3804     }
3805     if (histrange_err)
3806     {
3807         printf(skteformat, "range ");
3808     }
3809     printf(sktformat,  "s_A ");
3810     if (bEE)
3811     {
3812         printf(skteformat, "+/- " );
3813     }
3814     printf(sktformat,  "s_B ");
3815     if (bEE)
3816     {
3817         printf(skteformat, "+/- " );
3818     }
3819     printf(sktformat,  "stdev ");
3820     if (bEE)
3821     {
3822         printf(skteformat, "+/- ");
3823     }
3824     printf("\n");
3825     for (f = 0; f < nresults; f++)
3826     {
3827         lambda_vec_print_short(results[f].a->native_lambda, buf);
3828         printf("%s ", buf);
3829         lambda_vec_print_short(results[f].b->native_lambda, buf);
3830         printf("%s ", buf);
3831         printf(ktformat,  results[f].dg);
3832         printf(" ");
3833         if (bEE)
3834         {
3835             printf(kteformat, results[f].dg_err);
3836             printf(" ");
3837         }
3838         if (disc_err)
3839         {
3840             printf(kteformat, results[f].dg_disc_err);
3841             printf(" ");
3842         }
3843         if (histrange_err)
3844         {
3845             printf(kteformat, results[f].dg_histrange_err);
3846             printf(" ");
3847         }
3848         printf(ktformat,  results[f].sa);
3849         printf(" ");
3850         if (bEE)
3851         {
3852             printf(kteformat, results[f].sa_err);
3853             printf(" ");
3854         }
3855         printf(ktformat,  results[f].sb);
3856         printf(" ");
3857         if (bEE)
3858         {
3859             printf(kteformat, results[f].sb_err);
3860             printf(" ");
3861         }
3862         printf(ktformat,  results[f].dg_stddev);
3863         printf(" ");
3864         if (bEE)
3865         {
3866             printf(kteformat, results[f].dg_stddev_err);
3867         }
3868         printf("\n");
3869
3870         /* Check for negative relative entropy with a 95% certainty. */
3871         if (results[f].sa < -2*results[f].sa_err ||
3872             results[f].sb < -2*results[f].sb_err)
3873         {
3874             result_OK = FALSE;
3875         }
3876     }
3877
3878     if (!result_OK)
3879     {
3880         printf("\nWARNING: Some of these results violate the Second Law of "
3881                "Thermodynamics: \n"
3882                "         This is can be the result of severe undersampling, or "
3883                "(more likely)\n"
3884                "         there is something wrong with the simulations.\n");
3885     }
3886
3887
3888     /* final results in kJ/mol */
3889     printf("\n\nFinal results in kJ/mol:\n\n");
3890     dg_tot  = 0;
3891     for (f = 0; f < nresults; f++)
3892     {
3893
3894         if (fpi != NULL)
3895         {
3896             lambda_vec_print_short(results[f].a->native_lambda, buf);
3897             fprintf(fpi, xvg2format, buf, dg_tot);
3898         }
3899
3900
3901         if (fpb != NULL)
3902         {
3903             lambda_vec_print_intermediate(results[f].a->native_lambda,
3904                                           results[f].b->native_lambda,
3905                                           buf);
3906
3907             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3908         }
3909
3910         printf("point ");
3911         lambda_vec_print_short(results[f].a->native_lambda, buf);
3912         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3913         printf("%s - %s", buf, buf2);
3914         printf(",   DG ");
3915
3916         printf(dgformat, results[f].dg*kT);
3917         if (bEE)
3918         {
3919             printf(" +/- ");
3920             printf(dgformat, results[f].dg_err*kT);
3921         }
3922         if (histrange_err)
3923         {
3924             printf(" (max. range err. = ");
3925             printf(dgformat, results[f].dg_histrange_err*kT);
3926             printf(")");
3927             sum_histrange_err += results[f].dg_histrange_err*kT;
3928         }
3929
3930         printf("\n");
3931         dg_tot += results[f].dg;
3932     }
3933     printf("\n");
3934     printf("total ");
3935     lambda_vec_print_short(results[0].a->native_lambda, buf);
3936     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3937     printf("%s - %s", buf, buf2);
3938     printf(",   DG ");
3939
3940     printf(dgformat, dg_tot*kT);
3941     if (bEE)
3942     {
3943         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3944         printf(" +/- ");
3945         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3946     }
3947     printf("\n");
3948     if (disc_err)
3949     {
3950         printf("\nmaximum discretization error = ");
3951         printf(dgformat, sum_disc_err);
3952         if (bEE && stat_err < sum_disc_err)
3953         {
3954             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3955         }
3956     }
3957     if (histrange_err)
3958     {
3959         printf("\nmaximum histogram range error = ");
3960         printf(dgformat, sum_histrange_err);
3961         if (bEE && stat_err < sum_histrange_err)
3962         {
3963             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3964         }
3965
3966     }
3967     printf("\n");
3968
3969
3970     if (fpi != NULL)
3971     {
3972         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3973         fprintf(fpi, xvg2format, buf, dg_tot);
3974         gmx_ffclose(fpi);
3975     }
3976
3977     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3978     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3979
3980     return 0;
3981 }