src/gromacs/gmxana/gmx_bar.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include <ctype.h>
  38 #include <float.h>
  39 #include <math.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42
  43 #include "gromacs/commandline/pargs.h"
  44 #include "gromacs/fileio/enxio.h"
  45 #include "gromacs/fileio/xvgr.h"
  46 #include "gromacs/gmxana/gmx_ana.h"
  47 #include "gromacs/legacyheaders/macros.h"
  48 #include "gromacs/legacyheaders/mdebin.h"
  49 #include "gromacs/legacyheaders/names.h"
  50 #include "gromacs/legacyheaders/typedefs.h"
  51 #include "gromacs/legacyheaders/viewit.h"
  52 #include "gromacs/math/units.h"
  53 #include "gromacs/math/utilities.h"
  54 #include "gromacs/utility/cstringutil.h"
  55 #include "gromacs/utility/dir_separator.h"
  56 #include "gromacs/utility/fatalerror.h"
  57 #include "gromacs/utility/smalloc.h"
  58 #include "gromacs/utility/snprintf.h"
  59
  60
  61 /* Structure for the names of lambda vector components */
  62 typedef struct lambda_components_t
  63 {
  64     char **names;  /* Array of strings with names for the lambda vector
  65                       components */
  66     int    N;      /* The number of components */
  67     int    Nalloc; /* The number of allocated components */
  68 } lambda_components_t;
  69
  70 /* Structure for a lambda vector or a dhdl derivative direction */
  71 typedef struct lambda_vec_t
  72 {
  73     double                    *val;   /* The lambda vector component values. Only valid if
  74                                          dhdl == -1 */
  75     int                        dhdl;  /* The coordinate index for the derivative described by this
  76                                          structure, or -1 */
  77     const lambda_components_t *lc;    /* the associated lambda_components
  78                                          structure */
  79     int                        index; /* The state number (init-lambda-state) of this lambda
  80                                          vector, if known. If not, it is set to -1 */
  81 } lambda_vec_t;
  82
  83 /* the dhdl.xvg data from a simulation */
  84 typedef struct xvg_t
  85 {
  86     const char   *filename;
  87     int           ftp;           /* file type */
  88     int           nset;          /* number of lambdas, including dhdl */
  89     int          *np;            /* number of data points (du or hists) per lambda */
  90     int           np_alloc;      /* number of points (du or hists) allocated */
  91     double        temp;          /* temperature */
  92     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  93     double       *t;             /* the times (of second index for y) */
  94     double      **y;             /* the dU values. y[0] holds the derivative, while
  95                                     further ones contain the energy differences between
  96                                     the native lambda and the 'foreign' lambdas. */
  97     lambda_vec_t  native_lambda; /* the native lambda */
  98
  99     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 100 } xvg_t;
 101
 102
 103 typedef struct hist_t
 104 {
 105     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 106     double          dx[2];                  /* the histogram spacing. The reverse
 107                                                dx is the negative of the forward dx.*/
 108     gmx_int64_t     x0[2];                  /* the (forward + reverse) histogram start
 109                                                    point(s) as int */
 110
 111     int             nbin[2];                /* the (forward+reverse) number of bins */
 112     gmx_int64_t     sum;                    /* the total number of counts. Must be
 113                                                    the same for forward + reverse.  */
 114     int             nhist;                  /* number of hist datas (forward or reverse) */
 115
 116     double          start_time, delta_time; /* start time, end time of histogram */
 117 } hist_t;
 118
 119
 120 /* an aggregate of samples for partial free energy calculation */
 121 typedef struct samples_t
 122 {
 123     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 124     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 125     double        temp;           /* the temperature */
 126     gmx_bool      derivative;     /* whether this sample is a derivative */
 127
 128     /* The samples come either as either delta U lists: */
 129     int     ndu;                    /* the number of delta U samples */
 130     double *du;                     /* the delta u's */
 131     double *t;                      /* the times associated with those samples, or: */
 132     double  start_time, delta_time; /*start time and delta time for linear time*/
 133
 134     /* or as histograms: */
 135     hist_t *hist; /* a histogram */
 136
 137     /* allocation data: (not NULL for data 'owned' by this struct) */
 138     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 139     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 140     hist_t         *hist_alloc;          /* allocated hist */
 141
 142     gmx_int64_t     ntot;                /* total number of samples */
 143     const char     *filename;            /* the file name this sample comes from */
 144 } samples_t;
 145
 146 /* a sample range (start to end for du-style data, or boolean
 147     for both du-style data and histograms */
 148 typedef struct sample_range_t
 149 {
 150     int        start, end; /* start and end index for du style data */
 151     gmx_bool   use;        /* whether to use this sample */
 152
 153     samples_t *s;          /* the samples this range belongs to */
 154 } sample_range_t;
 155
 156
 157 /* a collection of samples for a partial free energy calculation
 158     (i.e. the collection of samples from one native lambda to one
 159     foreign lambda) */
 160 typedef struct sample_coll_t
 161 {
 162     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 163                                           in the histogram */
 164     lambda_vec_t   *foreign_lambda;    /* collection */
 165     double          temp;              /* the temperature */
 166
 167     int             nsamples;          /* the number of samples */
 168     samples_t     **s;                 /* the samples themselves */
 169     sample_range_t *r;                 /* the sample ranges */
 170     int             nsamples_alloc;    /* number of allocated samples */
 171
 172     gmx_int64_t     ntot;              /* total number of samples in the ranges of
 173                                               this collection */
 174
 175     struct sample_coll_t *next, *prev; /* next and previous in the list */
 176 } sample_coll_t;
 177
 178 /* all the samples associated with a lambda point */
 179 typedef struct lambda_data_t
 180 {
 181     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 182     double                temp;        /* temperature */
 183
 184     sample_coll_t        *sc;          /* the samples */
 185
 186     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 187
 188     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 189 } lambda_data_t;
 190
 191 /* Top-level data structure of simulation data */
 192 typedef struct sim_data_t
 193 {
 194     lambda_data_t      *lb;      /* a lambda data linked list */
 195     lambda_data_t       lb_head; /* The head element of the linked list */
 196
 197     lambda_components_t lc;      /* the allowed components of the lambda
 198                                     vectors */
 199 } sim_data_t;
 200
 201 /* Top-level data structure with calculated values. */
 202 typedef struct {
 203     sample_coll_t *a, *b;            /* the simulation data */
 204
 205     double         dg;               /* the free energy difference */
 206     double         dg_err;           /* the free energy difference */
 207
 208     double         dg_disc_err;      /* discretization error */
 209     double         dg_histrange_err; /* histogram range error */
 210
 211     double         sa;               /* relative entropy of b in state a */
 212     double         sa_err;           /* error in sa */
 213     double         sb;               /* relative entropy of a in state b */
 214     double         sb_err;           /* error in sb */
 215
 216     double         dg_stddev;        /* expected dg stddev per sample */
 217     double         dg_stddev_err;    /* error in dg_stddev */
 218 } barres_t;
 219
 220
 221 /* Initialize a lambda_components structure */
 222 static void lambda_components_init(lambda_components_t *lc)
 223 {
 224     lc->N      = 0;
 225     lc->Nalloc = 2;
 226     snew(lc->names, lc->Nalloc);
 227 }
 228
 229 /* Add a component to a lambda_components structure */
 230 static void lambda_components_add(lambda_components_t *lc,
 231                                   const char *name, size_t name_length)
 232 {
 233     while (lc->N + 1 > lc->Nalloc)
 234     {
 235         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 236         srenew(lc->names, lc->Nalloc);
 237     }
 238     snew(lc->names[lc->N], name_length+1);
 239     strncpy(lc->names[lc->N], name, name_length);
 240     lc->N++;
 241 }
 242
 243 /* check whether a component with index 'index' matches the given name, or
 244    is also NULL. Returns TRUE if this is the case.
 245    the string name does not need to end */
 246 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 247                                         int                        index,
 248                                         const char                *name,
 249                                         size_t                     name_length)
 250 {
 251     size_t len;
 252     if (index >= lc->N)
 253     {
 254         return FALSE;
 255     }
 256     if (name == NULL && lc->names[index] == NULL)
 257     {
 258         return TRUE;
 259     }
 260     if ((name == NULL) != (lc->names[index] == NULL))
 261     {
 262         return FALSE;
 263     }
 264     len = strlen(lc->names[index]);
 265     if (len != name_length)
 266     {
 267         return FALSE;
 268     }
 269     if (strncmp(lc->names[index], name, name_length) == 0)
 270     {
 271         return TRUE;
 272     }
 273     return FALSE;
 274 }
 275
 276 /* Find the index of a given lambda component name, or -1 if not found */
 277 static int lambda_components_find(const lambda_components_t *lc,
 278                                   const char                *name,
 279                                   size_t                     name_length)
 280 {
 281     int i;
 282
 283     for (i = 0; i < lc->N; i++)
 284     {
 285         if (strncmp(lc->names[i], name, name_length) == 0)
 286         {
 287             return i;
 288         }
 289     }
 290     return -1;
 291 }
 292
 293
 294
 295 /* initialize a lambda vector */
 296 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 297 {
 298     snew(lv->val, lc->N);
 299     lv->index = -1;
 300     lv->dhdl  = -1;
 301     lv->lc    = lc;
 302 }
 303
 304 static void lambda_vec_destroy(lambda_vec_t *lv)
 305 {
 306     sfree(lv->val);
 307 }
 308
 309 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 310 {
 311     int i;
 312
 313     lambda_vec_init(lv, orig->lc);
 314     lv->dhdl  = orig->dhdl;
 315     lv->index = orig->index;
 316     for (i = 0; i < lv->lc->N; i++)
 317     {
 318         lv->val[i] = orig->val[i];
 319     }
 320 }
 321
 322 /* write a lambda vec to a preallocated string */
 323 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 324 {
 325     int    i;
 326     size_t np;
 327
 328     str[0] = 0; /* reset the string */
 329     if (lv->dhdl < 0)
 330     {
 331         if (named)
 332         {
 333             str += sprintf(str, "delta H to ");
 334         }
 335         if (lv->lc->N > 1)
 336         {
 337             str += sprintf(str, "(");
 338         }
 339         for (i = 0; i < lv->lc->N; i++)
 340         {
 341             str += sprintf(str, "%g", lv->val[i]);
 342             if (i < lv->lc->N-1)
 343             {
 344                 str += sprintf(str, ", ");
 345             }
 346         }
 347         if (lv->lc->N > 1)
 348         {
 349             str += sprintf(str, ")");
 350         }
 351     }
 352     else
 353     {
 354         /* this lambda vector describes a derivative */
 355         str += sprintf(str, "dH/dl");
 356         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 357         {
 358             sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 359         }
 360     }
 361 }
 362
 363 /* write a shortened version of the lambda vec to a preallocated string */
 364 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 365 {
 366     int    i;
 367     size_t np;
 368
 369     if (lv->index >= 0)
 370     {
 371         sprintf(str, "%6d", lv->index);
 372     }
 373     else
 374     {
 375         if (lv->dhdl < 0)
 376         {
 377             sprintf(str, "%6.3f", lv->val[0]);
 378         }
 379         else
 380         {
 381             sprintf(str, "dH/dl[%d]", lv->dhdl);
 382         }
 383     }
 384 }
 385
 386 /* write an intermediate version of two lambda vecs to a preallocated string */
 387 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 388                                           const lambda_vec_t *b, char *str)
 389 {
 390     int    i;
 391     size_t np;
 392
 393     str[0] = 0;
 394     if ( (a->index >= 0) && (b->index >= 0) )
 395     {
 396         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 397     }
 398     else
 399     {
 400         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 401         {
 402             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 403         }
 404     }
 405 }
 406
 407
 408
 409 /* calculate the difference in lambda vectors: c = a-b.
 410    c must be initialized already, and a and b must describe non-derivative
 411    lambda points */
 412 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 413                             lambda_vec_t *c)
 414 {
 415     int i;
 416
 417     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 418     {
 419         gmx_fatal(FARGS,
 420                   "Trying to calculate the difference between derivatives instead of lambda points");
 421     }
 422     if ((a->lc != b->lc) || (a->lc != c->lc) )
 423     {
 424         gmx_fatal(FARGS,
 425                   "Trying to calculate the difference lambdas with differing basis set");
 426     }
 427     for (i = 0; i < a->lc->N; i++)
 428     {
 429         c->val[i] = a->val[i] - b->val[i];
 430     }
 431 }
 432
 433 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 434    a and b must describe non-derivative lambda points */
 435 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 436 {
 437     int    i;
 438     double ret = 0.;
 439
 440     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 441     {
 442         gmx_fatal(FARGS,
 443                   "Trying to calculate the difference between derivatives instead of lambda points");
 444     }
 445     if (a->lc != b->lc)
 446     {
 447         gmx_fatal(FARGS,
 448                   "Trying to calculate the difference lambdas with differing basis set");
 449     }
 450     for (i = 0; i < a->lc->N; i++)
 451     {
 452         double df = a->val[i] - b->val[i];
 453         ret += df*df;
 454     }
 455     return sqrt(ret);
 456 }
 457
 458
 459 /* check whether two lambda vectors are the same */
 460 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 461 {
 462     int i;
 463
 464     if (a->lc != b->lc)
 465     {
 466         return FALSE;
 467     }
 468     if (a->dhdl < 0)
 469     {
 470         for (i = 0; i < a->lc->N; i++)
 471         {
 472             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 473             {
 474                 return FALSE;
 475             }
 476         }
 477         return TRUE;
 478     }
 479     else
 480     {
 481         /* they're derivatives, so we check whether the indices match */
 482         return (a->dhdl == b->dhdl);
 483     }
 484 }
 485
 486 /* Compare the sort order of two foreign lambda vectors
 487
 488     returns 1 if a is 'bigger' than b,
 489     returns 0 if they're the same,
 490     returns -1 if a is 'smaller' than b.*/
 491 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 492                                        const lambda_vec_t *b)
 493 {
 494     int      i;
 495     double   norm_a    = 0, norm_b = 0;
 496     gmx_bool different = FALSE;
 497
 498     if (a->lc != b->lc)
 499     {
 500         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 501     }
 502     /* if either one has an index we sort based on that */
 503     if ((a->index >= 0) || (b->index >= 0))
 504     {
 505         if (a->index == b->index)
 506         {
 507             return 0;
 508         }
 509         return (a->index > b->index) ? 1 : -1;
 510     }
 511     if (a->dhdl >= 0 || b->dhdl >= 0)
 512     {
 513         /* lambda vectors that are derivatives always sort higher than those
 514            without derivatives */
 515         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 516         {
 517             return (a->dhdl >= 0) ? 1 : -1;
 518         }
 519         return a->dhdl > b->dhdl;
 520     }
 521
 522     /* neither has an index, so we can only sort on the lambda components,
 523        which is only valid if there is one component */
 524     for (i = 0; i < a->lc->N; i++)
 525     {
 526         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 527         {
 528             different = TRUE;
 529         }
 530         norm_a += a->val[i]*a->val[i];
 531         norm_b += b->val[i]*b->val[i];
 532     }
 533     if (!different)
 534     {
 535         return 0;
 536     }
 537     return norm_a > norm_b;
 538 }
 539
 540 /* Compare the sort order of two native lambda vectors
 541
 542     returns 1 if a is 'bigger' than b,
 543     returns 0 if they're the same,
 544     returns -1 if a is 'smaller' than b.*/
 545 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 546                                       const lambda_vec_t *b)
 547 {
 548     int i;
 549
 550     if (a->lc != b->lc)
 551     {
 552         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 553     }
 554     /* if either one has an index we sort based on that */
 555     if ((a->index >= 0) || (b->index >= 0))
 556     {
 557         if (a->index == b->index)
 558         {
 559             return 0;
 560         }
 561         return (a->index > b->index) ? 1 : -1;
 562     }
 563     /* neither has an index, so we can only sort on the lambda components,
 564        which is only valid if there is one component */
 565     if (a->lc->N > 1)
 566     {
 567         gmx_fatal(FARGS,
 568                   "Can't compare lambdas with no index and > 1 component");
 569     }
 570     if (a->dhdl >= 0 || b->dhdl >= 0)
 571     {
 572         gmx_fatal(FARGS,
 573                   "Can't compare native lambdas that are derivatives");
 574     }
 575     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 576     {
 577         return 0;
 578     }
 579     return a->val[0] > b->val[0] ? 1 : -1;
 580 }
 581
 582
 583
 584
 585 static void hist_init(hist_t *h, int nhist, int *nbin)
 586 {
 587     int i;
 588     if (nhist > 2)
 589     {
 590         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 591     }
 592     for (i = 0; i < nhist; i++)
 593     {
 594         snew(h->bin[i], nbin[i]);
 595         h->x0[i]      = 0;
 596         h->nbin[i]    = nbin[i];
 597         h->start_time = h->delta_time = 0;
 598         h->dx[i]      = 0;
 599     }
 600     h->sum   = 0;
 601     h->nhist = nhist;
 602 }
 603
 604 static void hist_destroy(hist_t *h)
 605 {
 606     sfree(h->bin);
 607 }
 608
 609
 610 static void xvg_init(xvg_t *ba)
 611 {
 612     ba->filename = NULL;
 613     ba->nset     = 0;
 614     ba->np_alloc = 0;
 615     ba->np       = NULL;
 616     ba->y        = NULL;
 617 }
 618
 619 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 620                          lambda_vec_t *foreign_lambda, double temp,
 621                          gmx_bool derivative, const char *filename)
 622 {
 623     s->native_lambda  = native_lambda;
 624     s->foreign_lambda = foreign_lambda;
 625     s->temp           = temp;
 626     s->derivative     = derivative;
 627
 628     s->ndu        = 0;
 629     s->du         = NULL;
 630     s->t          = NULL;
 631     s->start_time = s->delta_time = 0;
 632     s->hist       = NULL;
 633     s->du_alloc   = NULL;
 634     s->t_alloc    = NULL;
 635     s->hist_alloc = NULL;
 636     s->ndu_alloc  = 0;
 637     s->nt_alloc   = 0;
 638
 639     s->ntot     = 0;
 640     s->filename = filename;
 641 }
 642
 643 static void sample_range_init(sample_range_t *r, samples_t *s)
 644 {
 645     r->start = 0;
 646     r->end   = s->ndu;
 647     r->use   = TRUE;
 648     r->s     = NULL;
 649 }
 650
 651 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 652                              lambda_vec_t *foreign_lambda, double temp)
 653 {
 654     sc->native_lambda  = native_lambda;
 655     sc->foreign_lambda = foreign_lambda;
 656     sc->temp           = temp;
 657
 658     sc->nsamples       = 0;
 659     sc->s              = NULL;
 660     sc->r              = NULL;
 661     sc->nsamples_alloc = 0;
 662
 663     sc->ntot = 0;
 664     sc->next = sc->prev = NULL;
 665 }
 666
 667 static void sample_coll_destroy(sample_coll_t *sc)
 668 {
 669     /* don't free the samples themselves */
 670     sfree(sc->r);
 671     sfree(sc->s);
 672 }
 673
 674
 675 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 676                              double temp)
 677 {
 678     l->lambda = native_lambda;
 679     l->temp   = temp;
 680
 681     l->next = NULL;
 682     l->prev = NULL;
 683
 684     l->sc = &(l->sc_head);
 685
 686     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 687     l->sc->next = l->sc;
 688     l->sc->prev = l->sc;
 689 }
 690
 691 static void barres_init(barres_t *br)
 692 {
 693     br->dg            = 0;
 694     br->dg_err        = 0;
 695     br->sa            = 0;
 696     br->sa_err        = 0;
 697     br->sb            = 0;
 698     br->sb_err        = 0;
 699     br->dg_stddev     = 0;
 700     br->dg_stddev_err = 0;
 701
 702     br->a = NULL;
 703     br->b = NULL;
 704 }
 705
 706
 707 /* calculate the total number of samples in a sample collection */
 708 static void sample_coll_calc_ntot(sample_coll_t *sc)
 709 {
 710     int i;
 711
 712     sc->ntot = 0;
 713     for (i = 0; i < sc->nsamples; i++)
 714     {
 715         if (sc->r[i].use)
 716         {
 717             if (sc->s[i]->hist)
 718             {
 719                 sc->ntot += sc->s[i]->ntot;
 720             }
 721             else
 722             {
 723                 sc->ntot += sc->r[i].end - sc->r[i].start;
 724             }
 725         }
 726     }
 727 }
 728
 729
 730 /* find the barsamples_t associated with a lambda that corresponds to
 731    a specific foreign lambda */
 732 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 733                                                    lambda_vec_t  *foreign_lambda)
 734 {
 735     sample_coll_t *sc = l->sc->next;
 736
 737     while (sc != l->sc)
 738     {
 739         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 740         {
 741             return sc;
 742         }
 743         sc = sc->next;
 744     }
 745
 746     return NULL;
 747 }
 748
 749 /* insert li into an ordered list of lambda_colls */
 750 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 751 {
 752     sample_coll_t *scn = l->sc->next;
 753     while ( (scn != l->sc) )
 754     {
 755         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 756         {
 757             break;
 758         }
 759         scn = scn->next;
 760     }
 761     /* now insert it before the found scn */
 762     sc->next        = scn;
 763     sc->prev        = scn->prev;
 764     scn->prev->next = sc;
 765     scn->prev       = sc;
 766 }
 767
 768 /* insert li into an ordered list of lambdas */
 769 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 770 {
 771     lambda_data_t *lc = head->next;
 772     while (lc != head)
 773     {
 774         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 775         {
 776             break;
 777         }
 778         lc = lc->next;
 779     }
 780     /* now insert ourselves before the found lc */
 781     li->next       = lc;
 782     li->prev       = lc->prev;
 783     lc->prev->next = li;
 784     lc->prev       = li;
 785 }
 786
 787 /* insert a sample and a sample_range into a sample_coll. The
 788     samples are stored as a pointer, the range is copied. */
 789 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 790                                       sample_range_t *r)
 791 {
 792     /* first check if it belongs here */
 793     if (sc->temp != s->temp)
 794     {
 795         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 796                   s->filename, sc->next->s[0]->filename);
 797     }
 798     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 799     {
 800         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 801                   s->filename, sc->next->s[0]->filename);
 802     }
 803     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 804     {
 805         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 806                   s->filename, sc->next->s[0]->filename);
 807     }
 808
 809     /* check if there's room */
 810     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 811     {
 812         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 813         srenew(sc->s, sc->nsamples_alloc);
 814         srenew(sc->r, sc->nsamples_alloc);
 815     }
 816     sc->s[sc->nsamples] = s;
 817     sc->r[sc->nsamples] = *r;
 818     sc->nsamples++;
 819
 820     sample_coll_calc_ntot(sc);
 821 }
 822
 823 /* insert a sample into a lambda_list, creating the right sample_coll if
 824    neccesary */
 825 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 826 {
 827     gmx_bool       found = FALSE;
 828     sample_coll_t *sc;
 829     sample_range_t r;
 830
 831     lambda_data_t *l = head->next;
 832
 833     /* first search for the right lambda_data_t */
 834     while (l != head)
 835     {
 836         if (lambda_vec_same(l->lambda, s->native_lambda) )
 837         {
 838             found = TRUE;
 839             break;
 840         }
 841         l = l->next;
 842     }
 843
 844     if (!found)
 845     {
 846         snew(l, 1);                                     /* allocate a new one */
 847         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 848         lambda_data_insert_lambda(head, l);             /* add it to the list */
 849     }
 850
 851     /* now look for a sample collection */
 852     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 853     if (!sc)
 854     {
 855         snew(sc, 1); /* allocate a new one */
 856         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 857         lambda_data_insert_sample_coll(l, sc);
 858     }
 859
 860     /* now insert the samples into the sample coll */
 861     sample_range_init(&r, s);
 862     sample_coll_insert_sample(sc, s, &r);
 863 }
 864
 865
 866 /* make a histogram out of a sample collection */
 867 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 868                                   int *nbin_alloc, int *nbin,
 869                                   double *dx, double *xmin, int nbin_default)
 870 {
 871     int      i, j, k;
 872     gmx_bool dx_set   = FALSE;
 873     gmx_bool xmin_set = FALSE;
 874
 875     gmx_bool xmax_set      = FALSE;
 876     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 877                                        limits of a histogram */
 878     double   xmax          = -1;
 879
 880     /* first determine dx and xmin; try the histograms */
 881     for (i = 0; i < sc->nsamples; i++)
 882     {
 883         if (sc->s[i]->hist)
 884         {
 885             hist_t *hist = sc->s[i]->hist;
 886             for (k = 0; k < hist->nhist; k++)
 887             {
 888                 double hdx      = hist->dx[k];
 889                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 890
 891                 /* we use the biggest dx*/
 892                 if ( (!dx_set) || hist->dx[0] > *dx)
 893                 {
 894                     dx_set = TRUE;
 895                     *dx    = hist->dx[0];
 896                 }
 897                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 898                 {
 899                     xmin_set = TRUE;
 900                     *xmin    = (hist->x0[k]*hdx);
 901                 }
 902
 903                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 904                 {
 905                     xmax_set = TRUE;
 906                     xmax     = xmax_now;
 907                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 908                     {
 909                         xmax_set_hard = TRUE;
 910                     }
 911                 }
 912                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 913                 {
 914                     xmax_set_hard = TRUE;
 915                     xmax          = xmax_now;
 916                 }
 917             }
 918         }
 919     }
 920     /* and the delta us */
 921     for (i = 0; i < sc->nsamples; i++)
 922     {
 923         if (sc->s[i]->ndu > 0)
 924         {
 925             /* determine min and max */
 926             int    starti  = sc->r[i].start;
 927             int    endi    = sc->r[i].end;
 928             double du_xmin = sc->s[i]->du[starti];
 929             double du_xmax = sc->s[i]->du[starti];
 930             for (j = starti+1; j < endi; j++)
 931             {
 932                 if (sc->s[i]->du[j] < du_xmin)
 933                 {
 934                     du_xmin = sc->s[i]->du[j];
 935                 }
 936                 if (sc->s[i]->du[j] > du_xmax)
 937                 {
 938                     du_xmax = sc->s[i]->du[j];
 939                 }
 940             }
 941
 942             /* and now change the limits */
 943             if ( (!xmin_set) || (du_xmin < *xmin) )
 944             {
 945                 xmin_set = TRUE;
 946                 *xmin    = du_xmin;
 947             }
 948             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 949             {
 950                 xmax_set = TRUE;
 951                 xmax     = du_xmax;
 952             }
 953         }
 954     }
 955
 956     if (!xmax_set || !xmin_set)
 957     {
 958         *nbin = 0;
 959         return;
 960     }
 961
 962
 963     if (!dx_set)
 964     {
 965         *nbin = nbin_default;
 966         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 967                                                be 0, and we count from 0 */
 968     }
 969     else
 970     {
 971         *nbin = (xmax-(*xmin))/(*dx);
 972     }
 973
 974     if (*nbin > *nbin_alloc)
 975     {
 976         *nbin_alloc = *nbin;
 977         srenew(*bin, *nbin_alloc);
 978     }
 979
 980     /* reset the histogram */
 981     for (i = 0; i < (*nbin); i++)
 982     {
 983         (*bin)[i] = 0;
 984     }
 985
 986     /* now add the actual data */
 987     for (i = 0; i < sc->nsamples; i++)
 988     {
 989         if (sc->s[i]->hist)
 990         {
 991             hist_t *hist = sc->s[i]->hist;
 992             for (k = 0; k < hist->nhist; k++)
 993             {
 994                 double hdx       = hist->dx[k];
 995                 double xmin_hist = hist->x0[k]*hdx;
 996                 for (j = 0; j < hist->nbin[k]; j++)
 997                 {
 998                     /* calculate the bin corresponding to the middle of the
 999                        original bin */
1000                     double x     = hdx*(j+0.5) + xmin_hist;
1001                     int    binnr = (int)((x-(*xmin))/(*dx));
1002
1003                     if (binnr >= *nbin || binnr < 0)
1004                     {
1005                         binnr = (*nbin)-1;
1006                     }
1007
1008                     (*bin)[binnr] += hist->bin[k][j];
1009                 }
1010             }
1011         }
1012         else
1013         {
1014             int starti = sc->r[i].start;
1015             int endi   = sc->r[i].end;
1016             for (j = starti; j < endi; j++)
1017             {
1018                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1019                 if (binnr >= *nbin || binnr < 0)
1020                 {
1021                     binnr = (*nbin)-1;
1022                 }
1023
1024                 (*bin)[binnr]++;
1025             }
1026         }
1027     }
1028 }
1029
1030 /* write a collection of histograms to a file */
1031 void sim_data_histogram(sim_data_t *sd, const char *filename,
1032                         int nbin_default, const output_env_t oenv)
1033 {
1034     char           label_x[STRLEN];
1035     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1036     const char    *title   = "N(\\DeltaH)";
1037     const char    *label_y = "Samples";
1038     FILE          *fp;
1039     lambda_data_t *bl;
1040     int            nsets     = 0;
1041     char         **setnames  = NULL;
1042     gmx_bool       first_set = FALSE;
1043     /* histogram data: */
1044     int           *hist       = NULL;
1045     int            nbin       = 0;
1046     int            nbin_alloc = 0;
1047     double         dx         = 0;
1048     double         min        = 0;
1049     int            i;
1050     lambda_data_t *bl_head = sd->lb;
1051
1052     printf("\nWriting histogram to %s\n", filename);
1053     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1054
1055     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1056
1057     /* first get all the set names */
1058     bl = bl_head->next;
1059     /* iterate over all lambdas */
1060     while (bl != bl_head)
1061     {
1062         sample_coll_t *sc = bl->sc->next;
1063
1064         /* iterate over all samples */
1065         while (sc != bl->sc)
1066         {
1067             char buf[STRLEN], buf2[STRLEN];
1068
1069             nsets++;
1070             srenew(setnames, nsets);
1071             snew(setnames[nsets-1], STRLEN);
1072             if (sc->foreign_lambda->dhdl < 0)
1073             {
1074                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1075                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1076                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1077                         deltag, lambda, buf2, lambda, buf);
1078             }
1079             else
1080             {
1081                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1082                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1083                         dhdl, lambda, buf);
1084             }
1085             sc = sc->next;
1086         }
1087
1088         bl = bl->next;
1089     }
1090     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1091
1092
1093     /* now make the histograms */
1094     bl = bl_head->next;
1095     /* iterate over all lambdas */
1096     while (bl != bl_head)
1097     {
1098         sample_coll_t *sc = bl->sc->next;
1099
1100         /* iterate over all samples */
1101         while (sc != bl->sc)
1102         {
1103             if (!first_set)
1104             {
1105                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1106             }
1107
1108             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1109                                   nbin_default);
1110
1111             for (i = 0; i < nbin; i++)
1112             {
1113                 double xmin = i*dx + min;
1114                 double xmax = (i+1)*dx + min;
1115
1116                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1117             }
1118
1119             first_set = FALSE;
1120             sc        = sc->next;
1121         }
1122
1123         bl = bl->next;
1124     }
1125
1126     if (hist)
1127     {
1128         sfree(hist);
1129     }
1130
1131     xvgrclose(fp);
1132 }
1133
1134 static int
1135 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1136 {
1137     int n = 0;
1138
1139     n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1140     if (lambda->index >= 0)
1141     {
1142         n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1143     }
1144     if (lambda->dhdl >= 0)
1145     {
1146         n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1147     }
1148     else
1149     {
1150         int i;
1151         for (i = 0; i < lambda->lc->N; i++)
1152         {
1153             n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1154         }
1155     }
1156     return n;
1157 }
1158
1159 /* create a collection (array) of barres_t object given a ordered linked list
1160    of barlamda_t sample collections */
1161 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1162                                     gmx_bool use_dhdl)
1163 {
1164     lambda_data_t *bl;
1165     int            nlambda = 0;
1166     barres_t      *res;
1167     int            i;
1168     gmx_bool       dhdl    = FALSE;
1169     gmx_bool       first   = TRUE;
1170     lambda_data_t *bl_head = sd->lb;
1171
1172     /* first count the lambdas */
1173     bl = bl_head->next;
1174     while (bl != bl_head)
1175     {
1176         nlambda++;
1177         bl = bl->next;
1178     }
1179     snew(res, nlambda-1);
1180
1181     /* next put the right samples in the res */
1182     *nres = 0;
1183     bl    = bl_head->next->next; /* we start with the second one. */
1184     while (bl != bl_head)
1185     {
1186         sample_coll_t *sc, *scprev;
1187         barres_t      *br = &(res[*nres]);
1188         /* there is always a previous one. we search for that as a foreign
1189            lambda: */
1190         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1191         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1192
1193         barres_init(br);
1194
1195         if (use_dhdl)
1196         {
1197             /* we use dhdl */
1198
1199             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1200             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1201
1202             if (first)
1203             {
1204                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1205                 dhdl = TRUE;
1206             }
1207             if (!dhdl)
1208             {
1209                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1210             }
1211         }
1212         else if (!scprev && !sc)
1213         {
1214             char descX[STRLEN], descY[STRLEN];
1215             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1216             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1217
1218             gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1219         }
1220
1221         /* normal delta H */
1222         if (!scprev)
1223         {
1224             char descX[STRLEN], descY[STRLEN];
1225             snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1226             snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1227             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1228         }
1229         if (!sc)
1230         {
1231             char descX[STRLEN], descY[STRLEN];
1232             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1233             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1234             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1235         }
1236         br->a = scprev;
1237         br->b = sc;
1238
1239         first = FALSE;
1240         (*nres)++;
1241         bl = bl->next;
1242     }
1243     return res;
1244 }
1245
1246 /* estimate the maximum discretization error */
1247 static double barres_list_max_disc_err(barres_t *res, int nres)
1248 {
1249     int    i, j;
1250     double disc_err = 0.;
1251     double delta_lambda;
1252
1253     for (i = 0; i < nres; i++)
1254     {
1255         barres_t *br = &(res[i]);
1256
1257         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1258                                            br->a->native_lambda);
1259
1260         for (j = 0; j < br->a->nsamples; j++)
1261         {
1262             if (br->a->s[j]->hist)
1263             {
1264                 double Wfac = 1.;
1265                 if (br->a->s[j]->derivative)
1266                 {
1267                     Wfac =  delta_lambda;
1268                 }
1269
1270                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1271             }
1272         }
1273         for (j = 0; j < br->b->nsamples; j++)
1274         {
1275             if (br->b->s[j]->hist)
1276             {
1277                 double Wfac = 1.;
1278                 if (br->b->s[j]->derivative)
1279                 {
1280                     Wfac =  delta_lambda;
1281                 }
1282                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1283             }
1284         }
1285     }
1286     return disc_err;
1287 }
1288
1289
1290 /* impose start and end times on a sample collection, updating sample_ranges */
1291 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1292                                      double end_t)
1293 {
1294     int i;
1295     for (i = 0; i < sc->nsamples; i++)
1296     {
1297         samples_t      *s = sc->s[i];
1298         sample_range_t *r = &(sc->r[i]);
1299         if (s->hist)
1300         {
1301             double end_time = s->hist->delta_time*s->hist->sum +
1302                 s->hist->start_time;
1303             if (s->hist->start_time < begin_t || end_time > end_t)
1304             {
1305                 r->use = FALSE;
1306             }
1307         }
1308         else
1309         {
1310             if (!s->t)
1311             {
1312                 double end_time;
1313                 if (s->start_time < begin_t)
1314                 {
1315                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1316                 }
1317                 end_time = s->delta_time*s->ndu + s->start_time;
1318                 if (end_time > end_t)
1319                 {
1320                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1321                 }
1322             }
1323             else
1324             {
1325                 int j;
1326                 for (j = 0; j < s->ndu; j++)
1327                 {
1328                     if (s->t[j] < begin_t)
1329                     {
1330                         r->start = j;
1331                     }
1332
1333                     if (s->t[j] >= end_t)
1334                     {
1335                         r->end = j;
1336                         break;
1337                     }
1338                 }
1339             }
1340             if (r->start > r->end)
1341             {
1342                 r->use = FALSE;
1343             }
1344         }
1345     }
1346     sample_coll_calc_ntot(sc);
1347 }
1348
1349 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1350 {
1351     double         first_t, last_t;
1352     double         begin_t, end_t;
1353     lambda_data_t *lc;
1354     lambda_data_t *head = sd->lb;
1355     int            j;
1356
1357     if (begin <= 0 && end < 0)
1358     {
1359         return;
1360     }
1361
1362     /* first determine the global start and end times */
1363     first_t = -1;
1364     last_t  = -1;
1365     lc      = head->next;
1366     while (lc != head)
1367     {
1368         sample_coll_t *sc = lc->sc->next;
1369         while (sc != lc->sc)
1370         {
1371             for (j = 0; j < sc->nsamples; j++)
1372             {
1373                 double start_t, end_t;
1374
1375                 start_t = sc->s[j]->start_time;
1376                 end_t   =   sc->s[j]->start_time;
1377                 if (sc->s[j]->hist)
1378                 {
1379                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1380                 }
1381                 else
1382                 {
1383                     if (sc->s[j]->t)
1384                     {
1385                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1386                     }
1387                     else
1388                     {
1389                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1390                     }
1391                 }
1392
1393                 if (start_t < first_t || first_t < 0)
1394                 {
1395                     first_t = start_t;
1396                 }
1397                 if (end_t > last_t)
1398                 {
1399                     last_t = end_t;
1400                 }
1401             }
1402             sc = sc->next;
1403         }
1404         lc = lc->next;
1405     }
1406
1407     /* calculate the actual times */
1408     if (begin > 0)
1409     {
1410         begin_t = begin;
1411     }
1412     else
1413     {
1414         begin_t = first_t;
1415     }
1416
1417     if (end > 0)
1418     {
1419         end_t = end;
1420     }
1421     else
1422     {
1423         end_t = last_t;
1424     }
1425     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1426
1427     if (begin_t > end_t)
1428     {
1429         return;
1430     }
1431     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1432
1433     /* then impose them */
1434     lc = head->next;
1435     while (lc != head)
1436     {
1437         sample_coll_t *sc = lc->sc->next;
1438         while (sc != lc->sc)
1439         {
1440             sample_coll_impose_times(sc, begin_t, end_t);
1441             sc = sc->next;
1442         }
1443         lc = lc->next;
1444     }
1445 }
1446
1447
1448 /* create subsample i out of ni from an existing sample_coll */
1449 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1450                                              sample_coll_t *sc_orig,
1451                                              int i, int ni)
1452 {
1453     int             j;
1454     int             hist_start, hist_end;
1455
1456     gmx_int64_t     ntot_start;
1457     gmx_int64_t     ntot_end;
1458     gmx_int64_t     ntot_so_far;
1459
1460     *sc = *sc_orig; /* just copy all fields */
1461
1462     /* allocate proprietary memory */
1463     snew(sc->s, sc_orig->nsamples);
1464     snew(sc->r, sc_orig->nsamples);
1465
1466     /* copy the samples */
1467     for (j = 0; j < sc_orig->nsamples; j++)
1468     {
1469         sc->s[j] = sc_orig->s[j];
1470         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1471     }
1472
1473     /* now fix start and end fields */
1474     /* the casts avoid possible overflows */
1475     ntot_start  = (gmx_int64_t)(sc_orig->ntot*(double)i/(double)ni);
1476     ntot_end    = (gmx_int64_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1477     ntot_so_far = 0;
1478     for (j = 0; j < sc->nsamples; j++)
1479     {
1480         gmx_int64_t ntot_add;
1481         gmx_int64_t new_start, new_end;
1482
1483         if (sc->r[j].use)
1484         {
1485             if (sc->s[j]->hist)
1486             {
1487                 ntot_add = sc->s[j]->hist->sum;
1488             }
1489             else
1490             {
1491                 ntot_add = sc->r[j].end - sc->r[j].start;
1492             }
1493         }
1494         else
1495         {
1496             ntot_add = 0;
1497         }
1498
1499         if (!sc->s[j]->hist)
1500         {
1501             if (ntot_so_far < ntot_start)
1502             {
1503                 /* adjust starting point */
1504                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1505             }
1506             else
1507             {
1508                 new_start = sc->r[j].start;
1509             }
1510             /* adjust end point */
1511             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1512             if (new_end > sc->r[j].end)
1513             {
1514                 new_end = sc->r[j].end;
1515             }
1516
1517             /* check if we're in range at all */
1518             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1519             {
1520                 new_start = 0;
1521                 new_end   = 0;
1522             }
1523             /* and write the new range */
1524             sc->r[j].start = (int)new_start;
1525             sc->r[j].end   = (int)new_end;
1526         }
1527         else
1528         {
1529             if (sc->r[j].use)
1530             {
1531                 double overlap;
1532                 double ntot_start_norm, ntot_end_norm;
1533                 /* calculate the amount of overlap of the
1534                    desired range (ntot_start -- ntot_end) onto
1535                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1536
1537                 /* first calculate normalized bounds
1538                    (where 0 is the start of the hist range, and 1 the end) */
1539                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1540                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1541
1542                 /* now fix the boundaries */
1543                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1544                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1545
1546                 /* and calculate the overlap */
1547                 overlap = ntot_end_norm - ntot_start_norm;
1548
1549                 if (overlap > 0.95) /* we allow for 5% slack */
1550                 {
1551                     sc->r[j].use = TRUE;
1552                 }
1553                 else if (overlap < 0.05)
1554                 {
1555                     sc->r[j].use = FALSE;
1556                 }
1557                 else
1558                 {
1559                     return FALSE;
1560                 }
1561             }
1562         }
1563         ntot_so_far += ntot_add;
1564     }
1565     sample_coll_calc_ntot(sc);
1566
1567     return TRUE;
1568 }
1569
1570 /* calculate minimum and maximum work values in sample collection */
1571 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1572                                 double *Wmin, double *Wmax)
1573 {
1574     int i, j;
1575
1576     *Wmin = FLT_MAX;
1577     *Wmax = -FLT_MAX;
1578
1579     for (i = 0; i < sc->nsamples; i++)
1580     {
1581         samples_t      *s = sc->s[i];
1582         sample_range_t *r = &(sc->r[i]);
1583         if (r->use)
1584         {
1585             if (!s->hist)
1586             {
1587                 for (j = r->start; j < r->end; j++)
1588                 {
1589                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1590                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1591                 }
1592             }
1593             else
1594             {
1595                 int    hd = 0; /* determine the histogram direction: */
1596                 double dx;
1597                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1598                 {
1599                     hd = 1;
1600                 }
1601                 dx = s->hist->dx[hd];
1602
1603                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1604                 {
1605                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1606                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1607                     /* look for the highest value bin with values */
1608                     if (s->hist->bin[hd][j] > 0)
1609                     {
1610                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1611                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1612                         break;
1613                     }
1614                 }
1615             }
1616         }
1617     }
1618 }
1619
1620 /* Initialize a sim_data structure */
1621 static void sim_data_init(sim_data_t *sd)
1622 {
1623     /* make linked list */
1624     sd->lb       = &(sd->lb_head);
1625     sd->lb->next = sd->lb;
1626     sd->lb->prev = sd->lb;
1627
1628     lambda_components_init(&(sd->lc));
1629 }
1630
1631
1632 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1633 {
1634     int    i;
1635     double sum;
1636
1637     sum = 0;
1638
1639     for (i = 0; i < n; i++)
1640     {
1641         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1642     }
1643
1644     return sum;
1645 }
1646
1647 /* calculate the BAR average given a histogram
1648
1649     if type== 0, calculate the best estimate for the average,
1650     if type==-1, calculate the minimum possible value given the histogram
1651     if type== 1, calculate the maximum possible value given the histogram */
1652 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1653                                 int type)
1654 {
1655     double sum = 0.;
1656     int    i;
1657     int    max;
1658     /* normalization factor multiplied with bin width and
1659        number of samples (we normalize through M): */
1660     double normdx = 1.;
1661     int    hd     = 0; /* determine the histogram direction: */
1662     double dx;
1663
1664     if ( (hist->nhist > 1) && (Wfac < 0) )
1665     {
1666         hd = 1;
1667     }
1668     dx  = hist->dx[hd];
1669     max = hist->nbin[hd]-1;
1670     if (type == 1)
1671     {
1672         max = hist->nbin[hd]; /* we also add whatever was out of range */
1673     }
1674
1675     for (i = 0; i < max; i++)
1676     {
1677         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1678         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1679
1680         sum += pxdx/(1. + exp(x + sbMmDG));
1681     }
1682
1683     return sum;
1684 }
1685
1686 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1687                                 double temp, double tol, int type)
1688 {
1689     double kT, beta, M;
1690     double DG;
1691     int    i, j;
1692     double Wfac1, Wfac2, Wmin, Wmax;
1693     double DG0, DG1, DG2, dDG1;
1694     double sum1, sum2;
1695     double n1, n2; /* numbers of samples as doubles */
1696
1697     kT   = BOLTZ*temp;
1698     beta = 1/kT;
1699
1700     /* count the numbers of samples */
1701     n1 = ca->ntot;
1702     n2 = cb->ntot;
1703
1704     M = log(n1/n2);
1705
1706     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1707     if (ca->foreign_lambda->dhdl < 0)
1708     {
1709         /* this is the case when the delta U were calculated directly
1710            (i.e. we're not scaling dhdl) */
1711         Wfac1 = beta;
1712         Wfac2 = beta;
1713     }
1714     else
1715     {
1716         /* we're using dhdl, so delta_lambda needs to be a
1717            multiplication factor.  */
1718         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1719         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1720                                                   ca->native_lambda);
1721         if (cb->native_lambda->lc->N > 1)
1722         {
1723             gmx_fatal(FARGS,
1724                       "Can't (yet) do multi-component dhdl interpolation");
1725         }
1726
1727         Wfac1 =  beta*delta_lambda;
1728         Wfac2 = -beta*delta_lambda;
1729     }
1730
1731     if (beta < 1)
1732     {
1733         /* We print the output both in kT and kJ/mol.
1734          * Here we determine DG in kT, so when beta < 1
1735          * the precision has to be increased.
1736          */
1737         tol *= beta;
1738     }
1739
1740     /* Calculate minimum and maximum work to give an initial estimate of
1741      * delta G  as their average.
1742      */
1743     {
1744         double Wmin1, Wmin2, Wmax1, Wmax2;
1745         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1746         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1747
1748         Wmin = min(Wmin1, Wmin2);
1749         Wmax = max(Wmax1, Wmax2);
1750     }
1751
1752     DG0 = Wmin;
1753     DG2 = Wmax;
1754
1755     if (debug)
1756     {
1757         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1758     }
1759     /* We approximate by bisection: given our initial estimates
1760        we keep checking whether the halfway point is greater or
1761        smaller than what we get out of the BAR averages.
1762
1763        For the comparison we can use twice the tolerance. */
1764     while (DG2 - DG0 > 2*tol)
1765     {
1766         DG1 = 0.5*(DG0 + DG2);
1767
1768         /* calculate the BAR averages */
1769         dDG1 = 0.;
1770
1771         for (i = 0; i < ca->nsamples; i++)
1772         {
1773             samples_t      *s = ca->s[i];
1774             sample_range_t *r = &(ca->r[i]);
1775             if (r->use)
1776             {
1777                 if (s->hist)
1778                 {
1779                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1780                 }
1781                 else
1782                 {
1783                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1784                                          Wfac1, (M-DG1));
1785                 }
1786             }
1787         }
1788         for (i = 0; i < cb->nsamples; i++)
1789         {
1790             samples_t      *s = cb->s[i];
1791             sample_range_t *r = &(cb->r[i]);
1792             if (r->use)
1793             {
1794                 if (s->hist)
1795                 {
1796                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1797                 }
1798                 else
1799                 {
1800                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1801                                          Wfac2, -(M-DG1));
1802                 }
1803             }
1804         }
1805
1806         if (dDG1 < 0)
1807         {
1808             DG0 = DG1;
1809         }
1810         else
1811         {
1812             DG2 = DG1;
1813         }
1814         if (debug)
1815         {
1816             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1817         }
1818     }
1819
1820     return 0.5*(DG0 + DG2);
1821 }
1822
1823 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1824                              double temp, double dg, double *sa, double *sb)
1825 {
1826     int    i, j;
1827     double W_ab = 0.;
1828     double W_ba = 0.;
1829     double kT, beta;
1830     double Wfac1, Wfac2;
1831     double n1, n2;
1832
1833     kT   = BOLTZ*temp;
1834     beta = 1/kT;
1835
1836     /* count the numbers of samples */
1837     n1 = ca->ntot;
1838     n2 = cb->ntot;
1839
1840     /* to ensure the work values are the same as during the delta_G */
1841     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1842     if (ca->foreign_lambda->dhdl < 0)
1843     {
1844         /* this is the case when the delta U were calculated directly
1845            (i.e. we're not scaling dhdl) */
1846         Wfac1 = beta;
1847         Wfac2 = beta;
1848     }
1849     else
1850     {
1851         /* we're using dhdl, so delta_lambda needs to be a
1852            multiplication factor.  */
1853         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1854                                                   ca->native_lambda);
1855         Wfac1 =  beta*delta_lambda;
1856         Wfac2 = -beta*delta_lambda;
1857     }
1858
1859     /* first calculate the average work in both directions */
1860     for (i = 0; i < ca->nsamples; i++)
1861     {
1862         samples_t      *s = ca->s[i];
1863         sample_range_t *r = &(ca->r[i]);
1864         if (r->use)
1865         {
1866             if (!s->hist)
1867             {
1868                 for (j = r->start; j < r->end; j++)
1869                 {
1870                     W_ab += Wfac1*s->du[j];
1871                 }
1872             }
1873             else
1874             {
1875                 /* normalization factor multiplied with bin width and
1876                    number of samples (we normalize through M): */
1877                 double normdx = 1.;
1878                 double dx;
1879                 int    hd = 0; /* histogram direction */
1880                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1881                 {
1882                     hd = 1;
1883                 }
1884                 dx = s->hist->dx[hd];
1885
1886                 for (j = 0; j < s->hist->nbin[0]; j++)
1887                 {
1888                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1889                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1890                     W_ab += pxdx*x;
1891                 }
1892             }
1893         }
1894     }
1895     W_ab /= n1;
1896
1897     for (i = 0; i < cb->nsamples; i++)
1898     {
1899         samples_t      *s = cb->s[i];
1900         sample_range_t *r = &(cb->r[i]);
1901         if (r->use)
1902         {
1903             if (!s->hist)
1904             {
1905                 for (j = r->start; j < r->end; j++)
1906                 {
1907                     W_ba += Wfac1*s->du[j];
1908                 }
1909             }
1910             else
1911             {
1912                 /* normalization factor multiplied with bin width and
1913                    number of samples (we normalize through M): */
1914                 double normdx = 1.;
1915                 double dx;
1916                 int    hd = 0; /* histogram direction */
1917                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1918                 {
1919                     hd = 1;
1920                 }
1921                 dx = s->hist->dx[hd];
1922
1923                 for (j = 0; j < s->hist->nbin[0]; j++)
1924                 {
1925                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1926                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1927                     W_ba += pxdx*x;
1928                 }
1929             }
1930         }
1931     }
1932     W_ba /= n2;
1933
1934     /* then calculate the relative entropies */
1935     *sa = (W_ab - dg);
1936     *sb = (W_ba + dg);
1937 }
1938
1939 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1940                            double temp, double dg, double *stddev)
1941 {
1942     int    i, j;
1943     double M;
1944     double sigmafact = 0.;
1945     double kT, beta;
1946     double Wfac1, Wfac2;
1947     double n1, n2;
1948
1949     kT   = BOLTZ*temp;
1950     beta = 1/kT;
1951
1952     /* count the numbers of samples */
1953     n1 = ca->ntot;
1954     n2 = cb->ntot;
1955
1956     /* to ensure the work values are the same as during the delta_G */
1957     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1958     if (ca->foreign_lambda->dhdl < 0)
1959     {
1960         /* this is the case when the delta U were calculated directly
1961            (i.e. we're not scaling dhdl) */
1962         Wfac1 = beta;
1963         Wfac2 = beta;
1964     }
1965     else
1966     {
1967         /* we're using dhdl, so delta_lambda needs to be a
1968            multiplication factor.  */
1969         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1970                                                   ca->native_lambda);
1971         Wfac1 =  beta*delta_lambda;
1972         Wfac2 = -beta*delta_lambda;
1973     }
1974
1975     M = log(n1/n2);
1976
1977
1978     /* calculate average in both directions */
1979     for (i = 0; i < ca->nsamples; i++)
1980     {
1981         samples_t      *s = ca->s[i];
1982         sample_range_t *r = &(ca->r[i]);
1983         if (r->use)
1984         {
1985             if (!s->hist)
1986             {
1987                 for (j = r->start; j < r->end; j++)
1988                 {
1989                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1990                 }
1991             }
1992             else
1993             {
1994                 /* normalization factor multiplied with bin width and
1995                    number of samples (we normalize through M): */
1996                 double normdx = 1.;
1997                 double dx;
1998                 int    hd = 0; /* histogram direction */
1999                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
2000                 {
2001                     hd = 1;
2002                 }
2003                 dx = s->hist->dx[hd];
2004
2005                 for (j = 0; j < s->hist->nbin[0]; j++)
2006                 {
2007                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2008                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2009
2010                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
2011                 }
2012             }
2013         }
2014     }
2015     for (i = 0; i < cb->nsamples; i++)
2016     {
2017         samples_t      *s = cb->s[i];
2018         sample_range_t *r = &(cb->r[i]);
2019         if (r->use)
2020         {
2021             if (!s->hist)
2022             {
2023                 for (j = r->start; j < r->end; j++)
2024                 {
2025                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
2026                 }
2027             }
2028             else
2029             {
2030                 /* normalization factor multiplied with bin width and
2031                    number of samples (we normalize through M): */
2032                 double normdx = 1.;
2033                 double dx;
2034                 int    hd = 0; /* histogram direction */
2035                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2036                 {
2037                     hd = 1;
2038                 }
2039                 dx = s->hist->dx[hd];
2040
2041                 for (j = 0; j < s->hist->nbin[0]; j++)
2042                 {
2043                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2044                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2045
2046                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2047                 }
2048             }
2049         }
2050     }
2051
2052     sigmafact /= (n1 + n2);
2053
2054
2055     /* Eq. 10 from
2056        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2057     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2058 }
2059
2060
2061
2062 static void calc_bar(barres_t *br, double tol,
2063                      int npee_min, int npee_max, gmx_bool *bEE,
2064                      double *partsum)
2065 {
2066     int      npee, p;
2067     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2068                                                         for calculated quantities */
2069     int      nsample1, nsample2;
2070     double   temp = br->a->temp;
2071     int      i, j;
2072     double   dg_min, dg_max;
2073     gmx_bool have_hist = FALSE;
2074
2075     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2076
2077     br->dg_disc_err      = 0.;
2078     br->dg_histrange_err = 0.;
2079
2080     /* check if there are histograms */
2081     for (i = 0; i < br->a->nsamples; i++)
2082     {
2083         if (br->a->r[i].use && br->a->s[i]->hist)
2084         {
2085             have_hist = TRUE;
2086             break;
2087         }
2088     }
2089     if (!have_hist)
2090     {
2091         for (i = 0; i < br->b->nsamples; i++)
2092         {
2093             if (br->b->r[i].use && br->b->s[i]->hist)
2094             {
2095                 have_hist = TRUE;
2096                 break;
2097             }
2098         }
2099     }
2100
2101     /* calculate histogram-specific errors */
2102     if (have_hist)
2103     {
2104         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2105         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2106
2107         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2108         {
2109             /* the histogram range  error is the biggest of the differences
2110                between the best estimate and the extremes */
2111             br->dg_histrange_err = fabs(dg_max - dg_min);
2112         }
2113         br->dg_disc_err = 0.;
2114         for (i = 0; i < br->a->nsamples; i++)
2115         {
2116             if (br->a->s[i]->hist)
2117             {
2118                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2119             }
2120         }
2121         for (i = 0; i < br->b->nsamples; i++)
2122         {
2123             if (br->b->s[i]->hist)
2124             {
2125                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2126             }
2127         }
2128     }
2129     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2130
2131     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2132
2133     dg_sig2     = 0;
2134     sa_sig2     = 0;
2135     sb_sig2     = 0;
2136     stddev_sig2 = 0;
2137
2138     *bEE = TRUE;
2139     {
2140         sample_coll_t ca, cb;
2141
2142         /* initialize the samples */
2143         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2144                          br->a->temp);
2145         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2146                          br->b->temp);
2147
2148         for (npee = npee_min; npee <= npee_max; npee++)
2149         {
2150             double dgs      = 0;
2151             double dgs2     = 0;
2152             double dsa      = 0;
2153             double dsb      = 0;
2154             double dsa2     = 0;
2155             double dsb2     = 0;
2156             double dstddev  = 0;
2157             double dstddev2 = 0;
2158
2159
2160             for (p = 0; p < npee; p++)
2161             {
2162                 double   dgp;
2163                 double   stddevc;
2164                 double   sac, sbc;
2165                 gmx_bool cac, cbc;
2166
2167                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2168                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2169
2170                 if (!cac || !cbc)
2171                 {
2172                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2173                     *bEE = FALSE;
2174                     if (cac)
2175                     {
2176                         sample_coll_destroy(&ca);
2177                     }
2178                     if (cbc)
2179                     {
2180                         sample_coll_destroy(&cb);
2181                     }
2182                     return;
2183                 }
2184
2185                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2186                 dgs  += dgp;
2187                 dgs2 += dgp*dgp;
2188
2189                 partsum[npee*(npee_max+1)+p] += dgp;
2190
2191                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2192                 dsa  += sac;
2193                 dsa2 += sac*sac;
2194                 dsb  += sbc;
2195                 dsb2 += sbc*sbc;
2196                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2197
2198                 dstddev  += stddevc;
2199                 dstddev2 += stddevc*stddevc;
2200
2201                 sample_coll_destroy(&ca);
2202                 sample_coll_destroy(&cb);
2203             }
2204             dgs     /= npee;
2205             dgs2    /= npee;
2206             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2207
2208             dsa     /= npee;
2209             dsa2    /= npee;
2210             dsb     /= npee;
2211             dsb2    /= npee;
2212             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2213             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2214
2215             dstddev     /= npee;
2216             dstddev2    /= npee;
2217             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2218         }
2219         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2220         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2221         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2222         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2223     }
2224 }
2225
2226
2227 static double bar_err(int nbmin, int nbmax, const double *partsum)
2228 {
2229     int    nb, b;
2230     double svar, s, s2, dg;
2231
2232     svar = 0;
2233     for (nb = nbmin; nb <= nbmax; nb++)
2234     {
2235         s  = 0;
2236         s2 = 0;
2237         for (b = 0; b < nb; b++)
2238         {
2239             dg  = partsum[nb*(nbmax+1)+b];
2240             s  += dg;
2241             s2 += dg*dg;
2242         }
2243         s    /= nb;
2244         s2   /= nb;
2245         svar += (s2 - s*s)/(nb - 1);
2246     }
2247
2248     return sqrt(svar/(nbmax + 1 - nbmin));
2249 }
2250
2251
2252 /* Seek the end of an identifier (consecutive non-spaces), followed by
2253    an optional number of spaces or '='-signs. Returns a pointer to the
2254    first non-space value found after that. Returns NULL if the string
2255    ends before that.
2256  */
2257 static const char *find_value(const char *str)
2258 {
2259     gmx_bool name_end_found = FALSE;
2260
2261     /* if the string is a NULL pointer, return a NULL pointer. */
2262     if (str == NULL)
2263     {
2264         return NULL;
2265     }
2266     while (*str != '\0')
2267     {
2268         /* first find the end of the name */
2269         if (!name_end_found)
2270         {
2271             if (isspace(*str) || (*str == '=') )
2272             {
2273                 name_end_found = TRUE;
2274             }
2275         }
2276         else
2277         {
2278             if (!( isspace(*str) || (*str == '=') ))
2279             {
2280                 return str;
2281             }
2282         }
2283         str++;
2284     }
2285     return NULL;
2286 }
2287
2288
2289
2290 /* read a vector-notation description of a lambda vector */
2291 static gmx_bool read_lambda_compvec(const char                *str,
2292                                     lambda_vec_t              *lv,
2293                                     const lambda_components_t *lc_in,
2294                                     lambda_components_t       *lc_out,
2295                                     const char               **end,
2296                                     const char                *fn)
2297 {
2298     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2299                                           components, or to check them */
2300     gmx_bool    start_reached = FALSE; /* whether the start of component names
2301                                           has been reached */
2302     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2303     int         n             = 0;     /* current component number */
2304     const char *val_start     = NULL;  /* start of the component name, or NULL
2305                                           if not in a value */
2306     char       *strtod_end;
2307     gmx_bool    OK = TRUE;
2308
2309     if (end)
2310     {
2311         *end = str;
2312     }
2313
2314
2315     if (lc_out && lc_out->N == 0)
2316     {
2317         initialize_lc = TRUE;
2318     }
2319
2320     if (lc_in == NULL)
2321     {
2322         lc_in = lc_out;
2323     }
2324
2325     while (1)
2326     {
2327         if (!start_reached)
2328         {
2329             if (isalnum(*str))
2330             {
2331                 vector        = FALSE;
2332                 start_reached = TRUE;
2333                 val_start     = str;
2334             }
2335             else if (*str == '(')
2336             {
2337                 vector        = TRUE;
2338                 start_reached = TRUE;
2339             }
2340             else if (!isspace(*str))
2341             {
2342                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2343             }
2344         }
2345         else
2346         {
2347             if (val_start)
2348             {
2349                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2350                 {
2351                     /* end of value */
2352                     if (lv == NULL)
2353                     {
2354                         if (initialize_lc)
2355                         {
2356                             lambda_components_add(lc_out, val_start,
2357                                                   (str-val_start));
2358                         }
2359                         else
2360                         {
2361                             if (!lambda_components_check(lc_out, n, val_start,
2362                                                          (str-val_start)))
2363                             {
2364                                 return FALSE;
2365                             }
2366                         }
2367                     }
2368                     else
2369                     {
2370                         /* add a vector component to lv */
2371                         lv->val[n] = strtod(val_start, &strtod_end);
2372                         if (val_start == strtod_end)
2373                         {
2374                             gmx_fatal(FARGS,
2375                                       "Error reading lambda vector in %s", fn);
2376                         }
2377                     }
2378                     /* reset for the next identifier */
2379                     val_start = NULL;
2380                     n++;
2381                     if (!vector)
2382                     {
2383                         return OK;
2384                     }
2385                 }
2386             }
2387             else if (isalnum(*str))
2388             {
2389                 val_start = str;
2390             }
2391             if (*str == ')')
2392             {
2393                 str++;
2394                 if (end)
2395                 {
2396                     *end = str;
2397                 }
2398                 if (!vector)
2399                 {
2400                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2401                 }
2402                 else
2403                 {
2404                     if (n == lc_in->N)
2405                     {
2406                         return OK;
2407                     }
2408                     else if (lv == NULL)
2409                     {
2410                         return FALSE;
2411                     }
2412                     else
2413                     {
2414                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2415                                   fn);
2416                         return FALSE;
2417                     }
2418
2419                 }
2420             }
2421         }
2422         if (*str == '\0')
2423         {
2424             break;
2425         }
2426         str++;
2427         if (end)
2428         {
2429             *end = str;
2430         }
2431     }
2432     if (vector)
2433     {
2434         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2435         return FALSE;
2436     }
2437     return OK;
2438 }
2439
2440 /* read and check the component names from a string */
2441 static gmx_bool read_lambda_components(const char          *str,
2442                                        lambda_components_t *lc,
2443                                        const char         **end,
2444                                        const char          *fn)
2445 {
2446     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2447 }
2448
2449 /* read an initialized lambda vector from a string */
2450 static gmx_bool read_lambda_vector(const char   *str,
2451                                    lambda_vec_t *lv,
2452                                    const char  **end,
2453                                    const char   *fn)
2454 {
2455     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2456 }
2457
2458
2459
2460 /* deduce lambda value from legend.
2461     fn = the file name
2462     legend = the legend string
2463     ba = the xvg data
2464     lam = the initialized lambda vector
2465    returns whether to use the data in this set.
2466  */
2467 static gmx_bool legend2lambda(const char   *fn,
2468                               const char   *legend,
2469                               lambda_vec_t *lam)
2470 {
2471     double        lambda = 0;
2472     const char   *ptr    = NULL, *ptr2 = NULL;
2473     gmx_bool      ok     = FALSE;
2474     gmx_bool      bdhdl  = FALSE;
2475     const char   *tostr  = " to ";
2476
2477     if (legend == NULL)
2478     {
2479         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2480     }
2481
2482     /* look for the last 'to': */
2483     ptr2 = legend;
2484     do
2485     {
2486         ptr2 = strstr(ptr2, tostr);
2487         if (ptr2 != NULL)
2488         {
2489             ptr = ptr2;
2490             ptr2++;
2491         }
2492     }
2493     while (ptr2 != NULL && *ptr2 != '\0');
2494
2495     if (ptr)
2496     {
2497         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2498     }
2499     else
2500     {
2501         /* look for the = sign */
2502         ptr = strrchr(legend, '=');
2503         if (!ptr)
2504         {
2505             /* otherwise look for the last space */
2506             ptr = strrchr(legend, ' ');
2507         }
2508     }
2509
2510     if (strstr(legend, "dH"))
2511     {
2512         ok    = TRUE;
2513         bdhdl = TRUE;
2514     }
2515     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2516     {
2517         ok    = TRUE;
2518         bdhdl = FALSE;
2519     }
2520     else /*if (strstr(legend, "pV"))*/
2521     {
2522         return FALSE;
2523     }
2524     if (!ptr)
2525     {
2526         ok = FALSE;
2527     }
2528
2529     if (!ok)
2530     {
2531         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2532     }
2533     if (!bdhdl)
2534     {
2535         ptr = find_value(ptr);
2536         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2537         {
2538             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2539         }
2540     }
2541     else
2542     {
2543         int         dhdl_index;
2544         const char *end;
2545         char        buf[STRLEN];
2546
2547         ptr = strrchr(legend, '=');
2548         end = ptr;
2549         if (ptr)
2550         {
2551             /* there must be a component name */
2552             ptr--;
2553             if (ptr < legend)
2554             {
2555                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2556             }
2557             /* now backtrack to the start of the identifier */
2558             while (isspace(*ptr))
2559             {
2560                 end = ptr;
2561                 ptr--;
2562                 if (ptr < legend)
2563                 {
2564                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2565                 }
2566             }
2567             while (!isspace(*ptr))
2568             {
2569                 ptr--;
2570                 if (ptr < legend)
2571                 {
2572                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2573                 }
2574             }
2575             ptr++;
2576             strncpy(buf, ptr, (end-ptr));
2577             buf[(end-ptr)] = '\0';
2578             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2579             if (dhdl_index < 0)
2580             {
2581                 char buf[STRLEN];
2582                 strncpy(buf, ptr, (end-ptr));
2583                 buf[(end-ptr)] = '\0';
2584                 gmx_fatal(FARGS,
2585                           "Did not find lambda component for '%s' in %s",
2586                           buf, fn);
2587             }
2588         }
2589         else
2590         {
2591             if (lam->lc->N > 1)
2592             {
2593                 gmx_fatal(FARGS,
2594                           "dhdl without component name with >1 lambda component in %s",
2595                           fn);
2596             }
2597             dhdl_index = 0;
2598         }
2599         lam->dhdl = dhdl_index;
2600     }
2601     return TRUE;
2602 }
2603
2604 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2605                                 lambda_components_t *lc)
2606 {
2607     gmx_bool    bFound;
2608     const char *ptr;
2609     char       *end;
2610     double      native_lambda;
2611
2612     bFound = FALSE;
2613
2614     /* first check for a state string */
2615     ptr = strstr(subtitle, "state");
2616     if (ptr)
2617     {
2618         int         index = -1;
2619         const char *val_end;
2620
2621         /* the new 4.6 style lambda vectors */
2622         ptr = find_value(ptr);
2623         if (ptr)
2624         {
2625             index = strtol(ptr, &end, 10);
2626             if (ptr == end)
2627             {
2628                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2629                 return FALSE;
2630             }
2631             ptr = end;
2632         }
2633         else
2634         {
2635             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2636             return FALSE;
2637         }
2638         /* now find the lambda vector component names */
2639         while (*ptr != '(' && !isalnum(*ptr))
2640         {
2641             ptr++;
2642             if (*ptr == '\0')
2643             {
2644                 gmx_fatal(FARGS,
2645                           "Incomplete lambda vector component data in %s", fn);
2646                 return FALSE;
2647             }
2648         }
2649         val_end = ptr;
2650         if (!read_lambda_components(ptr, lc, &val_end, fn))
2651         {
2652             gmx_fatal(FARGS,
2653                       "lambda vector components in %s don't match those previously read",
2654                       fn);
2655         }
2656         ptr = find_value(val_end);
2657         if (!ptr)
2658         {
2659             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2660             return FALSE;
2661         }
2662         lambda_vec_init(&(ba->native_lambda), lc);
2663         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2664         {
2665             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2666         }
2667         ba->native_lambda.index = index;
2668         bFound                  = TRUE;
2669     }
2670     else
2671     {
2672         /* compatibility mode: check for lambda in other ways. */
2673         /* plain text lambda string */
2674         ptr = strstr(subtitle, "lambda");
2675         if (ptr == NULL)
2676         {
2677             /* xmgrace formatted lambda string */
2678             ptr = strstr(subtitle, "\\xl\\f{}");
2679         }
2680         if (ptr == NULL)
2681         {
2682             /* xmgr formatted lambda string */
2683             ptr = strstr(subtitle, "\\8l\\4");
2684         }
2685         if (ptr != NULL)
2686         {
2687             ptr = strstr(ptr, "=");
2688         }
2689         if (ptr != NULL)
2690         {
2691             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2692             /* add the lambda component name as an empty string */
2693             if (lc->N > 0)
2694             {
2695                 if (!lambda_components_check(lc, 0, "", 0))
2696                 {
2697                     gmx_fatal(FARGS,
2698                               "lambda vector components in %s don't match those previously read",
2699                               fn);
2700                 }
2701             }
2702             else
2703             {
2704                 lambda_components_add(lc, "", 0);
2705             }
2706             lambda_vec_init(&(ba->native_lambda), lc);
2707             ba->native_lambda.val[0] = native_lambda;
2708         }
2709     }
2710
2711     return bFound;
2712 }
2713
2714 static void filename2lambda(const char *fn)
2715 {
2716     double        lambda;
2717     const char   *ptr, *digitptr;
2718     char         *endptr;
2719     int           dirsep;
2720     ptr = fn;
2721     /* go to the end of the path string and search backward to find the last
2722        directory in the path which has to contain the value of lambda
2723      */
2724     while (ptr[1] != '\0')
2725     {
2726         ptr++;
2727     }
2728     /* searching backward to find the second directory separator */
2729     dirsep   = 0;
2730     digitptr = NULL;
2731     while (ptr >= fn)
2732     {
2733         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2734         {
2735             if (dirsep == 1)
2736             {
2737                 break;
2738             }
2739             dirsep++;
2740         }
2741         /* save the last position of a digit between the last two
2742            separators = in the last dirname */
2743         if (dirsep > 0 && isdigit(*ptr))
2744         {
2745             digitptr = ptr;
2746         }
2747         ptr--;
2748     }
2749     if (!digitptr)
2750     {
2751         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2752                   " last directory in the path '%s' does not contain a number", fn);
2753     }
2754     if (digitptr[-1] == '-')
2755     {
2756         digitptr--;
2757     }
2758     lambda = strtod(digitptr, &endptr);
2759     if (endptr == digitptr)
2760     {
2761         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2762     }
2763 }
2764
2765 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2766                                   lambda_components_t *lc)
2767 {
2768     int          i;
2769     char        *subtitle, **legend, *ptr;
2770     int          np;
2771     gmx_bool     native_lambda_read = FALSE;
2772     char         buf[STRLEN];
2773     lambda_vec_t lv;
2774
2775     xvg_init(ba);
2776
2777     ba->filename = fn;
2778
2779     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2780     if (!ba->y)
2781     {
2782         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2783     }
2784     /* Reorder the data */
2785     ba->t  = ba->y[0];
2786     for (i = 1; i < ba->nset; i++)
2787     {
2788         ba->y[i-1] = ba->y[i];
2789     }
2790     ba->nset--;
2791
2792     snew(ba->np, ba->nset);
2793     for (i = 0; i < ba->nset; i++)
2794     {
2795         ba->np[i] = np;
2796     }
2797
2798     ba->temp = -1;
2799     if (subtitle != NULL)
2800     {
2801         /* try to extract temperature */
2802         ptr = strstr(subtitle, "T =");
2803         if (ptr != NULL)
2804         {
2805             ptr += 3;
2806             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2807             {
2808                 if (ba->temp <= 0)
2809                 {
2810                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2811                               ba->temp, fn);
2812                 }
2813             }
2814         }
2815     }
2816     if (ba->temp < 0)
2817     {
2818         if (*temp <= 0)
2819         {
2820             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2821         }
2822         ba->temp = *temp;
2823     }
2824
2825     /* Try to deduce lambda from the subtitle */
2826     if (subtitle)
2827     {
2828         if (subtitle2lambda(subtitle, ba, fn, lc))
2829         {
2830             native_lambda_read = TRUE;
2831         }
2832     }
2833     snew(ba->lambda, ba->nset);
2834     if (legend == NULL)
2835     {
2836         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2837         if (ba->nset == 1)
2838         {
2839             if (!native_lambda_read)
2840             {
2841                 /* Deduce lambda from the file name */
2842                 filename2lambda(fn);
2843                 native_lambda_read = TRUE;
2844             }
2845             ba->lambda[0] = ba->native_lambda;
2846         }
2847         else
2848         {
2849             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2850         }
2851     }
2852     else
2853     {
2854         for (i = 0; i < ba->nset; )
2855         {
2856             gmx_bool use = FALSE;
2857             /* Read lambda from the legend */
2858             lambda_vec_init( &(ba->lambda[i]), lc );
2859             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2860             use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2861             if (use)
2862             {
2863                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2864                 i++;
2865             }
2866             else
2867             {
2868                 int j;
2869                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2870                 for (j = i+1; j < ba->nset; j++)
2871                 {
2872                     ba->y[j-1]  = ba->y[j];
2873                     legend[j-1] = legend[j];
2874                 }
2875                 ba->nset--;
2876             }
2877         }
2878     }
2879
2880     if (!native_lambda_read)
2881     {
2882         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2883     }
2884
2885     if (legend != NULL)
2886     {
2887         for (i = 0; i < ba->nset-1; i++)
2888         {
2889             sfree(legend[i]);
2890         }
2891         sfree(legend);
2892     }
2893 }
2894
2895 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2896 {
2897     xvg_t     *barsim;
2898     samples_t *s;
2899     int        i;
2900     double    *lambda;
2901
2902     snew(barsim, 1);
2903
2904     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2905
2906     if (barsim->nset < 1)
2907     {
2908         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2909     }
2910
2911     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2912     {
2913         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2914     }
2915     *temp = barsim->temp;
2916
2917     /* now create a series of samples_t */
2918     snew(s, barsim->nset);
2919     for (i = 0; i < barsim->nset; i++)
2920     {
2921         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2922                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2923                                                    &(barsim->lambda[i])),
2924                      fn);
2925         s[i].du  = barsim->y[i];
2926         s[i].ndu = barsim->np[i];
2927         s[i].t   = barsim->t;
2928
2929         lambda_data_list_insert_sample(sd->lb, s+i);
2930     }
2931     {
2932         char buf[STRLEN];
2933
2934         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2935         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2936                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2937         for (i = 0; i < barsim->nset; i++)
2938         {
2939             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2940             printf("        %s (%d pts)\n", buf, s[i].ndu);
2941         }
2942     }
2943     printf("\n\n");
2944 }
2945
2946 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2947                                  double start_time, double delta_time,
2948                                  lambda_vec_t *native_lambda, double temp,
2949                                  double *last_t, const char *filename)
2950 {
2951     int           i, j;
2952     gmx_bool      allocated;
2953     double        old_foreign_lambda;
2954     lambda_vec_t *foreign_lambda;
2955     int           type;
2956     samples_t    *s; /* convenience pointer */
2957     int           startj;
2958
2959     /* check the block types etc. */
2960     if ( (blk->nsub < 3) ||
2961          (blk->sub[0].type != xdr_datatype_int) ||
2962          (blk->sub[1].type != xdr_datatype_double) ||
2963          (
2964              (blk->sub[2].type != xdr_datatype_float) &&
2965              (blk->sub[2].type != xdr_datatype_double)
2966          ) ||
2967          (blk->sub[0].nr < 1) ||
2968          (blk->sub[1].nr < 1) )
2969     {
2970         gmx_fatal(FARGS,
2971                   "Unexpected/corrupted block data in file %s around time %f.",
2972                   filename, start_time);
2973     }
2974
2975     snew(foreign_lambda, 1);
2976     lambda_vec_init(foreign_lambda, native_lambda->lc);
2977     lambda_vec_copy(foreign_lambda, native_lambda);
2978     type = blk->sub[0].ival[0];
2979     if (type == dhbtDH)
2980     {
2981         for (i = 0; i < native_lambda->lc->N; i++)
2982         {
2983             foreign_lambda->val[i] = blk->sub[1].dval[i];
2984         }
2985     }
2986     else
2987     {
2988         if (blk->sub[0].nr > 1)
2989         {
2990             foreign_lambda->dhdl = blk->sub[0].ival[1];
2991         }
2992         else
2993         {
2994             foreign_lambda->dhdl = 0;
2995         }
2996     }
2997
2998     if (!*smp)
2999     {
3000         /* initialize the samples structure if it's empty. */
3001         snew(*smp, 1);
3002         samples_init(*smp, native_lambda, foreign_lambda, temp,
3003                      type == dhbtDHDL, filename);
3004         (*smp)->start_time = start_time;
3005         (*smp)->delta_time = delta_time;
3006     }
3007
3008     /* set convenience pointer */
3009     s = *smp;
3010
3011     /* now double check */
3012     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
3013     {
3014         char buf[STRLEN], buf2[STRLEN];
3015         lambda_vec_print(foreign_lambda, buf, FALSE);
3016         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
3017         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
3018         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
3019                   filename, start_time);
3020     }
3021
3022     /* make room for the data */
3023     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
3024     {
3025         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
3026             blk->sub[2].nr*2 : s->ndu_alloc;
3027         srenew(s->du_alloc, s->ndu_alloc);
3028         s->du = s->du_alloc;
3029     }
3030     startj   = s->ndu;
3031     s->ndu  += blk->sub[2].nr;
3032     s->ntot += blk->sub[2].nr;
3033     *ndu     = blk->sub[2].nr;
3034
3035     /* and copy the data*/
3036     for (j = 0; j < blk->sub[2].nr; j++)
3037     {
3038         if (blk->sub[2].type == xdr_datatype_float)
3039         {
3040             s->du[startj+j] = blk->sub[2].fval[j];
3041         }
3042         else
3043         {
3044             s->du[startj+j] = blk->sub[2].dval[j];
3045         }
3046     }
3047     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3048     {
3049         *last_t = start_time + blk->sub[2].nr*delta_time;
3050     }
3051 }
3052
3053 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3054                                       double start_time, double delta_time,
3055                                       lambda_vec_t *native_lambda, double temp,
3056                                       double *last_t, const char *filename)
3057 {
3058     int           i, j;
3059     samples_t    *s;
3060     int           nhist;
3061     double        old_foreign_lambda;
3062     lambda_vec_t *foreign_lambda;
3063     int           type;
3064     int           nbins[2];
3065
3066     /* check the block types etc. */
3067     if ( (blk->nsub < 2) ||
3068          (blk->sub[0].type != xdr_datatype_double) ||
3069          (blk->sub[1].type != xdr_datatype_int64) ||
3070          (blk->sub[0].nr < 2)  ||
3071          (blk->sub[1].nr < 2) )
3072     {
3073         gmx_fatal(FARGS,
3074                   "Unexpected/corrupted block data in file %s around time %f",
3075                   filename, start_time);
3076     }
3077
3078     nhist = blk->nsub-2;
3079     if (nhist == 0)
3080     {
3081         return NULL;
3082     }
3083     if (nhist > 2)
3084     {
3085         gmx_fatal(FARGS,
3086                   "Unexpected/corrupted block data in file %s around time %f",
3087                   filename, start_time);
3088     }
3089
3090     snew(s, 1);
3091     *nsamples = 1;
3092
3093     snew(foreign_lambda, 1);
3094     lambda_vec_init(foreign_lambda, native_lambda->lc);
3095     lambda_vec_copy(foreign_lambda, native_lambda);
3096     type = (int)(blk->sub[1].lval[1]);
3097     if (type == dhbtDH)
3098     {
3099         double old_foreign_lambda;
3100
3101         old_foreign_lambda = blk->sub[0].dval[0];
3102         if (old_foreign_lambda >= 0)
3103         {
3104             foreign_lambda->val[0] = old_foreign_lambda;
3105             if (foreign_lambda->lc->N > 1)
3106             {
3107                 gmx_fatal(FARGS,
3108                           "Single-component lambda in multi-component file %s",
3109                           filename);
3110             }
3111         }
3112         else
3113         {
3114             for (i = 0; i < native_lambda->lc->N; i++)
3115             {
3116                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3117             }
3118         }
3119     }
3120     else
3121     {
3122         if (foreign_lambda->lc->N > 1)
3123         {
3124             if (blk->sub[1].nr < 3 + nhist)
3125             {
3126                 gmx_fatal(FARGS,
3127                           "Missing derivative coord in multi-component file %s",
3128                           filename);
3129             }
3130             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3131         }
3132         else
3133         {
3134             foreign_lambda->dhdl = 0;
3135         }
3136     }
3137
3138     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3139                  filename);
3140     snew(s->hist, 1);
3141
3142     for (i = 0; i < nhist; i++)
3143     {
3144         nbins[i] = blk->sub[i+2].nr;
3145     }
3146
3147     hist_init(s->hist, nhist, nbins);
3148
3149     for (i = 0; i < nhist; i++)
3150     {
3151         s->hist->x0[i] = blk->sub[1].lval[2+i];
3152         s->hist->dx[i] = blk->sub[0].dval[1];
3153         if (i == 1)
3154         {
3155             s->hist->dx[i] = -s->hist->dx[i];
3156         }
3157     }
3158
3159     s->hist->start_time = start_time;
3160     s->hist->delta_time = delta_time;
3161     s->start_time       = start_time;
3162     s->delta_time       = delta_time;
3163
3164     for (i = 0; i < nhist; i++)
3165     {
3166         int             nbin;
3167         gmx_int64_t     sum = 0;
3168
3169         for (j = 0; j < s->hist->nbin[i]; j++)
3170         {
3171             int binv = (int)(blk->sub[i+2].ival[j]);
3172
3173             s->hist->bin[i][j] = binv;
3174             sum               += binv;
3175
3176         }
3177         if (i == 0)
3178         {
3179             s->ntot      = sum;
3180             s->hist->sum = sum;
3181         }
3182         else
3183         {
3184             if (s->ntot != sum)
3185             {
3186                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3187                           filename);
3188             }
3189         }
3190     }
3191
3192     if (start_time + s->hist->sum*delta_time > *last_t)
3193     {
3194         *last_t = start_time + s->hist->sum*delta_time;
3195     }
3196     return s;
3197 }
3198
3199
3200 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3201 {
3202     int            i, j;
3203     ener_file_t    fp;
3204     t_enxframe    *fr;
3205     int            nre;
3206     gmx_enxnm_t   *enm           = NULL;
3207     double         first_t       = -1;
3208     double         last_t        = -1;
3209     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3210     int           *nhists        = NULL; /* array to keep count & print at end */
3211     int           *npts          = NULL; /* array to keep count & print at end */
3212     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3213     lambda_vec_t  *native_lambda;
3214     double         end_time;             /* the end time of the last batch of samples */
3215     int            nsamples = 0;
3216     lambda_vec_t   start_lambda;
3217
3218     fp = open_enx(fn, "r");
3219     do_enxnms(fp, &nre, &enm);
3220     snew(fr, 1);
3221
3222     snew(native_lambda, 1);
3223     start_lambda.lc = NULL;
3224
3225     while (do_enx(fp, fr))
3226     {
3227         /* count the data blocks */
3228         int    nblocks_raw  = 0;
3229         int    nblocks_hist = 0;
3230         int    nlam         = 0;
3231         int    k;
3232         /* DHCOLL block information: */
3233         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3234         double rtemp      = 0;
3235
3236         /* count the blocks and handle collection information: */
3237         for (i = 0; i < fr->nblock; i++)
3238         {
3239             if (fr->block[i].id == enxDHHIST)
3240             {
3241                 nblocks_hist++;
3242             }
3243             if (fr->block[i].id == enxDH)
3244             {
3245                 nblocks_raw++;
3246             }
3247             if (fr->block[i].id == enxDHCOLL)
3248             {
3249                 nlam++;
3250                 if ( (fr->block[i].nsub < 1) ||
3251                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3252                      (fr->block[i].sub[0].nr < 5))
3253                 {
3254                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3255                 }
3256
3257                 /* read the data from the DHCOLL block */
3258                 rtemp            =        fr->block[i].sub[0].dval[0];
3259                 start_time       =   fr->block[i].sub[0].dval[1];
3260                 delta_time       =   fr->block[i].sub[0].dval[2];
3261                 old_start_lambda = fr->block[i].sub[0].dval[3];
3262                 delta_lambda     = fr->block[i].sub[0].dval[4];
3263
3264                 if (delta_lambda > 0)
3265                 {
3266                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3267                 }
3268                 if ( ( *temp != rtemp) && (*temp > 0) )
3269                 {
3270                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3271                 }
3272                 *temp = rtemp;
3273
3274                 if (old_start_lambda >= 0)
3275                 {
3276                     if (sd->lc.N > 0)
3277                     {
3278                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3279                         {
3280                             gmx_fatal(FARGS,
3281                                       "lambda vector components in %s don't match those previously read",
3282                                       fn);
3283                         }
3284                     }
3285                     else
3286                     {
3287                         lambda_components_add(&(sd->lc), "", 0);
3288                     }
3289                     if (!start_lambda.lc)
3290                     {
3291                         lambda_vec_init(&start_lambda, &(sd->lc));
3292                     }
3293                     start_lambda.val[0] = old_start_lambda;
3294                 }
3295                 else
3296                 {
3297                     /* read lambda vector */
3298                     int      n_lambda_vec;
3299                     gmx_bool check = (sd->lc.N > 0);
3300                     if (fr->block[i].nsub < 2)
3301                     {
3302                         gmx_fatal(FARGS,
3303                                   "No lambda vector, but start_lambda=%f\n",
3304                                   old_start_lambda);
3305                     }
3306                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3307                     for (j = 0; j < n_lambda_vec; j++)
3308                     {
3309                         const char *name =
3310                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3311                         if (check)
3312                         {
3313                             /* check the components */
3314                             lambda_components_check(&(sd->lc), j, name,
3315                                                     strlen(name));
3316                         }
3317                         else
3318                         {
3319                             lambda_components_add(&(sd->lc), name,
3320                                                   strlen(name));
3321                         }
3322                     }
3323                     lambda_vec_init(&start_lambda, &(sd->lc));
3324                     start_lambda.index = fr->block[i].sub[1].ival[0];
3325                     for (j = 0; j < n_lambda_vec; j++)
3326                     {
3327                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3328                     }
3329                 }
3330                 if (first_t < 0)
3331                 {
3332                     first_t = start_time;
3333                 }
3334             }
3335         }
3336
3337         if (nlam != 1)
3338         {
3339             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3340         }
3341         if (nblocks_raw > 0 && nblocks_hist > 0)
3342         {
3343             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3344         }
3345
3346         if (nsamples > 0)
3347         {
3348             /* check the native lambda */
3349             if (!lambda_vec_same(&start_lambda, native_lambda) )
3350             {
3351                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3352                           fn, native_lambda, start_lambda, start_time);
3353             }
3354             /* check the number of samples against the previous number */
3355             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3356             {
3357                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3358                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3359             }
3360             /* check whether last iterations's end time matches with
3361                the currrent start time */
3362             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3363             {
3364                 /* it didn't. We need to store our samples and reallocate */
3365                 for (i = 0; i < nsamples; i++)
3366                 {
3367                     if (samples_rawdh[i])
3368                     {
3369                         /* insert it into the existing list */
3370                         lambda_data_list_insert_sample(sd->lb,
3371                                                        samples_rawdh[i]);
3372                         /* and make sure we'll allocate a new one this time
3373                            around */
3374                         samples_rawdh[i] = NULL;
3375                     }
3376                 }
3377             }
3378         }
3379         else
3380         {
3381             /* this is the first round; allocate the associated data
3382                structures */
3383             /*native_lambda=start_lambda;*/
3384             lambda_vec_init(native_lambda, &(sd->lc));
3385             lambda_vec_copy(native_lambda, &start_lambda);
3386             nsamples = nblocks_raw+nblocks_hist;
3387             snew(nhists, nsamples);
3388             snew(npts, nsamples);
3389             snew(lambdas, nsamples);
3390             snew(samples_rawdh, nsamples);
3391             for (i = 0; i < nsamples; i++)
3392             {
3393                 nhists[i]        = 0;
3394                 npts[i]          = 0;
3395                 lambdas[i]       = NULL;
3396                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3397                                             ones contain values */
3398             }
3399         }
3400
3401         /* and read them */
3402         k = 0; /* counter for the lambdas, etc. arrays */
3403         for (i = 0; i < fr->nblock; i++)
3404         {
3405             if (fr->block[i].id == enxDH)
3406             {
3407                 int type = (fr->block[i].sub[0].ival[0]);
3408                 if (type == dhbtDH || type == dhbtDHDL)
3409                 {
3410                     int ndu;
3411                     read_edr_rawdh_block(&(samples_rawdh[k]),
3412                                          &ndu,
3413                                          &(fr->block[i]),
3414                                          start_time, delta_time,
3415                                          native_lambda, rtemp,
3416                                          &last_t, fn);
3417                     npts[k] += ndu;
3418                     if (samples_rawdh[k])
3419                     {
3420                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3421                     }
3422                     k++;
3423                 }
3424             }
3425             else if (fr->block[i].id == enxDHHIST)
3426             {
3427                 int type = (int)(fr->block[i].sub[1].lval[1]);
3428                 if (type == dhbtDH || type == dhbtDHDL)
3429                 {
3430                     int        j;
3431                     int        nb = 0;
3432                     samples_t *s; /* this is where the data will go */
3433                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3434                                             start_time, delta_time,
3435                                             native_lambda, rtemp,
3436                                             &last_t, fn);
3437                     nhists[k] += nb;
3438                     if (nb > 0)
3439                     {
3440                         lambdas[k] = s->foreign_lambda;
3441                     }
3442                     k++;
3443                     /* and insert the new sample immediately */
3444                     for (j = 0; j < nb; j++)
3445                     {
3446                         lambda_data_list_insert_sample(sd->lb, s+j);
3447                     }
3448                 }
3449             }
3450         }
3451     }
3452     /* Now store all our extant sample collections */
3453     for (i = 0; i < nsamples; i++)
3454     {
3455         if (samples_rawdh[i])
3456         {
3457             /* insert it into the existing list */
3458             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3459         }
3460     }
3461
3462
3463     {
3464         char buf[STRLEN];
3465         printf("\n");
3466         lambda_vec_print(native_lambda, buf, FALSE);
3467         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3468                fn, first_t, last_t, buf);
3469         for (i = 0; i < nsamples; i++)
3470         {
3471             if (lambdas[i])
3472             {
3473                 lambda_vec_print(lambdas[i], buf, TRUE);
3474                 if (nhists[i] > 0)
3475                 {
3476                     printf("        %s (%d hists)\n", buf, nhists[i]);
3477                 }
3478                 else
3479                 {
3480                     printf("        %s (%d pts)\n", buf, npts[i]);
3481                 }
3482             }
3483         }
3484     }
3485     printf("\n\n");
3486     sfree(npts);
3487     sfree(nhists);
3488     sfree(lambdas);
3489 }
3490
3491
3492 int gmx_bar(int argc, char *argv[])
3493 {
3494     static const char *desc[] = {
3495         "[THISMODULE] calculates free energy difference estimates through ",
3496         "Bennett's acceptance ratio method (BAR). It also automatically",
3497         "adds series of individual free energies obtained with BAR into",
3498         "a combined free energy estimate.[PAR]",
3499
3500         "Every individual BAR free energy difference relies on two ",
3501         "simulations at different states: say state A and state B, as",
3502         "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3503         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3504         "average of the Hamiltonian difference of state B given state A and",
3505         "vice versa.",
3506         "The energy differences to the other state must be calculated",
3507         "explicitly during the simulation. This can be done with",
3508         "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3509
3510         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3511         "Two types of input files are supported:",
3512         "",
3513         " * Files with more than one [IT]y[it]-value. ",
3514         "   The files should have columns ",
3515         "   with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3516         "   The [GRK]lambda[grk] values are inferred ",
3517         "   from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3518         "   dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3519         "   legends of Delta H",
3520         " * Files with only one [IT]y[it]-value. Using the",
3521         "   [TT]-extp[tt] option for these files, it is assumed",
3522         "   that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3523         "   Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3524         "   The [GRK]lambda[grk] value of the simulation is inferred from the ",
3525         "   subtitle (if present), otherwise from a number in the subdirectory ",
3526         "   in the file name.",
3527         "",
3528
3529         "The [GRK]lambda[grk] of the simulation is parsed from ",
3530         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3531         "foreign [GRK]lambda[grk] values from the legend containing the ",
3532         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3533         "the legend line containing 'T ='.[PAR]",
3534
3535         "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3536         "These can contain either lists of energy differences (see the ",
3537         "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3538         "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3539         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3540         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3541
3542         "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3543         "the energy difference can also be extrapolated from the ",
3544         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3545         "option, which assumes that the system's Hamiltonian depends linearly",
3546         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3547
3548         "The free energy estimates are determined using BAR with bisection, ",
3549         "with the precision of the output set with [TT]-prec[tt]. ",
3550         "An error estimate taking into account time correlations ",
3551         "is made by splitting the data into blocks and determining ",
3552         "the free energy differences over those blocks and assuming ",
3553         "the blocks are independent. ",
3554         "The final error estimate is determined from the average variance ",
3555         "over 5 blocks. A range of block numbers for error estimation can ",
3556         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3557
3558         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3559         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3560         "samples. [BB]Note[bb] that when aggregating energy ",
3561         "differences/derivatives with different sampling intervals, this is ",
3562         "almost certainly not correct. Usually subsequent energies are ",
3563         "correlated and different time intervals mean different degrees ",
3564         "of correlation between samples.[PAR]",
3565
3566         "The results are split in two parts: the last part contains the final ",
3567         "results in kJ/mol, together with the error estimate for each part ",
3568         "and the total. The first part contains detailed free energy ",
3569         "difference estimates and phase space overlap measures in units of ",
3570         "kT (together with their computed error estimate). The printed ",
3571         "values are:",
3572         "",
3573         " * lam_A: the [GRK]lambda[grk] values for point A.",
3574         " * lam_B: the [GRK]lambda[grk] values for point B.",
3575         " *    DG: the free energy estimate.",
3576         " *   s_A: an estimate of the relative entropy of B in A.",
3577         " *   s_B: an estimate of the relative entropy of A in B.",
3578         " * stdev: an estimate expected per-sample standard deviation.",
3579         "",
3580
3581         "The relative entropy of both states in each other's ensemble can be ",
3582         "interpreted as a measure of phase space overlap: ",
3583         "the relative entropy s_A of the work samples of lambda_B in the ",
3584         "ensemble of lambda_A (and vice versa for s_B), is a ",
3585         "measure of the 'distance' between Boltzmann distributions of ",
3586         "the two states, that goes to zero for identical distributions. See ",
3587         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3588         "[PAR]",
3589         "The estimate of the expected per-sample standard deviation, as given ",
3590         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3591         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3592         "of the actual statistical error, because it assumes independent samples).[PAR]",
3593
3594         "To get a visual estimate of the phase space overlap, use the ",
3595         "[TT]-oh[tt] option to write series of histograms, together with the ",
3596         "[TT]-nbin[tt] option.[PAR]"
3597     };
3598     static real        begin    = 0, end = -1, temp = -1;
3599     int                nd       = 2, nbmin = 5, nbmax = 5;
3600     int                nbin     = 100;
3601     gmx_bool           use_dhdl = FALSE;
3602     gmx_bool           calc_s, calc_v;
3603     t_pargs            pa[] = {
3604         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3605         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3606         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3607         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3608         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3609         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3610         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3611         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3612     };
3613
3614     t_filenm           fnm[] = {
3615         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3616         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3617         { efXVG, "-o",  "bar",    ffOPTWR },
3618         { efXVG, "-oi", "barint", ffOPTWR },
3619         { efXVG, "-oh", "histogram", ffOPTWR }
3620     };
3621 #define NFILE asize(fnm)
3622
3623     int          f, i, j;
3624     int          nf = 0;    /* file counter */
3625     int          nbs;
3626     int          nfile_tot; /* total number of input files */
3627     int          nxvgfile = 0;
3628     int          nedrfile = 0;
3629     char       **fxvgnms;
3630     char       **fedrnms;
3631     sim_data_t   sim_data; /* the simulation data */
3632     barres_t    *results;  /* the results */
3633     int          nresults; /* number of results in results array */
3634
3635     double      *partsum;
3636     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3637     FILE        *fpb, *fpi;
3638     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3639     char         buf[STRLEN], buf2[STRLEN];
3640     char         ktformat[STRLEN], sktformat[STRLEN];
3641     char         kteformat[STRLEN], skteformat[STRLEN];
3642     output_env_t oenv;
3643     double       kT, beta;
3644     gmx_bool     result_OK = TRUE, bEE = TRUE;
3645
3646     gmx_bool     disc_err          = FALSE;
3647     double       sum_disc_err      = 0.; /* discretization error */
3648     gmx_bool     histrange_err     = FALSE;
3649     double       sum_histrange_err = 0.; /* histogram range error */
3650     double       stat_err          = 0.; /* statistical error */
3651
3652     if (!parse_common_args(&argc, argv,
3653                            PCA_CAN_VIEW,
3654                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3655     {
3656         return 0;
3657     }
3658
3659     if (opt2bSet("-f", NFILE, fnm))
3660     {
3661         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3662     }
3663     if (opt2bSet("-g", NFILE, fnm))
3664     {
3665         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3666     }
3667
3668     sim_data_init(&sim_data);
3669 #if 0
3670     /* make linked list */
3671     lb = &lambda_head;
3672     lambda_data_init(lb, 0, 0);
3673     lb->next = lb;
3674     lb->prev = lb;
3675 #endif
3676
3677
3678     nfile_tot = nxvgfile + nedrfile;
3679
3680     if (nfile_tot == 0)
3681     {
3682         gmx_fatal(FARGS, "No input files!");
3683     }
3684
3685     if (nd < 0)
3686     {
3687         gmx_fatal(FARGS, "Can not have negative number of digits");
3688     }
3689     prec = pow(10, -nd);
3690
3691     snew(partsum, (nbmax+1)*(nbmax+1));
3692     nf = 0;
3693
3694     /* read in all files. First xvg files */
3695     for (f = 0; f < nxvgfile; f++)
3696     {
3697         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3698         nf++;
3699     }
3700     /* then .edr files */
3701     for (f = 0; f < nedrfile; f++)
3702     {
3703         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3704         nf++;
3705     }
3706
3707     /* fix the times to allow for equilibration */
3708     sim_data_impose_times(&sim_data, begin, end);
3709
3710     if (opt2bSet("-oh", NFILE, fnm))
3711     {
3712         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3713     }
3714
3715     /* assemble the output structures from the lambdas */
3716     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3717
3718     sum_disc_err = barres_list_max_disc_err(results, nresults);
3719
3720     if (nresults == 0)
3721     {
3722         printf("\nNo results to calculate.\n");
3723         return 0;
3724     }
3725
3726     if (sum_disc_err > prec)
3727     {
3728         prec = sum_disc_err;
3729         nd   = ceil(-log10(prec));
3730         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3731     }
3732
3733
3734     /*sprintf(lamformat,"%%6.3f");*/
3735     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3736     /* the format strings of the results in kT */
3737     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3738     sprintf( sktformat, "%%%ds", 6+nd);
3739     /* the format strings of the errors in kT */
3740     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3741     sprintf( skteformat, "%%%ds", 4+nd);
3742     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3743     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3744
3745
3746
3747     fpb = NULL;
3748     if (opt2bSet("-o", NFILE, fnm))
3749     {
3750         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3751         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3752                             "\\lambda", buf, exvggtXYDY, oenv);
3753     }
3754
3755     fpi = NULL;
3756     if (opt2bSet("-oi", NFILE, fnm))
3757     {
3758         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3759         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3760                        "\\lambda", buf, oenv);
3761     }
3762
3763
3764
3765     if (nbmin > nbmax)
3766     {
3767         nbmin = nbmax;
3768     }
3769
3770     /* first calculate results */
3771     bEE      = TRUE;
3772     disc_err = FALSE;
3773     for (f = 0; f < nresults; f++)
3774     {
3775         /* Determine the free energy difference with a factor of 10
3776          * more accuracy than requested for printing.
3777          */
3778         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3779                  &bEE, partsum);
3780
3781         if (results[f].dg_disc_err > prec/10.)
3782         {
3783             disc_err = TRUE;
3784         }
3785         if (results[f].dg_histrange_err > prec/10.)
3786         {
3787             histrange_err = TRUE;
3788         }
3789     }
3790
3791     /* print results in kT */
3792     kT   = BOLTZ*temp;
3793     beta = 1/kT;
3794
3795     printf("\nTemperature: %g K\n", temp);
3796
3797     printf("\nDetailed results in kT (see help for explanation):\n\n");
3798     printf("%6s ", " lam_A");
3799     printf("%6s ", " lam_B");
3800     printf(sktformat,  "DG ");
3801     if (bEE)
3802     {
3803         printf(skteformat, "+/- ");
3804     }
3805     if (disc_err)
3806     {
3807         printf(skteformat, "disc ");
3808     }
3809     if (histrange_err)
3810     {
3811         printf(skteformat, "range ");
3812     }
3813     printf(sktformat,  "s_A ");
3814     if (bEE)
3815     {
3816         printf(skteformat, "+/- " );
3817     }
3818     printf(sktformat,  "s_B ");
3819     if (bEE)
3820     {
3821         printf(skteformat, "+/- " );
3822     }
3823     printf(sktformat,  "stdev ");
3824     if (bEE)
3825     {
3826         printf(skteformat, "+/- ");
3827     }
3828     printf("\n");
3829     for (f = 0; f < nresults; f++)
3830     {
3831         lambda_vec_print_short(results[f].a->native_lambda, buf);
3832         printf("%s ", buf);
3833         lambda_vec_print_short(results[f].b->native_lambda, buf);
3834         printf("%s ", buf);
3835         printf(ktformat,  results[f].dg);
3836         printf(" ");
3837         if (bEE)
3838         {
3839             printf(kteformat, results[f].dg_err);
3840             printf(" ");
3841         }
3842         if (disc_err)
3843         {
3844             printf(kteformat, results[f].dg_disc_err);
3845             printf(" ");
3846         }
3847         if (histrange_err)
3848         {
3849             printf(kteformat, results[f].dg_histrange_err);
3850             printf(" ");
3851         }
3852         printf(ktformat,  results[f].sa);
3853         printf(" ");
3854         if (bEE)
3855         {
3856             printf(kteformat, results[f].sa_err);
3857             printf(" ");
3858         }
3859         printf(ktformat,  results[f].sb);
3860         printf(" ");
3861         if (bEE)
3862         {
3863             printf(kteformat, results[f].sb_err);
3864             printf(" ");
3865         }
3866         printf(ktformat,  results[f].dg_stddev);
3867         printf(" ");
3868         if (bEE)
3869         {
3870             printf(kteformat, results[f].dg_stddev_err);
3871         }
3872         printf("\n");
3873
3874         /* Check for negative relative entropy with a 95% certainty. */
3875         if (results[f].sa < -2*results[f].sa_err ||
3876             results[f].sb < -2*results[f].sb_err)
3877         {
3878             result_OK = FALSE;
3879         }
3880     }
3881
3882     if (!result_OK)
3883     {
3884         printf("\nWARNING: Some of these results violate the Second Law of "
3885                "Thermodynamics: \n"
3886                "         This is can be the result of severe undersampling, or "
3887                "(more likely)\n"
3888                "         there is something wrong with the simulations.\n");
3889     }
3890
3891
3892     /* final results in kJ/mol */
3893     printf("\n\nFinal results in kJ/mol:\n\n");
3894     dg_tot  = 0;
3895     for (f = 0; f < nresults; f++)
3896     {
3897
3898         if (fpi != NULL)
3899         {
3900             lambda_vec_print_short(results[f].a->native_lambda, buf);
3901             fprintf(fpi, xvg2format, buf, dg_tot);
3902         }
3903
3904
3905         if (fpb != NULL)
3906         {
3907             lambda_vec_print_intermediate(results[f].a->native_lambda,
3908                                           results[f].b->native_lambda,
3909                                           buf);
3910
3911             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3912         }
3913
3914         printf("point ");
3915         lambda_vec_print_short(results[f].a->native_lambda, buf);
3916         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3917         printf("%s - %s", buf, buf2);
3918         printf(",   DG ");
3919
3920         printf(dgformat, results[f].dg*kT);
3921         if (bEE)
3922         {
3923             printf(" +/- ");
3924             printf(dgformat, results[f].dg_err*kT);
3925         }
3926         if (histrange_err)
3927         {
3928             printf(" (max. range err. = ");
3929             printf(dgformat, results[f].dg_histrange_err*kT);
3930             printf(")");
3931             sum_histrange_err += results[f].dg_histrange_err*kT;
3932         }
3933
3934         printf("\n");
3935         dg_tot += results[f].dg;
3936     }
3937     printf("\n");
3938     printf("total ");
3939     lambda_vec_print_short(results[0].a->native_lambda, buf);
3940     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3941     printf("%s - %s", buf, buf2);
3942     printf(",   DG ");
3943
3944     printf(dgformat, dg_tot*kT);
3945     if (bEE)
3946     {
3947         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3948         printf(" +/- ");
3949         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3950     }
3951     printf("\n");
3952     if (disc_err)
3953     {
3954         printf("\nmaximum discretization error = ");
3955         printf(dgformat, sum_disc_err);
3956         if (bEE && stat_err < sum_disc_err)
3957         {
3958             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3959         }
3960     }
3961     if (histrange_err)
3962     {
3963         printf("\nmaximum histogram range error = ");
3964         printf(dgformat, sum_histrange_err);
3965         if (bEE && stat_err < sum_histrange_err)
3966         {
3967             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3968         }
3969
3970     }
3971     printf("\n");
3972
3973
3974     if (fpi != NULL)
3975     {
3976         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3977         fprintf(fpi, xvg2format, buf, dg_tot);
3978         xvgrclose(fpi);
3979     }
3980     if (fpb != NULL)
3981     {
3982         xvgrclose(fpb);
3983     }
3984
3985     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3986     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3987
3988     return 0;
3989 }