src/tools/gmx_bar.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41 #include <math.h>
  42 #include <string.h>
  43 #include <ctype.h>
  44 #include <math.h>
  45 #include <float.h>
  46
  47 #include "sysstuff.h"
  48 #include "typedefs.h"
  49 #include "smalloc.h"
  50 #include "futil.h"
  51 #include "statutil.h"
  52 #include "copyrite.h"
  53 #include "macros.h"
  54 #include "enxio.h"
  55 #include "physics.h"
  56 #include "gmx_fatal.h"
  57 #include "xvgr.h"
  58 #include "gmx_ana.h"
  59 #include "maths.h"
  60 #include "names.h"
  61 #include "mdebin.h"
  62
  63 /* Suppress Cygwin compiler warnings from using newlib version of
  64  * ctype.h */
  65 #ifdef GMX_CYGWIN
  66 #undef isdigit
  67 #endif
  68
  69
  70 /* Structure for the names of lambda vector components */
  71 typedef struct lambda_components_t
  72 {
  73     char **names;  /* Array of strings with names for the lambda vector
  74                       components */
  75     int    N;      /* The number of components */
  76     int    Nalloc; /* The number of allocated components */
  77 } lambda_components_t;
  78
  79 /* Structure for a lambda vector or a dhdl derivative direction */
  80 typedef struct lambda_vec_t
  81 {
  82     double                    *val;   /* The lambda vector component values. Only valid if
  83                                          dhdl == -1 */
  84     int                        dhdl;  /* The coordinate index for the derivative described by this
  85                                          structure, or -1 */
  86     const lambda_components_t *lc;    /* the associated lambda_components
  87                                          structure */
  88     int                        index; /* The state number (init-lambda-state) of this lambda
  89                                          vector, if known. If not, it is set to -1 */
  90 } lambda_vec_t;
  91
  92 /* the dhdl.xvg data from a simulation */
  93 typedef struct xvg_t
  94 {
  95     const char   *filename;
  96     int           ftp;           /* file type */
  97     int           nset;          /* number of lambdas, including dhdl */
  98     int          *np;            /* number of data points (du or hists) per lambda */
  99     int           np_alloc;      /* number of points (du or hists) allocated */
 100     double        temp;          /* temperature */
 101     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
 102     double       *t;             /* the times (of second index for y) */
 103     double      **y;             /* the dU values. y[0] holds the derivative, while
 104                                     further ones contain the energy differences between
 105                                     the native lambda and the 'foreign' lambdas. */
 106     lambda_vec_t  native_lambda; /* the native lambda */
 107
 108     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 109 } xvg_t;
 110
 111
 112 typedef struct hist_t
 113 {
 114     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 115     double          dx[2];                  /* the histogram spacing. The reverse
 116                                                dx is the negative of the forward dx.*/
 117     gmx_large_int_t x0[2];                  /* the (forward + reverse) histogram start
 118                                                point(s) as int */
 119
 120     int             nbin[2];                /* the (forward+reverse) number of bins */
 121     gmx_large_int_t sum;                    /* the total number of counts. Must be
 122                                                the same for forward + reverse.  */
 123     int             nhist;                  /* number of hist datas (forward or reverse) */
 124
 125     double          start_time, delta_time; /* start time, end time of histogram */
 126 } hist_t;
 127
 128
 129 /* an aggregate of samples for partial free energy calculation */
 130 typedef struct samples_t
 131 {
 132     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 133     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 134     double        temp;           /* the temperature */
 135     gmx_bool      derivative;     /* whether this sample is a derivative */
 136
 137     /* The samples come either as either delta U lists: */
 138     int     ndu;                    /* the number of delta U samples */
 139     double *du;                     /* the delta u's */
 140     double *t;                      /* the times associated with those samples, or: */
 141     double  start_time, delta_time; /*start time and delta time for linear time*/
 142
 143     /* or as histograms: */
 144     hist_t *hist; /* a histogram */
 145
 146     /* allocation data: (not NULL for data 'owned' by this struct) */
 147     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 148     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 149     hist_t         *hist_alloc;          /* allocated hist */
 150
 151     gmx_large_int_t ntot;                /* total number of samples */
 152     const char     *filename;            /* the file name this sample comes from */
 153 } samples_t;
 154
 155 /* a sample range (start to end for du-style data, or boolean
 156     for both du-style data and histograms */
 157 typedef struct sample_range_t
 158 {
 159     int        start, end; /* start and end index for du style data */
 160     gmx_bool   use;        /* whether to use this sample */
 161
 162     samples_t *s;          /* the samples this range belongs to */
 163 } sample_range_t;
 164
 165
 166 /* a collection of samples for a partial free energy calculation
 167     (i.e. the collection of samples from one native lambda to one
 168     foreign lambda) */
 169 typedef struct sample_coll_t
 170 {
 171     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 172                                           in the histogram */
 173     lambda_vec_t   *foreign_lambda;    /* collection */
 174     double          temp;              /* the temperature */
 175
 176     int             nsamples;          /* the number of samples */
 177     samples_t     **s;                 /* the samples themselves */
 178     sample_range_t *r;                 /* the sample ranges */
 179     int             nsamples_alloc;    /* number of allocated samples */
 180
 181     gmx_large_int_t ntot;              /* total number of samples in the ranges of
 182                                           this collection */
 183
 184     struct sample_coll_t *next, *prev; /* next and previous in the list */
 185 } sample_coll_t;
 186
 187 /* all the samples associated with a lambda point */
 188 typedef struct lambda_data_t
 189 {
 190     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 191     double                temp;        /* temperature */
 192
 193     sample_coll_t        *sc;          /* the samples */
 194
 195     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 196
 197     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 198 } lambda_data_t;
 199
 200 /* Top-level data structure of simulation data */
 201 typedef struct sim_data_t
 202 {
 203     lambda_data_t      *lb;      /* a lambda data linked list */
 204     lambda_data_t       lb_head; /* The head element of the linked list */
 205
 206     lambda_components_t lc;      /* the allowed components of the lambda
 207                                     vectors */
 208 } sim_data_t;
 209
 210 /* Top-level data structure with calculated values. */
 211 typedef struct {
 212     sample_coll_t *a, *b;            /* the simulation data */
 213
 214     double         dg;               /* the free energy difference */
 215     double         dg_err;           /* the free energy difference */
 216
 217     double         dg_disc_err;      /* discretization error */
 218     double         dg_histrange_err; /* histogram range error */
 219
 220     double         sa;               /* relative entropy of b in state a */
 221     double         sa_err;           /* error in sa */
 222     double         sb;               /* relative entropy of a in state b */
 223     double         sb_err;           /* error in sb */
 224
 225     double         dg_stddev;        /* expected dg stddev per sample */
 226     double         dg_stddev_err;    /* error in dg_stddev */
 227 } barres_t;
 228
 229
 230 /* Initialize a lambda_components structure */
 231 static void lambda_components_init(lambda_components_t *lc)
 232 {
 233     lc->N      = 0;
 234     lc->Nalloc = 2;
 235     snew(lc->names, lc->Nalloc);
 236 }
 237
 238 /* Add a component to a lambda_components structure */
 239 static void lambda_components_add(lambda_components_t *lc,
 240                                   const char *name, size_t name_length)
 241 {
 242     while (lc->N + 1 > lc->Nalloc)
 243     {
 244         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 245         srealloc( lc->names, lc->Nalloc );
 246     }
 247     snew(lc->names[lc->N], name_length+1);
 248     strncpy(lc->names[lc->N], name, name_length);
 249     lc->N++;
 250 }
 251
 252 /* check whether a component with index 'index' matches the given name, or
 253    is also NULL. Returns TRUE if this is the case.
 254    the string name does not need to end */
 255 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 256                                         int                        index,
 257                                         const char                *name,
 258                                         size_t                     name_length)
 259 {
 260     size_t len;
 261     if (index >= lc->N)
 262     {
 263         return FALSE;
 264     }
 265     if (name == NULL && lc->names[index] == NULL)
 266     {
 267         return TRUE;
 268     }
 269     if ((name == NULL) != (lc->names[index] == NULL))
 270     {
 271         return FALSE;
 272     }
 273     len = strlen(lc->names[index]);
 274     if (len != name_length)
 275     {
 276         return FALSE;
 277     }
 278     if (strncmp(lc->names[index], name, name_length) == 0)
 279     {
 280         return TRUE;
 281     }
 282     return FALSE;
 283 }
 284
 285 /* Find the index of a given lambda component name, or -1 if not found */
 286 static int lambda_components_find(const lambda_components_t *lc,
 287                                   const char                *name,
 288                                   size_t                     name_length)
 289 {
 290     int i;
 291
 292     for (i = 0; i < lc->N; i++)
 293     {
 294         if (strncmp(lc->names[i], name, name_length) == 0)
 295         {
 296             return i;
 297         }
 298     }
 299     return -1;
 300 }
 301
 302
 303
 304 /* initialize a lambda vector */
 305 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 306 {
 307     snew(lv->val, lc->N);
 308     lv->index = -1;
 309     lv->dhdl  = -1;
 310     lv->lc    = lc;
 311 }
 312
 313 static void lambda_vec_destroy(lambda_vec_t *lv)
 314 {
 315     sfree(lv->val);
 316 }
 317
 318 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 319 {
 320     int i;
 321
 322     lambda_vec_init(lv, orig->lc);
 323     lv->dhdl  = orig->dhdl;
 324     lv->index = orig->index;
 325     for (i = 0; i < lv->lc->N; i++)
 326     {
 327         lv->val[i] = orig->val[i];
 328     }
 329 }
 330
 331 /* write a lambda vec to a preallocated string */
 332 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 333 {
 334     int    i;
 335     size_t np;
 336
 337     str[0] = 0; /* reset the string */
 338     if (lv->dhdl < 0)
 339     {
 340         if (named)
 341         {
 342             str += sprintf(str, "delta H to ");
 343         }
 344         if (lv->lc->N > 1)
 345         {
 346             str += sprintf(str, "(");
 347         }
 348         for (i = 0; i < lv->lc->N; i++)
 349         {
 350             str += sprintf(str, "%g", lv->val[i]);
 351             if (i < lv->lc->N-1)
 352             {
 353                 str += sprintf(str, ", ");
 354             }
 355         }
 356         if (lv->lc->N > 1)
 357         {
 358             str += sprintf(str, ")");
 359         }
 360     }
 361     else
 362     {
 363         /* this lambda vector describes a derivative */
 364         str += sprintf(str, "dH/dl");
 365         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 366         {
 367             str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 368         }
 369     }
 370 }
 371
 372 /* write a shortened version of the lambda vec to a preallocated string */
 373 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 374 {
 375     int    i;
 376     size_t np;
 377
 378     if (lv->index >= 0)
 379     {
 380         sprintf(str, "%6d", lv->index);
 381     }
 382     else
 383     {
 384         if (lv->dhdl < 0)
 385         {
 386             sprintf(str, "%6.3f", lv->val[0]);
 387         }
 388         else
 389         {
 390             sprintf(str, "dH/dl[%d]", lv->dhdl);
 391         }
 392     }
 393 }
 394
 395 /* write an intermediate version of two lambda vecs to a preallocated string */
 396 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 397                                           const lambda_vec_t *b, char *str)
 398 {
 399     int    i;
 400     size_t np;
 401
 402     str[0] = 0;
 403     if ( (a->index >= 0) && (b->index >= 0) )
 404     {
 405         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 406     }
 407     else
 408     {
 409         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 410         {
 411             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 412         }
 413     }
 414 }
 415
 416
 417
 418 /* calculate the difference in lambda vectors: c = a-b.
 419    c must be initialized already, and a and b must describe non-derivative
 420    lambda points */
 421 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 422                             lambda_vec_t *c)
 423 {
 424     int i;
 425
 426     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 427     {
 428         gmx_fatal(FARGS,
 429                   "Trying to calculate the difference between derivatives instead of lambda points");
 430     }
 431     if ((a->lc != b->lc) || (a->lc != c->lc) )
 432     {
 433         gmx_fatal(FARGS,
 434                   "Trying to calculate the difference lambdas with differing basis set");
 435     }
 436     for (i = 0; i < a->lc->N; i++)
 437     {
 438         c->val[i] = a->val[i] - b->val[i];
 439     }
 440 }
 441
 442 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 443    a and b must describe non-derivative lambda points */
 444 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 445 {
 446     int    i;
 447     double ret = 0.;
 448
 449     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 450     {
 451         gmx_fatal(FARGS,
 452                   "Trying to calculate the difference between derivatives instead of lambda points");
 453     }
 454     if (a->lc != b->lc)
 455     {
 456         gmx_fatal(FARGS,
 457                   "Trying to calculate the difference lambdas with differing basis set");
 458     }
 459     for (i = 0; i < a->lc->N; i++)
 460     {
 461         double df = a->val[i] - b->val[i];
 462         ret += df*df;
 463     }
 464     return sqrt(ret);
 465 }
 466
 467
 468 /* check whether two lambda vectors are the same */
 469 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 470 {
 471     int i;
 472
 473     if (a->lc != b->lc)
 474     {
 475         return FALSE;
 476     }
 477     if (a->dhdl < 0)
 478     {
 479         for (i = 0; i < a->lc->N; i++)
 480         {
 481             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 482             {
 483                 return FALSE;
 484             }
 485         }
 486         return TRUE;
 487     }
 488     else
 489     {
 490         /* they're derivatives, so we check whether the indices match */
 491         return (a->dhdl == b->dhdl);
 492     }
 493 }
 494
 495 /* Compare the sort order of two foreign lambda vectors
 496
 497     returns 1 if a is 'bigger' than b,
 498     returns 0 if they're the same,
 499     returns -1 if a is 'smaller' than b.*/
 500 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 501                                        const lambda_vec_t *b)
 502 {
 503     int      i;
 504     double   norm_a    = 0, norm_b = 0;
 505     gmx_bool different = FALSE;
 506
 507     if (a->lc != b->lc)
 508     {
 509         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 510     }
 511     /* if either one has an index we sort based on that */
 512     if ((a->index >= 0) || (b->index >= 0))
 513     {
 514         if (a->index == b->index)
 515         {
 516             return 0;
 517         }
 518         return (a->index > b->index) ? 1 : -1;
 519     }
 520     if (a->dhdl >= 0 || b->dhdl >= 0)
 521     {
 522         /* lambda vectors that are derivatives always sort higher than those
 523            without derivatives */
 524         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 525         {
 526             return (a->dhdl >= 0) ? 1 : -1;
 527         }
 528         return a->dhdl > b->dhdl;
 529     }
 530
 531     /* neither has an index, so we can only sort on the lambda components,
 532        which is only valid if there is one component */
 533     for (i = 0; i < a->lc->N; i++)
 534     {
 535         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 536         {
 537             different = TRUE;
 538         }
 539         norm_a += a->val[i]*a->val[i];
 540         norm_b += b->val[i]*b->val[i];
 541     }
 542     if (!different)
 543     {
 544         return 0;
 545     }
 546     return norm_a > norm_b;
 547 }
 548
 549 /* Compare the sort order of two native lambda vectors
 550
 551     returns 1 if a is 'bigger' than b,
 552     returns 0 if they're the same,
 553     returns -1 if a is 'smaller' than b.*/
 554 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 555                                       const lambda_vec_t *b)
 556 {
 557     int i;
 558
 559     if (a->lc != b->lc)
 560     {
 561         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 562     }
 563     /* if either one has an index we sort based on that */
 564     if ((a->index >= 0) || (b->index >= 0))
 565     {
 566         if (a->index == b->index)
 567         {
 568             return 0;
 569         }
 570         return (a->index > b->index) ? 1 : -1;
 571     }
 572     /* neither has an index, so we can only sort on the lambda components,
 573        which is only valid if there is one component */
 574     if (a->lc->N > 1)
 575     {
 576         gmx_fatal(FARGS,
 577                   "Can't compare lambdas with no index and > 1 component");
 578     }
 579     if (a->dhdl >= 0 || b->dhdl >= 0)
 580     {
 581         gmx_fatal(FARGS,
 582                   "Can't compare native lambdas that are derivatives");
 583     }
 584     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 585     {
 586         return 0;
 587     }
 588     return a->val[0] > b->val[0] ? 1 : -1;
 589 }
 590
 591
 592
 593
 594 static void hist_init(hist_t *h, int nhist, int *nbin)
 595 {
 596     int i;
 597     if (nhist > 2)
 598     {
 599         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 600     }
 601     for (i = 0; i < nhist; i++)
 602     {
 603         snew(h->bin[i], nbin[i]);
 604         h->x0[i]      = 0;
 605         h->nbin[i]    = nbin[i];
 606         h->start_time = h->delta_time = 0;
 607         h->dx[i]      = 0;
 608     }
 609     h->sum   = 0;
 610     h->nhist = nhist;
 611 }
 612
 613 static void hist_destroy(hist_t *h)
 614 {
 615     sfree(h->bin);
 616 }
 617
 618
 619 static void xvg_init(xvg_t *ba)
 620 {
 621     ba->filename = NULL;
 622     ba->nset     = 0;
 623     ba->np_alloc = 0;
 624     ba->np       = NULL;
 625     ba->y        = NULL;
 626 }
 627
 628 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 629                          lambda_vec_t *foreign_lambda, double temp,
 630                          gmx_bool derivative, const char *filename)
 631 {
 632     s->native_lambda  = native_lambda;
 633     s->foreign_lambda = foreign_lambda;
 634     s->temp           = temp;
 635     s->derivative     = derivative;
 636
 637     s->ndu        = 0;
 638     s->du         = NULL;
 639     s->t          = NULL;
 640     s->start_time = s->delta_time = 0;
 641     s->hist       = NULL;
 642     s->du_alloc   = NULL;
 643     s->t_alloc    = NULL;
 644     s->hist_alloc = NULL;
 645     s->ndu_alloc  = 0;
 646     s->nt_alloc   = 0;
 647
 648     s->ntot     = 0;
 649     s->filename = filename;
 650 }
 651
 652 static void sample_range_init(sample_range_t *r, samples_t *s)
 653 {
 654     r->start = 0;
 655     r->end   = s->ndu;
 656     r->use   = TRUE;
 657     r->s     = NULL;
 658 }
 659
 660 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 661                              lambda_vec_t *foreign_lambda, double temp)
 662 {
 663     sc->native_lambda  = native_lambda;
 664     sc->foreign_lambda = foreign_lambda;
 665     sc->temp           = temp;
 666
 667     sc->nsamples       = 0;
 668     sc->s              = NULL;
 669     sc->r              = NULL;
 670     sc->nsamples_alloc = 0;
 671
 672     sc->ntot = 0;
 673     sc->next = sc->prev = NULL;
 674 }
 675
 676 static void sample_coll_destroy(sample_coll_t *sc)
 677 {
 678     /* don't free the samples themselves */
 679     sfree(sc->r);
 680     sfree(sc->s);
 681 }
 682
 683
 684 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 685                              double temp)
 686 {
 687     l->lambda = native_lambda;
 688     l->temp   = temp;
 689
 690     l->next = NULL;
 691     l->prev = NULL;
 692
 693     l->sc = &(l->sc_head);
 694
 695     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 696     l->sc->next = l->sc;
 697     l->sc->prev = l->sc;
 698 }
 699
 700 static void barres_init(barres_t *br)
 701 {
 702     br->dg            = 0;
 703     br->dg_err        = 0;
 704     br->sa            = 0;
 705     br->sa_err        = 0;
 706     br->sb            = 0;
 707     br->sb_err        = 0;
 708     br->dg_stddev     = 0;
 709     br->dg_stddev_err = 0;
 710
 711     br->a = NULL;
 712     br->b = NULL;
 713 }
 714
 715
 716 /* calculate the total number of samples in a sample collection */
 717 static void sample_coll_calc_ntot(sample_coll_t *sc)
 718 {
 719     int i;
 720
 721     sc->ntot = 0;
 722     for (i = 0; i < sc->nsamples; i++)
 723     {
 724         if (sc->r[i].use)
 725         {
 726             if (sc->s[i]->hist)
 727             {
 728                 sc->ntot += sc->s[i]->ntot;
 729             }
 730             else
 731             {
 732                 sc->ntot += sc->r[i].end - sc->r[i].start;
 733             }
 734         }
 735     }
 736 }
 737
 738
 739 /* find the barsamples_t associated with a lambda that corresponds to
 740    a specific foreign lambda */
 741 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 742                                                    lambda_vec_t  *foreign_lambda)
 743 {
 744     sample_coll_t *sc = l->sc->next;
 745
 746     while (sc != l->sc)
 747     {
 748         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 749         {
 750             return sc;
 751         }
 752         sc = sc->next;
 753     }
 754
 755     return NULL;
 756 }
 757
 758 /* insert li into an ordered list of lambda_colls */
 759 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 760 {
 761     sample_coll_t *scn = l->sc->next;
 762     while ( (scn != l->sc) )
 763     {
 764         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 765         {
 766             break;
 767         }
 768         scn = scn->next;
 769     }
 770     /* now insert it before the found scn */
 771     sc->next        = scn;
 772     sc->prev        = scn->prev;
 773     scn->prev->next = sc;
 774     scn->prev       = sc;
 775 }
 776
 777 /* insert li into an ordered list of lambdas */
 778 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 779 {
 780     lambda_data_t *lc = head->next;
 781     while (lc != head)
 782     {
 783         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 784         {
 785             break;
 786         }
 787         lc = lc->next;
 788     }
 789     /* now insert ourselves before the found lc */
 790     li->next       = lc;
 791     li->prev       = lc->prev;
 792     lc->prev->next = li;
 793     lc->prev       = li;
 794 }
 795
 796 /* insert a sample and a sample_range into a sample_coll. The
 797     samples are stored as a pointer, the range is copied. */
 798 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 799                                       sample_range_t *r)
 800 {
 801     /* first check if it belongs here */
 802     if (sc->temp != s->temp)
 803     {
 804         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 805                   s->filename, sc->next->s[0]->filename);
 806     }
 807     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 808     {
 809         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 810                   s->filename, sc->next->s[0]->filename);
 811     }
 812     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 813     {
 814         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 815                   s->filename, sc->next->s[0]->filename);
 816     }
 817
 818     /* check if there's room */
 819     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 820     {
 821         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 822         srenew(sc->s, sc->nsamples_alloc);
 823         srenew(sc->r, sc->nsamples_alloc);
 824     }
 825     sc->s[sc->nsamples] = s;
 826     sc->r[sc->nsamples] = *r;
 827     sc->nsamples++;
 828
 829     sample_coll_calc_ntot(sc);
 830 }
 831
 832 /* insert a sample into a lambda_list, creating the right sample_coll if
 833    neccesary */
 834 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 835 {
 836     gmx_bool       found = FALSE;
 837     sample_coll_t *sc;
 838     sample_range_t r;
 839
 840     lambda_data_t *l = head->next;
 841
 842     /* first search for the right lambda_data_t */
 843     while (l != head)
 844     {
 845         if (lambda_vec_same(l->lambda, s->native_lambda) )
 846         {
 847             found = TRUE;
 848             break;
 849         }
 850         l = l->next;
 851     }
 852
 853     if (!found)
 854     {
 855         snew(l, 1);                                     /* allocate a new one */
 856         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 857         lambda_data_insert_lambda(head, l);             /* add it to the list */
 858     }
 859
 860     /* now look for a sample collection */
 861     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 862     if (!sc)
 863     {
 864         snew(sc, 1); /* allocate a new one */
 865         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 866         lambda_data_insert_sample_coll(l, sc);
 867     }
 868
 869     /* now insert the samples into the sample coll */
 870     sample_range_init(&r, s);
 871     sample_coll_insert_sample(sc, s, &r);
 872 }
 873
 874
 875 /* make a histogram out of a sample collection */
 876 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 877                                   int *nbin_alloc, int *nbin,
 878                                   double *dx, double *xmin, int nbin_default)
 879 {
 880     int      i, j, k;
 881     gmx_bool dx_set   = FALSE;
 882     gmx_bool xmin_set = FALSE;
 883
 884     gmx_bool xmax_set      = FALSE;
 885     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 886                                        limits of a histogram */
 887     double   xmax          = -1;
 888
 889     /* first determine dx and xmin; try the histograms */
 890     for (i = 0; i < sc->nsamples; i++)
 891     {
 892         if (sc->s[i]->hist)
 893         {
 894             hist_t *hist = sc->s[i]->hist;
 895             for (k = 0; k < hist->nhist; k++)
 896             {
 897                 double hdx      = hist->dx[k];
 898                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 899
 900                 /* we use the biggest dx*/
 901                 if ( (!dx_set) || hist->dx[0] > *dx)
 902                 {
 903                     dx_set = TRUE;
 904                     *dx    = hist->dx[0];
 905                 }
 906                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 907                 {
 908                     xmin_set = TRUE;
 909                     *xmin    = (hist->x0[k]*hdx);
 910                 }
 911
 912                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 913                 {
 914                     xmax_set = TRUE;
 915                     xmax     = xmax_now;
 916                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 917                     {
 918                         xmax_set_hard = TRUE;
 919                     }
 920                 }
 921                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 922                 {
 923                     xmax_set_hard = TRUE;
 924                     xmax          = xmax_now;
 925                 }
 926             }
 927         }
 928     }
 929     /* and the delta us */
 930     for (i = 0; i < sc->nsamples; i++)
 931     {
 932         if (sc->s[i]->ndu > 0)
 933         {
 934             /* determine min and max */
 935             int    starti  = sc->r[i].start;
 936             int    endi    = sc->r[i].end;
 937             double du_xmin = sc->s[i]->du[starti];
 938             double du_xmax = sc->s[i]->du[starti];
 939             for (j = starti+1; j < endi; j++)
 940             {
 941                 if (sc->s[i]->du[j] < du_xmin)
 942                 {
 943                     du_xmin = sc->s[i]->du[j];
 944                 }
 945                 if (sc->s[i]->du[j] > du_xmax)
 946                 {
 947                     du_xmax = sc->s[i]->du[j];
 948                 }
 949             }
 950
 951             /* and now change the limits */
 952             if ( (!xmin_set) || (du_xmin < *xmin) )
 953             {
 954                 xmin_set = TRUE;
 955                 *xmin    = du_xmin;
 956             }
 957             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 958             {
 959                 xmax_set = TRUE;
 960                 xmax     = du_xmax;
 961             }
 962         }
 963     }
 964
 965     if (!xmax_set || !xmin_set)
 966     {
 967         *nbin = 0;
 968         return;
 969     }
 970
 971
 972     if (!dx_set)
 973     {
 974         *nbin = nbin_default;
 975         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 976                                                be 0, and we count from 0 */
 977     }
 978     else
 979     {
 980         *nbin = (xmax-(*xmin))/(*dx);
 981     }
 982
 983     if (*nbin > *nbin_alloc)
 984     {
 985         *nbin_alloc = *nbin;
 986         srenew(*bin, *nbin_alloc);
 987     }
 988
 989     /* reset the histogram */
 990     for (i = 0; i < (*nbin); i++)
 991     {
 992         (*bin)[i] = 0;
 993     }
 994
 995     /* now add the actual data */
 996     for (i = 0; i < sc->nsamples; i++)
 997     {
 998         if (sc->s[i]->hist)
 999         {
1000             hist_t *hist = sc->s[i]->hist;
1001             for (k = 0; k < hist->nhist; k++)
1002             {
1003                 double hdx       = hist->dx[k];
1004                 double xmin_hist = hist->x0[k]*hdx;
1005                 for (j = 0; j < hist->nbin[k]; j++)
1006                 {
1007                     /* calculate the bin corresponding to the middle of the
1008                        original bin */
1009                     double x     = hdx*(j+0.5) + xmin_hist;
1010                     int    binnr = (int)((x-(*xmin))/(*dx));
1011
1012                     if (binnr >= *nbin || binnr < 0)
1013                     {
1014                         binnr = (*nbin)-1;
1015                     }
1016
1017                     (*bin)[binnr] += hist->bin[k][j];
1018                 }
1019             }
1020         }
1021         else
1022         {
1023             int starti = sc->r[i].start;
1024             int endi   = sc->r[i].end;
1025             for (j = starti; j < endi; j++)
1026             {
1027                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1028                 if (binnr >= *nbin || binnr < 0)
1029                 {
1030                     binnr = (*nbin)-1;
1031                 }
1032
1033                 (*bin)[binnr]++;
1034             }
1035         }
1036     }
1037 }
1038
1039 /* write a collection of histograms to a file */
1040 void sim_data_histogram(sim_data_t *sd, const char *filename,
1041                         int nbin_default, const output_env_t oenv)
1042 {
1043     char           label_x[STRLEN];
1044     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1045     const char    *title   = "N(\\DeltaH)";
1046     const char    *label_y = "Samples";
1047     FILE          *fp;
1048     lambda_data_t *bl;
1049     int            nsets     = 0;
1050     char         **setnames  = NULL;
1051     gmx_bool       first_set = FALSE;
1052     /* histogram data: */
1053     int           *hist       = NULL;
1054     int            nbin       = 0;
1055     int            nbin_alloc = 0;
1056     double         dx         = 0;
1057     double         min        = 0;
1058     int            i;
1059     lambda_data_t *bl_head = sd->lb;
1060
1061     printf("\nWriting histogram to %s\n", filename);
1062     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1063
1064     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1065
1066     /* first get all the set names */
1067     bl = bl_head->next;
1068     /* iterate over all lambdas */
1069     while (bl != bl_head)
1070     {
1071         sample_coll_t *sc = bl->sc->next;
1072
1073         /* iterate over all samples */
1074         while (sc != bl->sc)
1075         {
1076             char buf[STRLEN], buf2[STRLEN];
1077
1078             nsets++;
1079             srenew(setnames, nsets);
1080             snew(setnames[nsets-1], STRLEN);
1081             if (sc->foreign_lambda->dhdl < 0)
1082             {
1083                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1084                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1085                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1086                         deltag, lambda, buf2, lambda, buf);
1087             }
1088             else
1089             {
1090                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1091                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1092                         dhdl, lambda, buf);
1093             }
1094             sc = sc->next;
1095         }
1096
1097         bl = bl->next;
1098     }
1099     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1100
1101
1102     /* now make the histograms */
1103     bl = bl_head->next;
1104     /* iterate over all lambdas */
1105     while (bl != bl_head)
1106     {
1107         sample_coll_t *sc = bl->sc->next;
1108
1109         /* iterate over all samples */
1110         while (sc != bl->sc)
1111         {
1112             if (!first_set)
1113             {
1114                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1115             }
1116
1117             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1118                                   nbin_default);
1119
1120             for (i = 0; i < nbin; i++)
1121             {
1122                 double xmin = i*dx + min;
1123                 double xmax = (i+1)*dx + min;
1124
1125                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1126             }
1127
1128             first_set = FALSE;
1129             sc        = sc->next;
1130         }
1131
1132         bl = bl->next;
1133     }
1134
1135     if (hist)
1136     {
1137         sfree(hist);
1138     }
1139
1140     xvgrclose(fp);
1141 }
1142
1143 /* create a collection (array) of barres_t object given a ordered linked list
1144    of barlamda_t sample collections */
1145 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1146                                     gmx_bool use_dhdl)
1147 {
1148     lambda_data_t *bl;
1149     int            nlambda = 0;
1150     barres_t      *res;
1151     int            i;
1152     gmx_bool       dhdl    = FALSE;
1153     gmx_bool       first   = TRUE;
1154     lambda_data_t *bl_head = sd->lb;
1155
1156     /* first count the lambdas */
1157     bl = bl_head->next;
1158     while (bl != bl_head)
1159     {
1160         nlambda++;
1161         bl = bl->next;
1162     }
1163     snew(res, nlambda-1);
1164
1165     /* next put the right samples in the res */
1166     *nres = 0;
1167     bl    = bl_head->next->next; /* we start with the second one. */
1168     while (bl != bl_head)
1169     {
1170         sample_coll_t *sc, *scprev;
1171         barres_t      *br = &(res[*nres]);
1172         /* there is always a previous one. we search for that as a foreign
1173            lambda: */
1174         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1175         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1176
1177         barres_init(br);
1178
1179         if (use_dhdl)
1180         {
1181             /* we use dhdl */
1182
1183             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1184             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1185
1186             if (first)
1187             {
1188                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1189                 dhdl = TRUE;
1190             }
1191             if (!dhdl)
1192             {
1193                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1194             }
1195         }
1196         else if (!scprev && !sc)
1197         {
1198             gmx_fatal(FARGS, "There is no path from lambda=%f -> %f that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1199         }
1200
1201         /* normal delta H */
1202         if (!scprev)
1203         {
1204             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->lambda, bl->prev->lambda);
1205         }
1206         if (!sc)
1207         {
1208             gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->prev->lambda, bl->lambda);
1209         }
1210         br->a = scprev;
1211         br->b = sc;
1212
1213         first = FALSE;
1214         (*nres)++;
1215         bl = bl->next;
1216     }
1217     return res;
1218 }
1219
1220 /* estimate the maximum discretization error */
1221 static double barres_list_max_disc_err(barres_t *res, int nres)
1222 {
1223     int    i, j;
1224     double disc_err = 0.;
1225     double delta_lambda;
1226
1227     for (i = 0; i < nres; i++)
1228     {
1229         barres_t *br = &(res[i]);
1230
1231         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1232                                            br->a->native_lambda);
1233
1234         for (j = 0; j < br->a->nsamples; j++)
1235         {
1236             if (br->a->s[j]->hist)
1237             {
1238                 double Wfac = 1.;
1239                 if (br->a->s[j]->derivative)
1240                 {
1241                     Wfac =  delta_lambda;
1242                 }
1243
1244                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1245             }
1246         }
1247         for (j = 0; j < br->b->nsamples; j++)
1248         {
1249             if (br->b->s[j]->hist)
1250             {
1251                 double Wfac = 1.;
1252                 if (br->b->s[j]->derivative)
1253                 {
1254                     Wfac =  delta_lambda;
1255                 }
1256                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1257             }
1258         }
1259     }
1260     return disc_err;
1261 }
1262
1263
1264 /* impose start and end times on a sample collection, updating sample_ranges */
1265 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1266                                      double end_t)
1267 {
1268     int i;
1269     for (i = 0; i < sc->nsamples; i++)
1270     {
1271         samples_t      *s = sc->s[i];
1272         sample_range_t *r = &(sc->r[i]);
1273         if (s->hist)
1274         {
1275             double end_time = s->hist->delta_time*s->hist->sum +
1276                 s->hist->start_time;
1277             if (s->hist->start_time < begin_t || end_time > end_t)
1278             {
1279                 r->use = FALSE;
1280             }
1281         }
1282         else
1283         {
1284             if (!s->t)
1285             {
1286                 double end_time;
1287                 if (s->start_time < begin_t)
1288                 {
1289                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1290                 }
1291                 end_time = s->delta_time*s->ndu + s->start_time;
1292                 if (end_time > end_t)
1293                 {
1294                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1295                 }
1296             }
1297             else
1298             {
1299                 int j;
1300                 for (j = 0; j < s->ndu; j++)
1301                 {
1302                     if (s->t[j] < begin_t)
1303                     {
1304                         r->start = j;
1305                     }
1306
1307                     if (s->t[j] >= end_t)
1308                     {
1309                         r->end = j;
1310                         break;
1311                     }
1312                 }
1313             }
1314             if (r->start > r->end)
1315             {
1316                 r->use = FALSE;
1317             }
1318         }
1319     }
1320     sample_coll_calc_ntot(sc);
1321 }
1322
1323 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1324 {
1325     double         first_t, last_t;
1326     double         begin_t, end_t;
1327     lambda_data_t *lc;
1328     lambda_data_t *head = sd->lb;
1329     int            j;
1330
1331     if (begin <= 0 && end < 0)
1332     {
1333         return;
1334     }
1335
1336     /* first determine the global start and end times */
1337     first_t = -1;
1338     last_t  = -1;
1339     lc      = head->next;
1340     while (lc != head)
1341     {
1342         sample_coll_t *sc = lc->sc->next;
1343         while (sc != lc->sc)
1344         {
1345             for (j = 0; j < sc->nsamples; j++)
1346             {
1347                 double start_t, end_t;
1348
1349                 start_t = sc->s[j]->start_time;
1350                 end_t   =   sc->s[j]->start_time;
1351                 if (sc->s[j]->hist)
1352                 {
1353                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1354                 }
1355                 else
1356                 {
1357                     if (sc->s[j]->t)
1358                     {
1359                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1360                     }
1361                     else
1362                     {
1363                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1364                     }
1365                 }
1366
1367                 if (start_t < first_t || first_t < 0)
1368                 {
1369                     first_t = start_t;
1370                 }
1371                 if (end_t > last_t)
1372                 {
1373                     last_t = end_t;
1374                 }
1375             }
1376             sc = sc->next;
1377         }
1378         lc = lc->next;
1379     }
1380
1381     /* calculate the actual times */
1382     if (begin > 0)
1383     {
1384         begin_t = begin;
1385     }
1386     else
1387     {
1388         begin_t = first_t;
1389     }
1390
1391     if (end > 0)
1392     {
1393         end_t = end;
1394     }
1395     else
1396     {
1397         end_t = last_t;
1398     }
1399     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1400
1401     if (begin_t > end_t)
1402     {
1403         return;
1404     }
1405     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1406
1407     /* then impose them */
1408     lc = head->next;
1409     while (lc != head)
1410     {
1411         sample_coll_t *sc = lc->sc->next;
1412         while (sc != lc->sc)
1413         {
1414             sample_coll_impose_times(sc, begin_t, end_t);
1415             sc = sc->next;
1416         }
1417         lc = lc->next;
1418     }
1419 }
1420
1421
1422 /* create subsample i out of ni from an existing sample_coll */
1423 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1424                                              sample_coll_t *sc_orig,
1425                                              int i, int ni)
1426 {
1427     int             j;
1428     int             hist_start, hist_end;
1429
1430     gmx_large_int_t ntot_start;
1431     gmx_large_int_t ntot_end;
1432     gmx_large_int_t ntot_so_far;
1433
1434     *sc = *sc_orig; /* just copy all fields */
1435
1436     /* allocate proprietary memory */
1437     snew(sc->s, sc_orig->nsamples);
1438     snew(sc->r, sc_orig->nsamples);
1439
1440     /* copy the samples */
1441     for (j = 0; j < sc_orig->nsamples; j++)
1442     {
1443         sc->s[j] = sc_orig->s[j];
1444         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1445     }
1446
1447     /* now fix start and end fields */
1448     /* the casts avoid possible overflows */
1449     ntot_start  = (gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1450     ntot_end    = (gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1451     ntot_so_far = 0;
1452     for (j = 0; j < sc->nsamples; j++)
1453     {
1454         gmx_large_int_t ntot_add;
1455         gmx_large_int_t new_start, new_end;
1456
1457         if (sc->r[j].use)
1458         {
1459             if (sc->s[j]->hist)
1460             {
1461                 ntot_add = sc->s[j]->hist->sum;
1462             }
1463             else
1464             {
1465                 ntot_add = sc->r[j].end - sc->r[j].start;
1466             }
1467         }
1468         else
1469         {
1470             ntot_add = 0;
1471         }
1472
1473         if (!sc->s[j]->hist)
1474         {
1475             if (ntot_so_far < ntot_start)
1476             {
1477                 /* adjust starting point */
1478                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1479             }
1480             else
1481             {
1482                 new_start = sc->r[j].start;
1483             }
1484             /* adjust end point */
1485             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1486             if (new_end > sc->r[j].end)
1487             {
1488                 new_end = sc->r[j].end;
1489             }
1490
1491             /* check if we're in range at all */
1492             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1493             {
1494                 new_start = 0;
1495                 new_end   = 0;
1496             }
1497             /* and write the new range */
1498             sc->r[j].start = (int)new_start;
1499             sc->r[j].end   = (int)new_end;
1500         }
1501         else
1502         {
1503             if (sc->r[j].use)
1504             {
1505                 double overlap;
1506                 double ntot_start_norm, ntot_end_norm;
1507                 /* calculate the amount of overlap of the
1508                    desired range (ntot_start -- ntot_end) onto
1509                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1510
1511                 /* first calculate normalized bounds
1512                    (where 0 is the start of the hist range, and 1 the end) */
1513                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1514                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1515
1516                 /* now fix the boundaries */
1517                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1518                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1519
1520                 /* and calculate the overlap */
1521                 overlap = ntot_end_norm - ntot_start_norm;
1522
1523                 if (overlap > 0.95) /* we allow for 5% slack */
1524                 {
1525                     sc->r[j].use = TRUE;
1526                 }
1527                 else if (overlap < 0.05)
1528                 {
1529                     sc->r[j].use = FALSE;
1530                 }
1531                 else
1532                 {
1533                     return FALSE;
1534                 }
1535             }
1536         }
1537         ntot_so_far += ntot_add;
1538     }
1539     sample_coll_calc_ntot(sc);
1540
1541     return TRUE;
1542 }
1543
1544 /* calculate minimum and maximum work values in sample collection */
1545 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1546                                 double *Wmin, double *Wmax)
1547 {
1548     int i, j;
1549
1550     *Wmin = FLT_MAX;
1551     *Wmax = -FLT_MAX;
1552
1553     for (i = 0; i < sc->nsamples; i++)
1554     {
1555         samples_t      *s = sc->s[i];
1556         sample_range_t *r = &(sc->r[i]);
1557         if (r->use)
1558         {
1559             if (!s->hist)
1560             {
1561                 for (j = r->start; j < r->end; j++)
1562                 {
1563                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1564                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1565                 }
1566             }
1567             else
1568             {
1569                 int    hd = 0; /* determine the histogram direction: */
1570                 double dx;
1571                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1572                 {
1573                     hd = 1;
1574                 }
1575                 dx = s->hist->dx[hd];
1576
1577                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1578                 {
1579                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1580                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1581                     /* look for the highest value bin with values */
1582                     if (s->hist->bin[hd][j] > 0)
1583                     {
1584                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1585                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1586                         break;
1587                     }
1588                 }
1589             }
1590         }
1591     }
1592 }
1593
1594 /* Initialize a sim_data structure */
1595 static void sim_data_init(sim_data_t *sd)
1596 {
1597     /* make linked list */
1598     sd->lb       = &(sd->lb_head);
1599     sd->lb->next = sd->lb;
1600     sd->lb->prev = sd->lb;
1601
1602     lambda_components_init(&(sd->lc));
1603 }
1604
1605
1606 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1607 {
1608     int    i;
1609     double sum;
1610
1611     sum = 0;
1612
1613     for (i = 0; i < n; i++)
1614     {
1615         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1616     }
1617
1618     return sum;
1619 }
1620
1621 /* calculate the BAR average given a histogram
1622
1623     if type== 0, calculate the best estimate for the average,
1624     if type==-1, calculate the minimum possible value given the histogram
1625     if type== 1, calculate the maximum possible value given the histogram */
1626 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1627                                 int type)
1628 {
1629     double sum = 0.;
1630     int    i;
1631     int    max;
1632     /* normalization factor multiplied with bin width and
1633        number of samples (we normalize through M): */
1634     double normdx = 1.;
1635     int    hd     = 0; /* determine the histogram direction: */
1636     double dx;
1637
1638     if ( (hist->nhist > 1) && (Wfac < 0) )
1639     {
1640         hd = 1;
1641     }
1642     dx  = hist->dx[hd];
1643     max = hist->nbin[hd]-1;
1644     if (type == 1)
1645     {
1646         max = hist->nbin[hd]; /* we also add whatever was out of range */
1647     }
1648
1649     for (i = 0; i < max; i++)
1650     {
1651         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1652         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1653
1654         sum += pxdx/(1. + exp(x + sbMmDG));
1655     }
1656
1657     return sum;
1658 }
1659
1660 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1661                                 double temp, double tol, int type)
1662 {
1663     double kT, beta, M;
1664     double DG;
1665     int    i, j;
1666     double Wfac1, Wfac2, Wmin, Wmax;
1667     double DG0, DG1, DG2, dDG1;
1668     double sum1, sum2;
1669     double n1, n2; /* numbers of samples as doubles */
1670
1671     kT   = BOLTZ*temp;
1672     beta = 1/kT;
1673
1674     /* count the numbers of samples */
1675     n1 = ca->ntot;
1676     n2 = cb->ntot;
1677
1678     M = log(n1/n2);
1679
1680     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1681     if (ca->foreign_lambda->dhdl < 0)
1682     {
1683         /* this is the case when the delta U were calculated directly
1684            (i.e. we're not scaling dhdl) */
1685         Wfac1 = beta;
1686         Wfac2 = beta;
1687     }
1688     else
1689     {
1690         /* we're using dhdl, so delta_lambda needs to be a
1691            multiplication factor.  */
1692         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1693         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1694                                                   ca->native_lambda);
1695         if (cb->native_lambda->lc->N > 1)
1696         {
1697             gmx_fatal(FARGS,
1698                       "Can't (yet) do multi-component dhdl interpolation");
1699         }
1700
1701         Wfac1 =  beta*delta_lambda;
1702         Wfac2 = -beta*delta_lambda;
1703     }
1704
1705     if (beta < 1)
1706     {
1707         /* We print the output both in kT and kJ/mol.
1708          * Here we determine DG in kT, so when beta < 1
1709          * the precision has to be increased.
1710          */
1711         tol *= beta;
1712     }
1713
1714     /* Calculate minimum and maximum work to give an initial estimate of
1715      * delta G  as their average.
1716      */
1717     {
1718         double Wmin1, Wmin2, Wmax1, Wmax2;
1719         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1720         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1721
1722         Wmin = min(Wmin1, Wmin2);
1723         Wmax = max(Wmax1, Wmax2);
1724     }
1725
1726     DG0 = Wmin;
1727     DG2 = Wmax;
1728
1729     if (debug)
1730     {
1731         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1732     }
1733     /* We approximate by bisection: given our initial estimates
1734        we keep checking whether the halfway point is greater or
1735        smaller than what we get out of the BAR averages.
1736
1737        For the comparison we can use twice the tolerance. */
1738     while (DG2 - DG0 > 2*tol)
1739     {
1740         DG1 = 0.5*(DG0 + DG2);
1741
1742         /* calculate the BAR averages */
1743         dDG1 = 0.;
1744
1745         for (i = 0; i < ca->nsamples; i++)
1746         {
1747             samples_t      *s = ca->s[i];
1748             sample_range_t *r = &(ca->r[i]);
1749             if (r->use)
1750             {
1751                 if (s->hist)
1752                 {
1753                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1754                 }
1755                 else
1756                 {
1757                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1758                                          Wfac1, (M-DG1));
1759                 }
1760             }
1761         }
1762         for (i = 0; i < cb->nsamples; i++)
1763         {
1764             samples_t      *s = cb->s[i];
1765             sample_range_t *r = &(cb->r[i]);
1766             if (r->use)
1767             {
1768                 if (s->hist)
1769                 {
1770                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1771                 }
1772                 else
1773                 {
1774                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1775                                          Wfac2, -(M-DG1));
1776                 }
1777             }
1778         }
1779
1780         if (dDG1 < 0)
1781         {
1782             DG0 = DG1;
1783         }
1784         else
1785         {
1786             DG2 = DG1;
1787         }
1788         if (debug)
1789         {
1790             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1791         }
1792     }
1793
1794     return 0.5*(DG0 + DG2);
1795 }
1796
1797 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1798                              double temp, double dg, double *sa, double *sb)
1799 {
1800     int    i, j;
1801     double W_ab = 0.;
1802     double W_ba = 0.;
1803     double kT, beta;
1804     double Wfac1, Wfac2;
1805     double n1, n2;
1806
1807     kT   = BOLTZ*temp;
1808     beta = 1/kT;
1809
1810     /* count the numbers of samples */
1811     n1 = ca->ntot;
1812     n2 = cb->ntot;
1813
1814     /* to ensure the work values are the same as during the delta_G */
1815     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1816     if (ca->foreign_lambda->dhdl < 0)
1817     {
1818         /* this is the case when the delta U were calculated directly
1819            (i.e. we're not scaling dhdl) */
1820         Wfac1 = beta;
1821         Wfac2 = beta;
1822     }
1823     else
1824     {
1825         /* we're using dhdl, so delta_lambda needs to be a
1826            multiplication factor.  */
1827         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1828                                                   ca->native_lambda);
1829         Wfac1 =  beta*delta_lambda;
1830         Wfac2 = -beta*delta_lambda;
1831     }
1832
1833     /* first calculate the average work in both directions */
1834     for (i = 0; i < ca->nsamples; i++)
1835     {
1836         samples_t      *s = ca->s[i];
1837         sample_range_t *r = &(ca->r[i]);
1838         if (r->use)
1839         {
1840             if (!s->hist)
1841             {
1842                 for (j = r->start; j < r->end; j++)
1843                 {
1844                     W_ab += Wfac1*s->du[j];
1845                 }
1846             }
1847             else
1848             {
1849                 /* normalization factor multiplied with bin width and
1850                    number of samples (we normalize through M): */
1851                 double normdx = 1.;
1852                 double dx;
1853                 int    hd = 0; /* histogram direction */
1854                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1855                 {
1856                     hd = 1;
1857                 }
1858                 dx = s->hist->dx[hd];
1859
1860                 for (j = 0; j < s->hist->nbin[0]; j++)
1861                 {
1862                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1863                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1864                     W_ab += pxdx*x;
1865                 }
1866             }
1867         }
1868     }
1869     W_ab /= n1;
1870
1871     for (i = 0; i < cb->nsamples; i++)
1872     {
1873         samples_t      *s = cb->s[i];
1874         sample_range_t *r = &(cb->r[i]);
1875         if (r->use)
1876         {
1877             if (!s->hist)
1878             {
1879                 for (j = r->start; j < r->end; j++)
1880                 {
1881                     W_ba += Wfac1*s->du[j];
1882                 }
1883             }
1884             else
1885             {
1886                 /* normalization factor multiplied with bin width and
1887                    number of samples (we normalize through M): */
1888                 double normdx = 1.;
1889                 double dx;
1890                 int    hd = 0; /* histogram direction */
1891                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1892                 {
1893                     hd = 1;
1894                 }
1895                 dx = s->hist->dx[hd];
1896
1897                 for (j = 0; j < s->hist->nbin[0]; j++)
1898                 {
1899                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1900                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1901                     W_ba += pxdx*x;
1902                 }
1903             }
1904         }
1905     }
1906     W_ba /= n2;
1907
1908     /* then calculate the relative entropies */
1909     *sa = (W_ab - dg);
1910     *sb = (W_ba + dg);
1911 }
1912
1913 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1914                            double temp, double dg, double *stddev)
1915 {
1916     int    i, j;
1917     double M;
1918     double sigmafact = 0.;
1919     double kT, beta;
1920     double Wfac1, Wfac2;
1921     double n1, n2;
1922
1923     kT   = BOLTZ*temp;
1924     beta = 1/kT;
1925
1926     /* count the numbers of samples */
1927     n1 = ca->ntot;
1928     n2 = cb->ntot;
1929
1930     /* to ensure the work values are the same as during the delta_G */
1931     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1932     if (ca->foreign_lambda->dhdl < 0)
1933     {
1934         /* this is the case when the delta U were calculated directly
1935            (i.e. we're not scaling dhdl) */
1936         Wfac1 = beta;
1937         Wfac2 = beta;
1938     }
1939     else
1940     {
1941         /* we're using dhdl, so delta_lambda needs to be a
1942            multiplication factor.  */
1943         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1944                                                   ca->native_lambda);
1945         Wfac1 =  beta*delta_lambda;
1946         Wfac2 = -beta*delta_lambda;
1947     }
1948
1949     M = log(n1/n2);
1950
1951
1952     /* calculate average in both directions */
1953     for (i = 0; i < ca->nsamples; i++)
1954     {
1955         samples_t      *s = ca->s[i];
1956         sample_range_t *r = &(ca->r[i]);
1957         if (r->use)
1958         {
1959             if (!s->hist)
1960             {
1961                 for (j = r->start; j < r->end; j++)
1962                 {
1963                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1964                 }
1965             }
1966             else
1967             {
1968                 /* normalization factor multiplied with bin width and
1969                    number of samples (we normalize through M): */
1970                 double normdx = 1.;
1971                 double dx;
1972                 int    hd = 0; /* histogram direction */
1973                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1974                 {
1975                     hd = 1;
1976                 }
1977                 dx = s->hist->dx[hd];
1978
1979                 for (j = 0; j < s->hist->nbin[0]; j++)
1980                 {
1981                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1982                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1983
1984                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1985                 }
1986             }
1987         }
1988     }
1989     for (i = 0; i < cb->nsamples; i++)
1990     {
1991         samples_t      *s = cb->s[i];
1992         sample_range_t *r = &(cb->r[i]);
1993         if (r->use)
1994         {
1995             if (!s->hist)
1996             {
1997                 for (j = r->start; j < r->end; j++)
1998                 {
1999                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
2000                 }
2001             }
2002             else
2003             {
2004                 /* normalization factor multiplied with bin width and
2005                    number of samples (we normalize through M): */
2006                 double normdx = 1.;
2007                 double dx;
2008                 int    hd = 0; /* histogram direction */
2009                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2010                 {
2011                     hd = 1;
2012                 }
2013                 dx = s->hist->dx[hd];
2014
2015                 for (j = 0; j < s->hist->nbin[0]; j++)
2016                 {
2017                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2018                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2019
2020                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2021                 }
2022             }
2023         }
2024     }
2025
2026     sigmafact /= (n1 + n2);
2027
2028
2029     /* Eq. 10 from
2030        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2031     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2032 }
2033
2034
2035
2036 static void calc_bar(barres_t *br, double tol,
2037                      int npee_min, int npee_max, gmx_bool *bEE,
2038                      double *partsum)
2039 {
2040     int      npee, p;
2041     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2042                                                         for calculated quantities */
2043     int      nsample1, nsample2;
2044     double   temp = br->a->temp;
2045     int      i, j;
2046     double   dg_min, dg_max;
2047     gmx_bool have_hist = FALSE;
2048
2049     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2050
2051     br->dg_disc_err      = 0.;
2052     br->dg_histrange_err = 0.;
2053
2054     /* check if there are histograms */
2055     for (i = 0; i < br->a->nsamples; i++)
2056     {
2057         if (br->a->r[i].use && br->a->s[i]->hist)
2058         {
2059             have_hist = TRUE;
2060             break;
2061         }
2062     }
2063     if (!have_hist)
2064     {
2065         for (i = 0; i < br->b->nsamples; i++)
2066         {
2067             if (br->b->r[i].use && br->b->s[i]->hist)
2068             {
2069                 have_hist = TRUE;
2070                 break;
2071             }
2072         }
2073     }
2074
2075     /* calculate histogram-specific errors */
2076     if (have_hist)
2077     {
2078         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2079         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2080
2081         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2082         {
2083             /* the histogram range  error is the biggest of the differences
2084                between the best estimate and the extremes */
2085             br->dg_histrange_err = fabs(dg_max - dg_min);
2086         }
2087         br->dg_disc_err = 0.;
2088         for (i = 0; i < br->a->nsamples; i++)
2089         {
2090             if (br->a->s[i]->hist)
2091             {
2092                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2093             }
2094         }
2095         for (i = 0; i < br->b->nsamples; i++)
2096         {
2097             if (br->b->s[i]->hist)
2098             {
2099                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2100             }
2101         }
2102     }
2103     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2104
2105     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2106
2107     dg_sig2     = 0;
2108     sa_sig2     = 0;
2109     sb_sig2     = 0;
2110     stddev_sig2 = 0;
2111
2112     *bEE = TRUE;
2113     {
2114         sample_coll_t ca, cb;
2115
2116         /* initialize the samples */
2117         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2118                          br->a->temp);
2119         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2120                          br->b->temp);
2121
2122         for (npee = npee_min; npee <= npee_max; npee++)
2123         {
2124             double dgs      = 0;
2125             double dgs2     = 0;
2126             double dsa      = 0;
2127             double dsb      = 0;
2128             double dsa2     = 0;
2129             double dsb2     = 0;
2130             double dstddev  = 0;
2131             double dstddev2 = 0;
2132
2133
2134             for (p = 0; p < npee; p++)
2135             {
2136                 double   dgp;
2137                 double   stddevc;
2138                 double   sac, sbc;
2139                 gmx_bool cac, cbc;
2140
2141                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2142                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2143
2144                 if (!cac || !cbc)
2145                 {
2146                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2147                     *bEE = FALSE;
2148                     if (cac)
2149                     {
2150                         sample_coll_destroy(&ca);
2151                     }
2152                     if (cbc)
2153                     {
2154                         sample_coll_destroy(&cb);
2155                     }
2156                     return;
2157                 }
2158
2159                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2160                 dgs  += dgp;
2161                 dgs2 += dgp*dgp;
2162
2163                 partsum[npee*(npee_max+1)+p] += dgp;
2164
2165                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2166                 dsa  += sac;
2167                 dsa2 += sac*sac;
2168                 dsb  += sbc;
2169                 dsb2 += sbc*sbc;
2170                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2171
2172                 dstddev  += stddevc;
2173                 dstddev2 += stddevc*stddevc;
2174
2175                 sample_coll_destroy(&ca);
2176                 sample_coll_destroy(&cb);
2177             }
2178             dgs     /= npee;
2179             dgs2    /= npee;
2180             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2181
2182             dsa     /= npee;
2183             dsa2    /= npee;
2184             dsb     /= npee;
2185             dsb2    /= npee;
2186             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2187             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2188
2189             dstddev     /= npee;
2190             dstddev2    /= npee;
2191             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2192         }
2193         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2194         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2195         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2196         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2197     }
2198 }
2199
2200
2201 static double bar_err(int nbmin, int nbmax, const double *partsum)
2202 {
2203     int    nb, b;
2204     double svar, s, s2, dg;
2205
2206     svar = 0;
2207     for (nb = nbmin; nb <= nbmax; nb++)
2208     {
2209         s  = 0;
2210         s2 = 0;
2211         for (b = 0; b < nb; b++)
2212         {
2213             dg  = partsum[nb*(nbmax+1)+b];
2214             s  += dg;
2215             s2 += dg*dg;
2216         }
2217         s    /= nb;
2218         s2   /= nb;
2219         svar += (s2 - s*s)/(nb - 1);
2220     }
2221
2222     return sqrt(svar/(nbmax + 1 - nbmin));
2223 }
2224
2225
2226 /* Seek the end of an identifier (consecutive non-spaces), followed by
2227    an optional number of spaces or '='-signs. Returns a pointer to the
2228    first non-space value found after that. Returns NULL if the string
2229    ends before that.
2230  */
2231 static const char *find_value(const char *str)
2232 {
2233     gmx_bool name_end_found = FALSE;
2234
2235     /* if the string is a NULL pointer, return a NULL pointer. */
2236     if (str == NULL)
2237     {
2238         return NULL;
2239     }
2240     while (*str != '\0')
2241     {
2242         /* first find the end of the name */
2243         if (!name_end_found)
2244         {
2245             if (isspace(*str) || (*str == '=') )
2246             {
2247                 name_end_found = TRUE;
2248             }
2249         }
2250         else
2251         {
2252             if (!( isspace(*str) || (*str == '=') ))
2253             {
2254                 return str;
2255             }
2256         }
2257         str++;
2258     }
2259     return NULL;
2260 }
2261
2262
2263
2264 /* read a vector-notation description of a lambda vector */
2265 static gmx_bool read_lambda_compvec(const char                *str,
2266                                     lambda_vec_t              *lv,
2267                                     const lambda_components_t *lc_in,
2268                                     lambda_components_t       *lc_out,
2269                                     const char               **end,
2270                                     const char                *fn)
2271 {
2272     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2273                                           components, or to check them */
2274     gmx_bool    start_reached = FALSE; /* whether the start of component names
2275                                           has been reached */
2276     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2277     int         n             = 0;     /* current component number */
2278     const char *val_start     = NULL;  /* start of the component name, or NULL
2279                                           if not in a value */
2280     char       *strtod_end;
2281     gmx_bool    OK = TRUE;
2282
2283     if (end)
2284     {
2285         *end = str;
2286     }
2287
2288
2289     if (lc_out && lc_out->N == 0)
2290     {
2291         initialize_lc = TRUE;
2292     }
2293
2294     if (lc_in == NULL)
2295     {
2296         lc_in = lc_out;
2297     }
2298
2299     while (1)
2300     {
2301         if (!start_reached)
2302         {
2303             if (isalnum(*str))
2304             {
2305                 vector        = FALSE;
2306                 start_reached = TRUE;
2307                 val_start     = str;
2308             }
2309             else if (*str == '(')
2310             {
2311                 vector        = TRUE;
2312                 start_reached = TRUE;
2313             }
2314             else if (!isspace(*str))
2315             {
2316                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2317             }
2318         }
2319         else
2320         {
2321             if (val_start)
2322             {
2323                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2324                 {
2325                     /* end of value */
2326                     if (lv == NULL)
2327                     {
2328                         if (initialize_lc)
2329                         {
2330                             lambda_components_add(lc_out, val_start,
2331                                                   (str-val_start));
2332                         }
2333                         else
2334                         {
2335                             if (!lambda_components_check(lc_out, n, val_start,
2336                                                          (str-val_start)))
2337                             {
2338                                 return FALSE;
2339                             }
2340                         }
2341                     }
2342                     else
2343                     {
2344                         /* add a vector component to lv */
2345                         lv->val[n] = strtod(val_start, &strtod_end);
2346                         if (val_start == strtod_end)
2347                         {
2348                             gmx_fatal(FARGS,
2349                                       "Error reading lambda vector in %s", fn);
2350                         }
2351                     }
2352                     /* reset for the next identifier */
2353                     val_start = NULL;
2354                     n++;
2355                     if (!vector)
2356                     {
2357                         return OK;
2358                     }
2359                 }
2360             }
2361             else if (isalnum(*str))
2362             {
2363                 val_start = str;
2364             }
2365             if (*str == ')')
2366             {
2367                 str++;
2368                 if (end)
2369                 {
2370                     *end = str;
2371                 }
2372                 if (!vector)
2373                 {
2374                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2375                 }
2376                 else
2377                 {
2378                     if (n == lc_in->N)
2379                     {
2380                         return OK;
2381                     }
2382                     else if (lv == NULL)
2383                     {
2384                         return FALSE;
2385                     }
2386                     else
2387                     {
2388                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2389                                   fn);
2390                         return FALSE;
2391                     }
2392
2393                 }
2394             }
2395         }
2396         if (*str == '\0')
2397         {
2398             break;
2399         }
2400         str++;
2401         if (end)
2402         {
2403             *end = str;
2404         }
2405     }
2406     if (vector)
2407     {
2408         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2409         return FALSE;
2410     }
2411     return OK;
2412 }
2413
2414 /* read and check the component names from a string */
2415 static gmx_bool read_lambda_components(const char          *str,
2416                                        lambda_components_t *lc,
2417                                        const char         **end,
2418                                        const char          *fn)
2419 {
2420     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2421 }
2422
2423 /* read an initialized lambda vector from a string */
2424 static gmx_bool read_lambda_vector(const char   *str,
2425                                    lambda_vec_t *lv,
2426                                    const char  **end,
2427                                    const char   *fn)
2428 {
2429     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2430 }
2431
2432
2433
2434 /* deduce lambda value from legend.
2435     fn = the file name
2436     legend = the legend string
2437     ba = the xvg data
2438     lam = the initialized lambda vector
2439    returns whether to use the data in this set.
2440  */
2441 static gmx_bool legend2lambda(const char   *fn,
2442                               const char   *legend,
2443                               xvg_t        *ba,
2444                               lambda_vec_t *lam)
2445 {
2446     double        lambda = 0;
2447     const char   *ptr    = NULL, *ptr2 = NULL;
2448     gmx_bool      ok     = FALSE;
2449     gmx_bool      bdhdl  = FALSE;
2450     const char   *tostr  = " to ";
2451
2452     if (legend == NULL)
2453     {
2454         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2455     }
2456
2457     /* look for the last 'to': */
2458     ptr2 = legend;
2459     do
2460     {
2461         ptr2 = strstr(ptr2, tostr);
2462         if (ptr2 != NULL)
2463         {
2464             ptr = ptr2;
2465             ptr2++;
2466         }
2467     }
2468     while (ptr2 != NULL && *ptr2 != '\0');
2469
2470     if (ptr)
2471     {
2472         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2473     }
2474     else
2475     {
2476         /* look for the = sign */
2477         ptr = strrchr(legend, '=');
2478         if (!ptr)
2479         {
2480             /* otherwise look for the last space */
2481             ptr = strrchr(legend, ' ');
2482         }
2483     }
2484
2485     if (strstr(legend, "dH"))
2486     {
2487         ok    = TRUE;
2488         bdhdl = TRUE;
2489     }
2490     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2491     {
2492         ok    = TRUE;
2493         bdhdl = FALSE;
2494     }
2495     else /*if (strstr(legend, "pV"))*/
2496     {
2497         return FALSE;
2498     }
2499     if (!ptr)
2500     {
2501         ok = FALSE;
2502     }
2503
2504     if (!ok)
2505     {
2506         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2507     }
2508     if (!bdhdl)
2509     {
2510         ptr = find_value(ptr);
2511         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2512         {
2513             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2514         }
2515     }
2516     else
2517     {
2518         int         dhdl_index;
2519         const char *end;
2520         char        buf[STRLEN];
2521
2522         ptr = strrchr(legend, '=');
2523         end = ptr;
2524         if (ptr)
2525         {
2526             /* there must be a component name */
2527             ptr--;
2528             if (ptr < legend)
2529             {
2530                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2531             }
2532             /* now backtrack to the start of the identifier */
2533             while (isspace(*ptr))
2534             {
2535                 end = ptr;
2536                 ptr--;
2537                 if (ptr < legend)
2538                 {
2539                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2540                 }
2541             }
2542             while (!isspace(*ptr))
2543             {
2544                 ptr--;
2545                 if (ptr < legend)
2546                 {
2547                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2548                 }
2549             }
2550             ptr++;
2551             strncpy(buf, ptr, (end-ptr));
2552             buf[(end-ptr)] = '\0';
2553             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2554             if (dhdl_index < 0)
2555             {
2556                 char buf[STRLEN];
2557                 strncpy(buf, ptr, (end-ptr));
2558                 buf[(end-ptr)] = '\0';
2559                 gmx_fatal(FARGS,
2560                           "Did not find lambda component for '%s' in %s",
2561                           buf, fn);
2562             }
2563         }
2564         else
2565         {
2566             if (lam->lc->N > 1)
2567             {
2568                 gmx_fatal(FARGS,
2569                           "dhdl without component name with >1 lambda component in %s",
2570                           fn);
2571             }
2572             dhdl_index = 0;
2573         }
2574         lam->dhdl = dhdl_index;
2575     }
2576     return TRUE;
2577 }
2578
2579 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2580                                 lambda_components_t *lc)
2581 {
2582     gmx_bool    bFound;
2583     const char *ptr;
2584     char       *end;
2585     double      native_lambda;
2586
2587     bFound = FALSE;
2588
2589     /* first check for a state string */
2590     ptr = strstr(subtitle, "state");
2591     if (ptr)
2592     {
2593         int         index = -1;
2594         const char *val_end;
2595
2596         /* the new 4.6 style lambda vectors */
2597         ptr = find_value(ptr);
2598         if (ptr)
2599         {
2600             index = strtol(ptr, &end, 10);
2601             if (ptr == end)
2602             {
2603                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2604                 return FALSE;
2605             }
2606             ptr = end;
2607         }
2608         else
2609         {
2610             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2611             return FALSE;
2612         }
2613         /* now find the lambda vector component names */
2614         while (*ptr != '(' && !isalnum(*ptr))
2615         {
2616             ptr++;
2617             if (*ptr == '\0')
2618             {
2619                 gmx_fatal(FARGS,
2620                           "Incomplete lambda vector component data in %s", fn);
2621                 return FALSE;
2622             }
2623         }
2624         val_end = ptr;
2625         if (!read_lambda_components(ptr, lc, &val_end, fn))
2626         {
2627             gmx_fatal(FARGS,
2628                       "lambda vector components in %s don't match those previously read",
2629                       fn);
2630         }
2631         ptr = find_value(val_end);
2632         if (!ptr)
2633         {
2634             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2635             return FALSE;
2636         }
2637         lambda_vec_init(&(ba->native_lambda), lc);
2638         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2639         {
2640             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2641         }
2642         ba->native_lambda.index = index;
2643         bFound                  = TRUE;
2644     }
2645     else
2646     {
2647         /* compatibility mode: check for lambda in other ways. */
2648         /* plain text lambda string */
2649         ptr = strstr(subtitle, "lambda");
2650         if (ptr == NULL)
2651         {
2652             /* xmgrace formatted lambda string */
2653             ptr = strstr(subtitle, "\\xl\\f{}");
2654         }
2655         if (ptr == NULL)
2656         {
2657             /* xmgr formatted lambda string */
2658             ptr = strstr(subtitle, "\\8l\\4");
2659         }
2660         if (ptr != NULL)
2661         {
2662             ptr = strstr(ptr, "=");
2663         }
2664         if (ptr != NULL)
2665         {
2666             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2667             /* add the lambda component name as an empty string */
2668             if (lc->N > 0)
2669             {
2670                 if (!lambda_components_check(lc, 0, "", 0))
2671                 {
2672                     gmx_fatal(FARGS,
2673                               "lambda vector components in %s don't match those previously read",
2674                               fn);
2675                 }
2676             }
2677             else
2678             {
2679                 lambda_components_add(lc, "", 0);
2680             }
2681             lambda_vec_init(&(ba->native_lambda), lc);
2682             ba->native_lambda.val[0] = native_lambda;
2683         }
2684     }
2685
2686     return bFound;
2687 }
2688
2689 static void filename2lambda(const char *fn, xvg_t *ba)
2690 {
2691     double        lambda;
2692     const char   *ptr, *digitptr;
2693     char         *endptr;
2694     int           dirsep;
2695     ptr = fn;
2696     /* go to the end of the path string and search backward to find the last
2697        directory in the path which has to contain the value of lambda
2698      */
2699     while (ptr[1] != '\0')
2700     {
2701         ptr++;
2702     }
2703     /* searching backward to find the second directory separator */
2704     dirsep   = 0;
2705     digitptr = NULL;
2706     while (ptr >= fn)
2707     {
2708         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2709         {
2710             if (dirsep == 1)
2711             {
2712                 break;
2713             }
2714             dirsep++;
2715         }
2716         /* save the last position of a digit between the last two
2717            separators = in the last dirname */
2718         if (dirsep > 0 && isdigit(*ptr))
2719         {
2720             digitptr = ptr;
2721         }
2722         ptr--;
2723     }
2724     if (!digitptr)
2725     {
2726         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2727                   " last directory in the path '%s' does not contain a number", fn);
2728     }
2729     if (digitptr[-1] == '-')
2730     {
2731         digitptr--;
2732     }
2733     lambda = strtod(digitptr, &endptr);
2734     if (endptr == digitptr)
2735     {
2736         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2737     }
2738 }
2739
2740 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2741                                   lambda_components_t *lc)
2742 {
2743     int          i;
2744     char        *subtitle, **legend, *ptr;
2745     int          np;
2746     gmx_bool     native_lambda_read = FALSE;
2747     char         buf[STRLEN];
2748     lambda_vec_t lv;
2749
2750     xvg_init(ba);
2751
2752     ba->filename = fn;
2753
2754     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2755     if (!ba->y)
2756     {
2757         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2758     }
2759     /* Reorder the data */
2760     ba->t  = ba->y[0];
2761     for (i = 1; i < ba->nset; i++)
2762     {
2763         ba->y[i-1] = ba->y[i];
2764     }
2765     ba->nset--;
2766
2767     snew(ba->np, ba->nset);
2768     for (i = 0; i < ba->nset; i++)
2769     {
2770         ba->np[i] = np;
2771     }
2772
2773     ba->temp = -1;
2774     if (subtitle != NULL)
2775     {
2776         /* try to extract temperature */
2777         ptr = strstr(subtitle, "T =");
2778         if (ptr != NULL)
2779         {
2780             ptr += 3;
2781             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2782             {
2783                 if (ba->temp <= 0)
2784                 {
2785                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2786                               ba->temp, fn);
2787                 }
2788             }
2789         }
2790     }
2791     if (ba->temp < 0)
2792     {
2793         if (*temp <= 0)
2794         {
2795             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]", fn);
2796         }
2797         ba->temp = *temp;
2798     }
2799
2800     /* Try to deduce lambda from the subtitle */
2801     if (subtitle)
2802     {
2803         if (subtitle2lambda(subtitle, ba, fn, lc))
2804         {
2805             native_lambda_read = TRUE;
2806         }
2807     }
2808     snew(ba->lambda, ba->nset);
2809     if (legend == NULL)
2810     {
2811         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2812         if (ba->nset == 1)
2813         {
2814             if (!native_lambda_read)
2815             {
2816                 /* Deduce lambda from the file name */
2817                 filename2lambda(fn, ba);
2818                 native_lambda_read = TRUE;
2819             }
2820             ba->lambda[0] = ba->native_lambda;
2821         }
2822         else
2823         {
2824             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2825         }
2826     }
2827     else
2828     {
2829         for (i = 0; i < ba->nset; )
2830         {
2831             gmx_bool use = FALSE;
2832             /* Read lambda from the legend */
2833             lambda_vec_init( &(ba->lambda[i]), lc );
2834             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2835             use = legend2lambda(fn, legend[i], ba, &(ba->lambda[i]));
2836             if (use)
2837             {
2838                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2839                 i++;
2840             }
2841             else
2842             {
2843                 int j;
2844                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2845                 for (j = i+1; j < ba->nset; j++)
2846                 {
2847                     ba->y[j-1]  = ba->y[j];
2848                     legend[j-1] = legend[j];
2849                 }
2850                 ba->nset--;
2851             }
2852         }
2853     }
2854
2855     if (!native_lambda_read)
2856     {
2857         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2858     }
2859
2860     if (legend != NULL)
2861     {
2862         for (i = 0; i < ba->nset-1; i++)
2863         {
2864             sfree(legend[i]);
2865         }
2866         sfree(legend);
2867     }
2868 }
2869
2870 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2871 {
2872     xvg_t     *barsim;
2873     samples_t *s;
2874     int        i;
2875     double    *lambda;
2876
2877     snew(barsim, 1);
2878
2879     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2880
2881     if (barsim->nset < 1)
2882     {
2883         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2884     }
2885
2886     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2887     {
2888         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2889     }
2890     *temp = barsim->temp;
2891
2892     /* now create a series of samples_t */
2893     snew(s, barsim->nset);
2894     for (i = 0; i < barsim->nset; i++)
2895     {
2896         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2897                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2898                                                    &(barsim->lambda[i])),
2899                      fn);
2900         s[i].du  = barsim->y[i];
2901         s[i].ndu = barsim->np[i];
2902         s[i].t   = barsim->t;
2903
2904         lambda_data_list_insert_sample(sd->lb, s+i);
2905     }
2906     {
2907         char buf[STRLEN];
2908
2909         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2910         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2911                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2912         for (i = 0; i < barsim->nset; i++)
2913         {
2914             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2915             printf("        %s (%d pts)\n", buf, s[i].ndu);
2916         }
2917     }
2918     printf("\n\n");
2919 }
2920
2921 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2922                                  double start_time, double delta_time,
2923                                  lambda_vec_t *native_lambda, double temp,
2924                                  double *last_t, const char *filename)
2925 {
2926     int           i, j;
2927     gmx_bool      allocated;
2928     double        old_foreign_lambda;
2929     lambda_vec_t *foreign_lambda;
2930     int           type;
2931     samples_t    *s; /* convenience pointer */
2932     int           startj;
2933
2934     /* check the block types etc. */
2935     if ( (blk->nsub < 3) ||
2936          (blk->sub[0].type != xdr_datatype_int) ||
2937          (blk->sub[1].type != xdr_datatype_double) ||
2938          (
2939              (blk->sub[2].type != xdr_datatype_float) &&
2940              (blk->sub[2].type != xdr_datatype_double)
2941          ) ||
2942          (blk->sub[0].nr < 1) ||
2943          (blk->sub[1].nr < 1) )
2944     {
2945         gmx_fatal(FARGS,
2946                   "Unexpected/corrupted block data in file %s around time %f.",
2947                   filename, start_time);
2948     }
2949
2950     snew(foreign_lambda, 1);
2951     lambda_vec_init(foreign_lambda, native_lambda->lc);
2952     lambda_vec_copy(foreign_lambda, native_lambda);
2953     type = blk->sub[0].ival[0];
2954     if (type == dhbtDH)
2955     {
2956         for (i = 0; i < native_lambda->lc->N; i++)
2957         {
2958             foreign_lambda->val[i] = blk->sub[1].dval[i];
2959         }
2960     }
2961     else
2962     {
2963         if (blk->sub[0].nr > 1)
2964         {
2965             foreign_lambda->dhdl = blk->sub[0].ival[1];
2966         }
2967         else
2968         {
2969             foreign_lambda->dhdl = 0;
2970         }
2971     }
2972
2973     if (!*smp)
2974     {
2975         /* initialize the samples structure if it's empty. */
2976         snew(*smp, 1);
2977         samples_init(*smp, native_lambda, foreign_lambda, temp,
2978                      type == dhbtDHDL, filename);
2979         (*smp)->start_time = start_time;
2980         (*smp)->delta_time = delta_time;
2981     }
2982
2983     /* set convenience pointer */
2984     s = *smp;
2985
2986     /* now double check */
2987     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2988     {
2989         char buf[STRLEN], buf2[STRLEN];
2990         lambda_vec_print(foreign_lambda, buf, FALSE);
2991         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2992         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2993         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2994                   filename, start_time);
2995     }
2996
2997     /* make room for the data */
2998     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2999     {
3000         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
3001             blk->sub[2].nr*2 : s->ndu_alloc;
3002         srenew(s->du_alloc, s->ndu_alloc);
3003         s->du = s->du_alloc;
3004     }
3005     startj   = s->ndu;
3006     s->ndu  += blk->sub[2].nr;
3007     s->ntot += blk->sub[2].nr;
3008     *ndu     = blk->sub[2].nr;
3009
3010     /* and copy the data*/
3011     for (j = 0; j < blk->sub[2].nr; j++)
3012     {
3013         if (blk->sub[2].type == xdr_datatype_float)
3014         {
3015             s->du[startj+j] = blk->sub[2].fval[j];
3016         }
3017         else
3018         {
3019             s->du[startj+j] = blk->sub[2].dval[j];
3020         }
3021     }
3022     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3023     {
3024         *last_t = start_time + blk->sub[2].nr*delta_time;
3025     }
3026 }
3027
3028 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3029                                       double start_time, double delta_time,
3030                                       lambda_vec_t *native_lambda, double temp,
3031                                       double *last_t, const char *filename)
3032 {
3033     int           i, j;
3034     samples_t    *s;
3035     int           nhist;
3036     double        old_foreign_lambda;
3037     lambda_vec_t *foreign_lambda;
3038     int           type;
3039     int           nbins[2];
3040
3041     /* check the block types etc. */
3042     if ( (blk->nsub < 2) ||
3043          (blk->sub[0].type != xdr_datatype_double) ||
3044          (blk->sub[1].type != xdr_datatype_large_int) ||
3045          (blk->sub[0].nr < 2)  ||
3046          (blk->sub[1].nr < 2) )
3047     {
3048         gmx_fatal(FARGS,
3049                   "Unexpected/corrupted block data in file %s around time %f",
3050                   filename, start_time);
3051     }
3052
3053     nhist = blk->nsub-2;
3054     if (nhist == 0)
3055     {
3056         return NULL;
3057     }
3058     if (nhist > 2)
3059     {
3060         gmx_fatal(FARGS,
3061                   "Unexpected/corrupted block data in file %s around time %f",
3062                   filename, start_time);
3063     }
3064
3065     snew(s, 1);
3066     *nsamples = 1;
3067
3068     snew(foreign_lambda, 1);
3069     lambda_vec_init(foreign_lambda, native_lambda->lc);
3070     lambda_vec_copy(foreign_lambda, native_lambda);
3071     type = (int)(blk->sub[1].lval[1]);
3072     if (type == dhbtDH)
3073     {
3074         double old_foreign_lambda;
3075
3076         old_foreign_lambda = blk->sub[0].dval[0];
3077         if (old_foreign_lambda >= 0)
3078         {
3079             foreign_lambda->val[0] = old_foreign_lambda;
3080             if (foreign_lambda->lc->N > 1)
3081             {
3082                 gmx_fatal(FARGS,
3083                           "Single-component lambda in multi-component file %s",
3084                           filename);
3085             }
3086         }
3087         else
3088         {
3089             for (i = 0; i < native_lambda->lc->N; i++)
3090             {
3091                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3092             }
3093         }
3094     }
3095     else
3096     {
3097         if (foreign_lambda->lc->N > 1)
3098         {
3099             if (blk->sub[1].nr < 3 + nhist)
3100             {
3101                 gmx_fatal(FARGS,
3102                           "Missing derivative coord in multi-component file %s",
3103                           filename);
3104             }
3105             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3106         }
3107         else
3108         {
3109             foreign_lambda->dhdl = 0;
3110         }
3111     }
3112
3113     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3114                  filename);
3115     snew(s->hist, 1);
3116
3117     for (i = 0; i < nhist; i++)
3118     {
3119         nbins[i] = blk->sub[i+2].nr;
3120     }
3121
3122     hist_init(s->hist, nhist, nbins);
3123
3124     for (i = 0; i < nhist; i++)
3125     {
3126         s->hist->x0[i] = blk->sub[1].lval[2+i];
3127         s->hist->dx[i] = blk->sub[0].dval[1];
3128         if (i == 1)
3129         {
3130             s->hist->dx[i] = -s->hist->dx[i];
3131         }
3132     }
3133
3134     s->hist->start_time = start_time;
3135     s->hist->delta_time = delta_time;
3136     s->start_time       = start_time;
3137     s->delta_time       = delta_time;
3138
3139     for (i = 0; i < nhist; i++)
3140     {
3141         int             nbin;
3142         gmx_large_int_t sum = 0;
3143
3144         for (j = 0; j < s->hist->nbin[i]; j++)
3145         {
3146             int binv = (int)(blk->sub[i+2].ival[j]);
3147
3148             s->hist->bin[i][j] = binv;
3149             sum               += binv;
3150
3151         }
3152         if (i == 0)
3153         {
3154             s->ntot      = sum;
3155             s->hist->sum = sum;
3156         }
3157         else
3158         {
3159             if (s->ntot != sum)
3160             {
3161                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3162                           filename);
3163             }
3164         }
3165     }
3166
3167     if (start_time + s->hist->sum*delta_time > *last_t)
3168     {
3169         *last_t = start_time + s->hist->sum*delta_time;
3170     }
3171     return s;
3172 }
3173
3174
3175 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3176 {
3177     int            i, j;
3178     ener_file_t    fp;
3179     t_enxframe    *fr;
3180     int            nre;
3181     gmx_enxnm_t   *enm           = NULL;
3182     double         first_t       = -1;
3183     double         last_t        = -1;
3184     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3185     int           *nhists        = NULL; /* array to keep count & print at end */
3186     int           *npts          = NULL; /* array to keep count & print at end */
3187     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3188     lambda_vec_t  *native_lambda;
3189     double         end_time;             /* the end time of the last batch of samples */
3190     int            nsamples = 0;
3191     lambda_vec_t   start_lambda;
3192
3193     fp = open_enx(fn, "r");
3194     do_enxnms(fp, &nre, &enm);
3195     snew(fr, 1);
3196
3197     snew(native_lambda, 1);
3198     start_lambda.lc = NULL;
3199
3200     while (do_enx(fp, fr))
3201     {
3202         /* count the data blocks */
3203         int    nblocks_raw  = 0;
3204         int    nblocks_hist = 0;
3205         int    nlam         = 0;
3206         int    k;
3207         /* DHCOLL block information: */
3208         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3209         double rtemp      = 0;
3210
3211         /* count the blocks and handle collection information: */
3212         for (i = 0; i < fr->nblock; i++)
3213         {
3214             if (fr->block[i].id == enxDHHIST)
3215             {
3216                 nblocks_hist++;
3217             }
3218             if (fr->block[i].id == enxDH)
3219             {
3220                 nblocks_raw++;
3221             }
3222             if (fr->block[i].id == enxDHCOLL)
3223             {
3224                 nlam++;
3225                 if ( (fr->block[i].nsub < 1) ||
3226                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3227                      (fr->block[i].sub[0].nr < 5))
3228                 {
3229                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3230                 }
3231
3232                 /* read the data from the DHCOLL block */
3233                 rtemp            =        fr->block[i].sub[0].dval[0];
3234                 start_time       =   fr->block[i].sub[0].dval[1];
3235                 delta_time       =   fr->block[i].sub[0].dval[2];
3236                 old_start_lambda = fr->block[i].sub[0].dval[3];
3237                 delta_lambda     = fr->block[i].sub[0].dval[4];
3238
3239                 if (delta_lambda > 0)
3240                 {
3241                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3242                 }
3243                 if ( ( *temp != rtemp) && (*temp > 0) )
3244                 {
3245                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3246                 }
3247                 *temp = rtemp;
3248
3249                 if (old_start_lambda >= 0)
3250                 {
3251                     if (sd->lc.N > 0)
3252                     {
3253                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3254                         {
3255                             gmx_fatal(FARGS,
3256                                       "lambda vector components in %s don't match those previously read",
3257                                       fn);
3258                         }
3259                     }
3260                     else
3261                     {
3262                         lambda_components_add(&(sd->lc), "", 0);
3263                     }
3264                     if (!start_lambda.lc)
3265                     {
3266                         lambda_vec_init(&start_lambda, &(sd->lc));
3267                     }
3268                     start_lambda.val[0] = old_start_lambda;
3269                 }
3270                 else
3271                 {
3272                     /* read lambda vector */
3273                     int      n_lambda_vec;
3274                     gmx_bool check = (sd->lc.N > 0);
3275                     if (fr->block[i].nsub < 2)
3276                     {
3277                         gmx_fatal(FARGS,
3278                                   "No lambda vector, but start_lambda=%f\n",
3279                                   old_start_lambda);
3280                     }
3281                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3282                     for (j = 0; j < n_lambda_vec; j++)
3283                     {
3284                         const char *name =
3285                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3286                         if (check)
3287                         {
3288                             /* check the components */
3289                             lambda_components_check(&(sd->lc), j, name,
3290                                                     strlen(name));
3291                         }
3292                         else
3293                         {
3294                             lambda_components_add(&(sd->lc), name,
3295                                                   strlen(name));
3296                         }
3297                     }
3298                     lambda_vec_init(&start_lambda, &(sd->lc));
3299                     start_lambda.index = fr->block[i].sub[1].ival[0];
3300                     for (j = 0; j < n_lambda_vec; j++)
3301                     {
3302                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3303                     }
3304                 }
3305                 if (first_t < 0)
3306                 {
3307                     first_t = start_time;
3308                 }
3309             }
3310         }
3311
3312         if (nlam != 1)
3313         {
3314             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3315         }
3316         if (nblocks_raw > 0 && nblocks_hist > 0)
3317         {
3318             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3319         }
3320
3321         if (nsamples > 0)
3322         {
3323             /* check the native lambda */
3324             if (!lambda_vec_same(&start_lambda, native_lambda) )
3325             {
3326                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3327                           fn, native_lambda, start_lambda, start_time);
3328             }
3329             /* check the number of samples against the previous number */
3330             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3331             {
3332                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3333                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3334             }
3335             /* check whether last iterations's end time matches with
3336                the currrent start time */
3337             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3338             {
3339                 /* it didn't. We need to store our samples and reallocate */
3340                 for (i = 0; i < nsamples; i++)
3341                 {
3342                     if (samples_rawdh[i])
3343                     {
3344                         /* insert it into the existing list */
3345                         lambda_data_list_insert_sample(sd->lb,
3346                                                        samples_rawdh[i]);
3347                         /* and make sure we'll allocate a new one this time
3348                            around */
3349                         samples_rawdh[i] = NULL;
3350                     }
3351                 }
3352             }
3353         }
3354         else
3355         {
3356             /* this is the first round; allocate the associated data
3357                structures */
3358             /*native_lambda=start_lambda;*/
3359             lambda_vec_init(native_lambda, &(sd->lc));
3360             lambda_vec_copy(native_lambda, &start_lambda);
3361             nsamples = nblocks_raw+nblocks_hist;
3362             snew(nhists, nsamples);
3363             snew(npts, nsamples);
3364             snew(lambdas, nsamples);
3365             snew(samples_rawdh, nsamples);
3366             for (i = 0; i < nsamples; i++)
3367             {
3368                 nhists[i]        = 0;
3369                 npts[i]          = 0;
3370                 lambdas[i]       = NULL;
3371                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3372                                             ones contain values */
3373             }
3374         }
3375
3376         /* and read them */
3377         k = 0; /* counter for the lambdas, etc. arrays */
3378         for (i = 0; i < fr->nblock; i++)
3379         {
3380             if (fr->block[i].id == enxDH)
3381             {
3382                 int type = (fr->block[i].sub[0].ival[0]);
3383                 if (type == dhbtDH || type == dhbtDHDL)
3384                 {
3385                     int ndu;
3386                     read_edr_rawdh_block(&(samples_rawdh[k]),
3387                                          &ndu,
3388                                          &(fr->block[i]),
3389                                          start_time, delta_time,
3390                                          native_lambda, rtemp,
3391                                          &last_t, fn);
3392                     npts[k] += ndu;
3393                     if (samples_rawdh[k])
3394                     {
3395                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3396                     }
3397                     k++;
3398                 }
3399             }
3400             else if (fr->block[i].id == enxDHHIST)
3401             {
3402                 int type = (int)(fr->block[i].sub[1].lval[1]);
3403                 if (type == dhbtDH || type == dhbtDHDL)
3404                 {
3405                     int        j;
3406                     int        nb = 0;
3407                     samples_t *s; /* this is where the data will go */
3408                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3409                                             start_time, delta_time,
3410                                             native_lambda, rtemp,
3411                                             &last_t, fn);
3412                     nhists[k] += nb;
3413                     if (nb > 0)
3414                     {
3415                         lambdas[k] = s->foreign_lambda;
3416                     }
3417                     k++;
3418                     /* and insert the new sample immediately */
3419                     for (j = 0; j < nb; j++)
3420                     {
3421                         lambda_data_list_insert_sample(sd->lb, s+j);
3422                     }
3423                 }
3424             }
3425         }
3426     }
3427     /* Now store all our extant sample collections */
3428     for (i = 0; i < nsamples; i++)
3429     {
3430         if (samples_rawdh[i])
3431         {
3432             /* insert it into the existing list */
3433             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3434         }
3435     }
3436
3437
3438     {
3439         char buf[STRLEN];
3440         printf("\n");
3441         lambda_vec_print(native_lambda, buf, FALSE);
3442         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3443                fn, first_t, last_t, buf);
3444         for (i = 0; i < nsamples; i++)
3445         {
3446             if (lambdas[i])
3447             {
3448                 lambda_vec_print(lambdas[i], buf, TRUE);
3449                 if (nhists[i] > 0)
3450                 {
3451                     printf("        %s (%d hists)\n", buf, nhists[i]);
3452                 }
3453                 else
3454                 {
3455                     printf("        %s (%d pts)\n", buf, npts[i]);
3456                 }
3457             }
3458         }
3459     }
3460     printf("\n\n");
3461     sfree(npts);
3462     sfree(nhists);
3463     sfree(lambdas);
3464 }
3465
3466
3467 int gmx_bar(int argc, char *argv[])
3468 {
3469     static const char *desc[] = {
3470         "[TT]g_bar[tt] calculates free energy difference estimates through ",
3471         "Bennett's acceptance ratio method (BAR). It also automatically",
3472         "adds series of individual free energies obtained with BAR into",
3473         "a combined free energy estimate.[PAR]",
3474
3475         "Every individual BAR free energy difference relies on two ",
3476         "simulations at different states: say state A and state B, as",
3477         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3478         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3479         "average of the Hamiltonian difference of state B given state A and",
3480         "vice versa.",
3481         "The energy differences to the other state must be calculated",
3482         "explicitly during the simulation. This can be done with",
3483         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3484
3485         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3486         "Two types of input files are supported:[BR]",
3487         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3488         "The files should have columns ",
3489         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3490         "The [GRK]lambda[grk] values are inferred ",
3491         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3492         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3493         "legends of Delta H",
3494         "[BR]",
3495         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3496         "[TT]-extp[tt] option for these files, it is assumed",
3497         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3498         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3499         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3500         "subtitle (if present), otherwise from a number in the subdirectory ",
3501         "in the file name.[PAR]",
3502
3503         "The [GRK]lambda[grk] of the simulation is parsed from ",
3504         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3505         "foreign [GRK]lambda[grk] values from the legend containing the ",
3506         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3507         "the legend line containing 'T ='.[PAR]",
3508
3509         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3510         "These can contain either lists of energy differences (see the ",
3511         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3512         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3513         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3514         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3515
3516         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3517         "the energy difference can also be extrapolated from the ",
3518         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3519         "option, which assumes that the system's Hamiltonian depends linearly",
3520         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3521
3522         "The free energy estimates are determined using BAR with bisection, ",
3523         "with the precision of the output set with [TT]-prec[tt]. ",
3524         "An error estimate taking into account time correlations ",
3525         "is made by splitting the data into blocks and determining ",
3526         "the free energy differences over those blocks and assuming ",
3527         "the blocks are independent. ",
3528         "The final error estimate is determined from the average variance ",
3529         "over 5 blocks. A range of block numbers for error estimation can ",
3530         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3531
3532         "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
3533         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3534         "samples. [BB]Note[bb] that when aggregating energy ",
3535         "differences/derivatives with different sampling intervals, this is ",
3536         "almost certainly not correct. Usually subsequent energies are ",
3537         "correlated and different time intervals mean different degrees ",
3538         "of correlation between samples.[PAR]",
3539
3540         "The results are split in two parts: the last part contains the final ",
3541         "results in kJ/mol, together with the error estimate for each part ",
3542         "and the total. The first part contains detailed free energy ",
3543         "difference estimates and phase space overlap measures in units of ",
3544         "kT (together with their computed error estimate). The printed ",
3545         "values are:[BR]",
3546         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3547         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3548         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3549         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3550         "[TT]*[tt]    s_B: an estimate of the relative entropy of A in B.[BR]",
3551         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3552
3553         "The relative entropy of both states in each other's ensemble can be ",
3554         "interpreted as a measure of phase space overlap: ",
3555         "the relative entropy s_A of the work samples of lambda_B in the ",
3556         "ensemble of lambda_A (and vice versa for s_B), is a ",
3557         "measure of the 'distance' between Boltzmann distributions of ",
3558         "the two states, that goes to zero for identical distributions. See ",
3559         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3560         "[PAR]",
3561         "The estimate of the expected per-sample standard deviation, as given ",
3562         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3563         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3564         "of the actual statistical error, because it assumes independent samples).[PAR]",
3565
3566         "To get a visual estimate of the phase space overlap, use the ",
3567         "[TT]-oh[tt] option to write series of histograms, together with the ",
3568         "[TT]-nbin[tt] option.[PAR]"
3569     };
3570     static real        begin    = 0, end = -1, temp = -1;
3571     int                nd       = 2, nbmin = 5, nbmax = 5;
3572     int                nbin     = 100;
3573     gmx_bool           use_dhdl = FALSE;
3574     gmx_bool           calc_s, calc_v;
3575     t_pargs            pa[] = {
3576         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3577         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3578         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3579         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3580         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3581         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3582         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3583         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3584     };
3585
3586     t_filenm           fnm[] = {
3587         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3588         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3589         { efXVG, "-o",  "bar",    ffOPTWR },
3590         { efXVG, "-oi", "barint", ffOPTWR },
3591         { efXVG, "-oh", "histogram", ffOPTWR }
3592     };
3593 #define NFILE asize(fnm)
3594
3595     int          f, i, j;
3596     int          nf = 0;    /* file counter */
3597     int          nbs;
3598     int          nfile_tot; /* total number of input files */
3599     int          nxvgfile = 0;
3600     int          nedrfile = 0;
3601     char       **fxvgnms;
3602     char       **fedrnms;
3603     sim_data_t   sim_data; /* the simulation data */
3604     barres_t    *results;  /* the results */
3605     int          nresults; /* number of results in results array */
3606
3607     double      *partsum;
3608     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3609     FILE        *fpb, *fpi;
3610     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3611     char         buf[STRLEN], buf2[STRLEN];
3612     char         ktformat[STRLEN], sktformat[STRLEN];
3613     char         kteformat[STRLEN], skteformat[STRLEN];
3614     output_env_t oenv;
3615     double       kT, beta;
3616     gmx_bool     result_OK = TRUE, bEE = TRUE;
3617
3618     gmx_bool     disc_err          = FALSE;
3619     double       sum_disc_err      = 0.; /* discretization error */
3620     gmx_bool     histrange_err     = FALSE;
3621     double       sum_histrange_err = 0.; /* histogram range error */
3622     double       stat_err          = 0.; /* statistical error */
3623
3624     CopyRight(stderr, argv[0]);
3625     parse_common_args(&argc, argv,
3626                       PCA_CAN_VIEW,
3627                       NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv);
3628
3629     if (opt2bSet("-f", NFILE, fnm))
3630     {
3631         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3632     }
3633     if (opt2bSet("-g", NFILE, fnm))
3634     {
3635         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3636     }
3637
3638     sim_data_init(&sim_data);
3639 #if 0
3640     /* make linked list */
3641     lb = &lambda_head;
3642     lambda_data_init(lb, 0, 0);
3643     lb->next = lb;
3644     lb->prev = lb;
3645 #endif
3646
3647
3648     nfile_tot = nxvgfile + nedrfile;
3649
3650     if (nfile_tot == 0)
3651     {
3652         gmx_fatal(FARGS, "No input files!");
3653     }
3654
3655     if (nd < 0)
3656     {
3657         gmx_fatal(FARGS, "Can not have negative number of digits");
3658     }
3659     prec = pow(10, -nd);
3660
3661     snew(partsum, (nbmax+1)*(nbmax+1));
3662     nf = 0;
3663
3664     /* read in all files. First xvg files */
3665     for (f = 0; f < nxvgfile; f++)
3666     {
3667         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3668         nf++;
3669     }
3670     /* then .edr files */
3671     for (f = 0; f < nedrfile; f++)
3672     {
3673         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3674         nf++;
3675     }
3676
3677     /* fix the times to allow for equilibration */
3678     sim_data_impose_times(&sim_data, begin, end);
3679
3680     if (opt2bSet("-oh", NFILE, fnm))
3681     {
3682         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3683     }
3684
3685     /* assemble the output structures from the lambdas */
3686     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3687
3688     sum_disc_err = barres_list_max_disc_err(results, nresults);
3689
3690     if (nresults == 0)
3691     {
3692         printf("\nNo results to calculate.\n");
3693         return 0;
3694     }
3695
3696     if (sum_disc_err > prec)
3697     {
3698         prec = sum_disc_err;
3699         nd   = ceil(-log10(prec));
3700         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3701     }
3702
3703
3704     /*sprintf(lamformat,"%%6.3f");*/
3705     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3706     /* the format strings of the results in kT */
3707     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3708     sprintf( sktformat, "%%%ds", 6+nd);
3709     /* the format strings of the errors in kT */
3710     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3711     sprintf( skteformat, "%%%ds", 4+nd);
3712     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3713     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3714
3715
3716
3717     fpb = NULL;
3718     if (opt2bSet("-o", NFILE, fnm))
3719     {
3720         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3721         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3722                             "\\lambda", buf, exvggtXYDY, oenv);
3723     }
3724
3725     fpi = NULL;
3726     if (opt2bSet("-oi", NFILE, fnm))
3727     {
3728         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3729         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3730                        "\\lambda", buf, oenv);
3731     }
3732
3733
3734
3735     if (nbmin > nbmax)
3736     {
3737         nbmin = nbmax;
3738     }
3739
3740     /* first calculate results */
3741     bEE      = TRUE;
3742     disc_err = FALSE;
3743     for (f = 0; f < nresults; f++)
3744     {
3745         /* Determine the free energy difference with a factor of 10
3746          * more accuracy than requested for printing.
3747          */
3748         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3749                  &bEE, partsum);
3750
3751         if (results[f].dg_disc_err > prec/10.)
3752         {
3753             disc_err = TRUE;
3754         }
3755         if (results[f].dg_histrange_err > prec/10.)
3756         {
3757             histrange_err = TRUE;
3758         }
3759     }
3760
3761     /* print results in kT */
3762     kT   = BOLTZ*temp;
3763     beta = 1/kT;
3764
3765     printf("\nTemperature: %g K\n", temp);
3766
3767     printf("\nDetailed results in kT (see help for explanation):\n\n");
3768     printf("%6s ", " lam_A");
3769     printf("%6s ", " lam_B");
3770     printf(sktformat,  "DG ");
3771     if (bEE)
3772     {
3773         printf(skteformat, "+/- ");
3774     }
3775     if (disc_err)
3776     {
3777         printf(skteformat, "disc ");
3778     }
3779     if (histrange_err)
3780     {
3781         printf(skteformat, "range ");
3782     }
3783     printf(sktformat,  "s_A ");
3784     if (bEE)
3785     {
3786         printf(skteformat, "+/- " );
3787     }
3788     printf(sktformat,  "s_B ");
3789     if (bEE)
3790     {
3791         printf(skteformat, "+/- " );
3792     }
3793     printf(sktformat,  "stdev ");
3794     if (bEE)
3795     {
3796         printf(skteformat, "+/- ");
3797     }
3798     printf("\n");
3799     for (f = 0; f < nresults; f++)
3800     {
3801         lambda_vec_print_short(results[f].a->native_lambda, buf);
3802         printf("%s ", buf);
3803         lambda_vec_print_short(results[f].b->native_lambda, buf);
3804         printf("%s ", buf);
3805         printf(ktformat,  results[f].dg);
3806         printf(" ");
3807         if (bEE)
3808         {
3809             printf(kteformat, results[f].dg_err);
3810             printf(" ");
3811         }
3812         if (disc_err)
3813         {
3814             printf(kteformat, results[f].dg_disc_err);
3815             printf(" ");
3816         }
3817         if (histrange_err)
3818         {
3819             printf(kteformat, results[f].dg_histrange_err);
3820             printf(" ");
3821         }
3822         printf(ktformat,  results[f].sa);
3823         printf(" ");
3824         if (bEE)
3825         {
3826             printf(kteformat, results[f].sa_err);
3827             printf(" ");
3828         }
3829         printf(ktformat,  results[f].sb);
3830         printf(" ");
3831         if (bEE)
3832         {
3833             printf(kteformat, results[f].sb_err);
3834             printf(" ");
3835         }
3836         printf(ktformat,  results[f].dg_stddev);
3837         printf(" ");
3838         if (bEE)
3839         {
3840             printf(kteformat, results[f].dg_stddev_err);
3841         }
3842         printf("\n");
3843
3844         /* Check for negative relative entropy with a 95% certainty. */
3845         if (results[f].sa < -2*results[f].sa_err ||
3846             results[f].sb < -2*results[f].sb_err)
3847         {
3848             result_OK = FALSE;
3849         }
3850     }
3851
3852     if (!result_OK)
3853     {
3854         printf("\nWARNING: Some of these results violate the Second Law of "
3855                "Thermodynamics: \n"
3856                "         This is can be the result of severe undersampling, or "
3857                "(more likely)\n"
3858                "         there is something wrong with the simulations.\n");
3859     }
3860
3861
3862     /* final results in kJ/mol */
3863     printf("\n\nFinal results in kJ/mol:\n\n");
3864     dg_tot  = 0;
3865     for (f = 0; f < nresults; f++)
3866     {
3867
3868         if (fpi != NULL)
3869         {
3870             lambda_vec_print_short(results[f].a->native_lambda, buf);
3871             fprintf(fpi, xvg2format, buf, dg_tot);
3872         }
3873
3874
3875         if (fpb != NULL)
3876         {
3877             lambda_vec_print_intermediate(results[f].a->native_lambda,
3878                                           results[f].b->native_lambda,
3879                                           buf);
3880
3881             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3882         }
3883
3884         printf("point ");
3885         lambda_vec_print_short(results[f].a->native_lambda, buf);
3886         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3887         printf("%s - %s", buf, buf2);
3888         printf(",   DG ");
3889
3890         printf(dgformat, results[f].dg*kT);
3891         if (bEE)
3892         {
3893             printf(" +/- ");
3894             printf(dgformat, results[f].dg_err*kT);
3895         }
3896         if (histrange_err)
3897         {
3898             printf(" (max. range err. = ");
3899             printf(dgformat, results[f].dg_histrange_err*kT);
3900             printf(")");
3901             sum_histrange_err += results[f].dg_histrange_err*kT;
3902         }
3903
3904         printf("\n");
3905         dg_tot += results[f].dg;
3906     }
3907     printf("\n");
3908     printf("total ");
3909     lambda_vec_print_short(results[0].a->native_lambda, buf);
3910     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3911     printf("%s - %s", buf, buf2);
3912     printf(",   DG ");
3913
3914     printf(dgformat, dg_tot*kT);
3915     if (bEE)
3916     {
3917         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3918         printf(" +/- ");
3919         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3920     }
3921     printf("\n");
3922     if (disc_err)
3923     {
3924         printf("\nmaximum discretization error = ");
3925         printf(dgformat, sum_disc_err);
3926         if (bEE && stat_err < sum_disc_err)
3927         {
3928             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3929         }
3930     }
3931     if (histrange_err)
3932     {
3933         printf("\nmaximum histogram range error = ");
3934         printf(dgformat, sum_histrange_err);
3935         if (bEE && stat_err < sum_histrange_err)
3936         {
3937             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3938         }
3939
3940     }
3941     printf("\n");
3942
3943
3944     if (fpi != NULL)
3945     {
3946         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3947         fprintf(fpi, xvg2format, buf, dg_tot);
3948         ffclose(fpi);
3949     }
3950
3951     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3952     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3953
3954     thanx(stderr);
3955
3956     return 0;
3957 }